Merge tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi
[linux-2.6-microblaze.git] / drivers / net / hyperv / netvsc.c
1 /*
2  * Copyright (c) 2009, Microsoft Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, see <http://www.gnu.org/licenses/>.
15  *
16  * Authors:
17  *   Haiyang Zhang <haiyangz@microsoft.com>
18  *   Hank Janssen  <hjanssen@microsoft.com>
19  */
20 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
21
22 #include <linux/kernel.h>
23 #include <linux/sched.h>
24 #include <linux/wait.h>
25 #include <linux/mm.h>
26 #include <linux/delay.h>
27 #include <linux/io.h>
28 #include <linux/slab.h>
29 #include <linux/netdevice.h>
30 #include <linux/if_ether.h>
31 #include <linux/vmalloc.h>
32 #include <linux/rtnetlink.h>
33 #include <linux/prefetch.h>
34
35 #include <asm/sync_bitops.h>
36
37 #include "hyperv_net.h"
38 #include "netvsc_trace.h"
39
40 /*
41  * Switch the data path from the synthetic interface to the VF
42  * interface.
43  */
44 void netvsc_switch_datapath(struct net_device *ndev, bool vf)
45 {
46         struct net_device_context *net_device_ctx = netdev_priv(ndev);
47         struct hv_device *dev = net_device_ctx->device_ctx;
48         struct netvsc_device *nv_dev = rtnl_dereference(net_device_ctx->nvdev);
49         struct nvsp_message *init_pkt = &nv_dev->channel_init_pkt;
50
51         memset(init_pkt, 0, sizeof(struct nvsp_message));
52         init_pkt->hdr.msg_type = NVSP_MSG4_TYPE_SWITCH_DATA_PATH;
53         if (vf)
54                 init_pkt->msg.v4_msg.active_dp.active_datapath =
55                         NVSP_DATAPATH_VF;
56         else
57                 init_pkt->msg.v4_msg.active_dp.active_datapath =
58                         NVSP_DATAPATH_SYNTHETIC;
59
60         trace_nvsp_send(ndev, init_pkt);
61
62         vmbus_sendpacket(dev->channel, init_pkt,
63                                sizeof(struct nvsp_message),
64                                (unsigned long)init_pkt,
65                                VM_PKT_DATA_INBAND, 0);
66 }
67
68 static struct netvsc_device *alloc_net_device(void)
69 {
70         struct netvsc_device *net_device;
71
72         net_device = kzalloc(sizeof(struct netvsc_device), GFP_KERNEL);
73         if (!net_device)
74                 return NULL;
75
76         init_waitqueue_head(&net_device->wait_drain);
77         net_device->destroy = false;
78
79         net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
80         net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;
81
82         init_completion(&net_device->channel_init_wait);
83         init_waitqueue_head(&net_device->subchan_open);
84         INIT_WORK(&net_device->subchan_work, rndis_set_subchannel);
85
86         return net_device;
87 }
88
89 static void free_netvsc_device(struct rcu_head *head)
90 {
91         struct netvsc_device *nvdev
92                 = container_of(head, struct netvsc_device, rcu);
93         int i;
94
95         kfree(nvdev->extension);
96         vfree(nvdev->recv_buf);
97         vfree(nvdev->send_buf);
98         kfree(nvdev->send_section_map);
99
100         for (i = 0; i < VRSS_CHANNEL_MAX; i++)
101                 vfree(nvdev->chan_table[i].mrc.slots);
102
103         kfree(nvdev);
104 }
105
106 static void free_netvsc_device_rcu(struct netvsc_device *nvdev)
107 {
108         call_rcu(&nvdev->rcu, free_netvsc_device);
109 }
110
111 static void netvsc_revoke_recv_buf(struct hv_device *device,
112                                    struct netvsc_device *net_device,
113                                    struct net_device *ndev)
114 {
115         struct nvsp_message *revoke_packet;
116         int ret;
117
118         /*
119          * If we got a section count, it means we received a
120          * SendReceiveBufferComplete msg (ie sent
121          * NvspMessage1TypeSendReceiveBuffer msg) therefore, we need
122          * to send a revoke msg here
123          */
124         if (net_device->recv_section_cnt) {
125                 /* Send the revoke receive buffer */
126                 revoke_packet = &net_device->revoke_packet;
127                 memset(revoke_packet, 0, sizeof(struct nvsp_message));
128
129                 revoke_packet->hdr.msg_type =
130                         NVSP_MSG1_TYPE_REVOKE_RECV_BUF;
131                 revoke_packet->msg.v1_msg.
132                 revoke_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;
133
134                 trace_nvsp_send(ndev, revoke_packet);
135
136                 ret = vmbus_sendpacket(device->channel,
137                                        revoke_packet,
138                                        sizeof(struct nvsp_message),
139                                        (unsigned long)revoke_packet,
140                                        VM_PKT_DATA_INBAND, 0);
141                 /* If the failure is because the channel is rescinded;
142                  * ignore the failure since we cannot send on a rescinded
143                  * channel. This would allow us to properly cleanup
144                  * even when the channel is rescinded.
145                  */
146                 if (device->channel->rescind)
147                         ret = 0;
148                 /*
149                  * If we failed here, we might as well return and
150                  * have a leak rather than continue and a bugchk
151                  */
152                 if (ret != 0) {
153                         netdev_err(ndev, "unable to send "
154                                 "revoke receive buffer to netvsp\n");
155                         return;
156                 }
157                 net_device->recv_section_cnt = 0;
158         }
159 }
160
161 static void netvsc_revoke_send_buf(struct hv_device *device,
162                                    struct netvsc_device *net_device,
163                                    struct net_device *ndev)
164 {
165         struct nvsp_message *revoke_packet;
166         int ret;
167
168         /* Deal with the send buffer we may have setup.
169          * If we got a  send section size, it means we received a
170          * NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE msg (ie sent
171          * NVSP_MSG1_TYPE_SEND_SEND_BUF msg) therefore, we need
172          * to send a revoke msg here
173          */
174         if (net_device->send_section_cnt) {
175                 /* Send the revoke receive buffer */
176                 revoke_packet = &net_device->revoke_packet;
177                 memset(revoke_packet, 0, sizeof(struct nvsp_message));
178
179                 revoke_packet->hdr.msg_type =
180                         NVSP_MSG1_TYPE_REVOKE_SEND_BUF;
181                 revoke_packet->msg.v1_msg.revoke_send_buf.id =
182                         NETVSC_SEND_BUFFER_ID;
183
184                 trace_nvsp_send(ndev, revoke_packet);
185
186                 ret = vmbus_sendpacket(device->channel,
187                                        revoke_packet,
188                                        sizeof(struct nvsp_message),
189                                        (unsigned long)revoke_packet,
190                                        VM_PKT_DATA_INBAND, 0);
191
192                 /* If the failure is because the channel is rescinded;
193                  * ignore the failure since we cannot send on a rescinded
194                  * channel. This would allow us to properly cleanup
195                  * even when the channel is rescinded.
196                  */
197                 if (device->channel->rescind)
198                         ret = 0;
199
200                 /* If we failed here, we might as well return and
201                  * have a leak rather than continue and a bugchk
202                  */
203                 if (ret != 0) {
204                         netdev_err(ndev, "unable to send "
205                                    "revoke send buffer to netvsp\n");
206                         return;
207                 }
208                 net_device->send_section_cnt = 0;
209         }
210 }
211
212 static void netvsc_teardown_recv_gpadl(struct hv_device *device,
213                                        struct netvsc_device *net_device,
214                                        struct net_device *ndev)
215 {
216         int ret;
217
218         if (net_device->recv_buf_gpadl_handle) {
219                 ret = vmbus_teardown_gpadl(device->channel,
220                                            net_device->recv_buf_gpadl_handle);
221
222                 /* If we failed here, we might as well return and have a leak
223                  * rather than continue and a bugchk
224                  */
225                 if (ret != 0) {
226                         netdev_err(ndev,
227                                    "unable to teardown receive buffer's gpadl\n");
228                         return;
229                 }
230                 net_device->recv_buf_gpadl_handle = 0;
231         }
232 }
233
234 static void netvsc_teardown_send_gpadl(struct hv_device *device,
235                                        struct netvsc_device *net_device,
236                                        struct net_device *ndev)
237 {
238         int ret;
239
240         if (net_device->send_buf_gpadl_handle) {
241                 ret = vmbus_teardown_gpadl(device->channel,
242                                            net_device->send_buf_gpadl_handle);
243
244                 /* If we failed here, we might as well return and have a leak
245                  * rather than continue and a bugchk
246                  */
247                 if (ret != 0) {
248                         netdev_err(ndev,
249                                    "unable to teardown send buffer's gpadl\n");
250                         return;
251                 }
252                 net_device->send_buf_gpadl_handle = 0;
253         }
254 }
255
256 int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx)
257 {
258         struct netvsc_channel *nvchan = &net_device->chan_table[q_idx];
259         int node = cpu_to_node(nvchan->channel->target_cpu);
260         size_t size;
261
262         size = net_device->recv_completion_cnt * sizeof(struct recv_comp_data);
263         nvchan->mrc.slots = vzalloc_node(size, node);
264         if (!nvchan->mrc.slots)
265                 nvchan->mrc.slots = vzalloc(size);
266
267         return nvchan->mrc.slots ? 0 : -ENOMEM;
268 }
269
270 static int netvsc_init_buf(struct hv_device *device,
271                            struct netvsc_device *net_device,
272                            const struct netvsc_device_info *device_info)
273 {
274         struct nvsp_1_message_send_receive_buffer_complete *resp;
275         struct net_device *ndev = hv_get_drvdata(device);
276         struct nvsp_message *init_packet;
277         unsigned int buf_size;
278         size_t map_words;
279         int ret = 0;
280
281         /* Get receive buffer area. */
282         buf_size = device_info->recv_sections * device_info->recv_section_size;
283         buf_size = roundup(buf_size, PAGE_SIZE);
284
285         /* Legacy hosts only allow smaller receive buffer */
286         if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_2)
287                 buf_size = min_t(unsigned int, buf_size,
288                                  NETVSC_RECEIVE_BUFFER_SIZE_LEGACY);
289
290         net_device->recv_buf = vzalloc(buf_size);
291         if (!net_device->recv_buf) {
292                 netdev_err(ndev,
293                            "unable to allocate receive buffer of size %u\n",
294                            buf_size);
295                 ret = -ENOMEM;
296                 goto cleanup;
297         }
298
299         net_device->recv_buf_size = buf_size;
300
301         /*
302          * Establish the gpadl handle for this buffer on this
303          * channel.  Note: This call uses the vmbus connection rather
304          * than the channel to establish the gpadl handle.
305          */
306         ret = vmbus_establish_gpadl(device->channel, net_device->recv_buf,
307                                     buf_size,
308                                     &net_device->recv_buf_gpadl_handle);
309         if (ret != 0) {
310                 netdev_err(ndev,
311                         "unable to establish receive buffer's gpadl\n");
312                 goto cleanup;
313         }
314
315         /* Notify the NetVsp of the gpadl handle */
316         init_packet = &net_device->channel_init_pkt;
317         memset(init_packet, 0, sizeof(struct nvsp_message));
318         init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_RECV_BUF;
319         init_packet->msg.v1_msg.send_recv_buf.
320                 gpadl_handle = net_device->recv_buf_gpadl_handle;
321         init_packet->msg.v1_msg.
322                 send_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;
323
324         trace_nvsp_send(ndev, init_packet);
325
326         /* Send the gpadl notification request */
327         ret = vmbus_sendpacket(device->channel, init_packet,
328                                sizeof(struct nvsp_message),
329                                (unsigned long)init_packet,
330                                VM_PKT_DATA_INBAND,
331                                VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
332         if (ret != 0) {
333                 netdev_err(ndev,
334                         "unable to send receive buffer's gpadl to netvsp\n");
335                 goto cleanup;
336         }
337
338         wait_for_completion(&net_device->channel_init_wait);
339
340         /* Check the response */
341         resp = &init_packet->msg.v1_msg.send_recv_buf_complete;
342         if (resp->status != NVSP_STAT_SUCCESS) {
343                 netdev_err(ndev,
344                            "Unable to complete receive buffer initialization with NetVsp - status %d\n",
345                            resp->status);
346                 ret = -EINVAL;
347                 goto cleanup;
348         }
349
350         /* Parse the response */
351         netdev_dbg(ndev, "Receive sections: %u sub_allocs: size %u count: %u\n",
352                    resp->num_sections, resp->sections[0].sub_alloc_size,
353                    resp->sections[0].num_sub_allocs);
354
355         /* There should only be one section for the entire receive buffer */
356         if (resp->num_sections != 1 || resp->sections[0].offset != 0) {
357                 ret = -EINVAL;
358                 goto cleanup;
359         }
360
361         net_device->recv_section_size = resp->sections[0].sub_alloc_size;
362         net_device->recv_section_cnt = resp->sections[0].num_sub_allocs;
363
364         /* Setup receive completion ring */
365         net_device->recv_completion_cnt
366                 = round_up(net_device->recv_section_cnt + 1,
367                            PAGE_SIZE / sizeof(u64));
368         ret = netvsc_alloc_recv_comp_ring(net_device, 0);
369         if (ret)
370                 goto cleanup;
371
372         /* Now setup the send buffer. */
373         buf_size = device_info->send_sections * device_info->send_section_size;
374         buf_size = round_up(buf_size, PAGE_SIZE);
375
376         net_device->send_buf = vzalloc(buf_size);
377         if (!net_device->send_buf) {
378                 netdev_err(ndev, "unable to allocate send buffer of size %u\n",
379                            buf_size);
380                 ret = -ENOMEM;
381                 goto cleanup;
382         }
383
384         /* Establish the gpadl handle for this buffer on this
385          * channel.  Note: This call uses the vmbus connection rather
386          * than the channel to establish the gpadl handle.
387          */
388         ret = vmbus_establish_gpadl(device->channel, net_device->send_buf,
389                                     buf_size,
390                                     &net_device->send_buf_gpadl_handle);
391         if (ret != 0) {
392                 netdev_err(ndev,
393                            "unable to establish send buffer's gpadl\n");
394                 goto cleanup;
395         }
396
397         /* Notify the NetVsp of the gpadl handle */
398         init_packet = &net_device->channel_init_pkt;
399         memset(init_packet, 0, sizeof(struct nvsp_message));
400         init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_SEND_BUF;
401         init_packet->msg.v1_msg.send_send_buf.gpadl_handle =
402                 net_device->send_buf_gpadl_handle;
403         init_packet->msg.v1_msg.send_send_buf.id = NETVSC_SEND_BUFFER_ID;
404
405         trace_nvsp_send(ndev, init_packet);
406
407         /* Send the gpadl notification request */
408         ret = vmbus_sendpacket(device->channel, init_packet,
409                                sizeof(struct nvsp_message),
410                                (unsigned long)init_packet,
411                                VM_PKT_DATA_INBAND,
412                                VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
413         if (ret != 0) {
414                 netdev_err(ndev,
415                            "unable to send send buffer's gpadl to netvsp\n");
416                 goto cleanup;
417         }
418
419         wait_for_completion(&net_device->channel_init_wait);
420
421         /* Check the response */
422         if (init_packet->msg.v1_msg.
423             send_send_buf_complete.status != NVSP_STAT_SUCCESS) {
424                 netdev_err(ndev, "Unable to complete send buffer "
425                            "initialization with NetVsp - status %d\n",
426                            init_packet->msg.v1_msg.
427                            send_send_buf_complete.status);
428                 ret = -EINVAL;
429                 goto cleanup;
430         }
431
432         /* Parse the response */
433         net_device->send_section_size = init_packet->msg.
434                                 v1_msg.send_send_buf_complete.section_size;
435
436         /* Section count is simply the size divided by the section size. */
437         net_device->send_section_cnt = buf_size / net_device->send_section_size;
438
439         netdev_dbg(ndev, "Send section size: %d, Section count:%d\n",
440                    net_device->send_section_size, net_device->send_section_cnt);
441
442         /* Setup state for managing the send buffer. */
443         map_words = DIV_ROUND_UP(net_device->send_section_cnt, BITS_PER_LONG);
444
445         net_device->send_section_map = kcalloc(map_words, sizeof(ulong), GFP_KERNEL);
446         if (net_device->send_section_map == NULL) {
447                 ret = -ENOMEM;
448                 goto cleanup;
449         }
450
451         goto exit;
452
453 cleanup:
454         netvsc_revoke_recv_buf(device, net_device, ndev);
455         netvsc_revoke_send_buf(device, net_device, ndev);
456         netvsc_teardown_recv_gpadl(device, net_device, ndev);
457         netvsc_teardown_send_gpadl(device, net_device, ndev);
458
459 exit:
460         return ret;
461 }
462
463 /* Negotiate NVSP protocol version */
464 static int negotiate_nvsp_ver(struct hv_device *device,
465                               struct netvsc_device *net_device,
466                               struct nvsp_message *init_packet,
467                               u32 nvsp_ver)
468 {
469         struct net_device *ndev = hv_get_drvdata(device);
470         int ret;
471
472         memset(init_packet, 0, sizeof(struct nvsp_message));
473         init_packet->hdr.msg_type = NVSP_MSG_TYPE_INIT;
474         init_packet->msg.init_msg.init.min_protocol_ver = nvsp_ver;
475         init_packet->msg.init_msg.init.max_protocol_ver = nvsp_ver;
476         trace_nvsp_send(ndev, init_packet);
477
478         /* Send the init request */
479         ret = vmbus_sendpacket(device->channel, init_packet,
480                                sizeof(struct nvsp_message),
481                                (unsigned long)init_packet,
482                                VM_PKT_DATA_INBAND,
483                                VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
484
485         if (ret != 0)
486                 return ret;
487
488         wait_for_completion(&net_device->channel_init_wait);
489
490         if (init_packet->msg.init_msg.init_complete.status !=
491             NVSP_STAT_SUCCESS)
492                 return -EINVAL;
493
494         if (nvsp_ver == NVSP_PROTOCOL_VERSION_1)
495                 return 0;
496
497         /* NVSPv2 or later: Send NDIS config */
498         memset(init_packet, 0, sizeof(struct nvsp_message));
499         init_packet->hdr.msg_type = NVSP_MSG2_TYPE_SEND_NDIS_CONFIG;
500         init_packet->msg.v2_msg.send_ndis_config.mtu = ndev->mtu + ETH_HLEN;
501         init_packet->msg.v2_msg.send_ndis_config.capability.ieee8021q = 1;
502
503         if (nvsp_ver >= NVSP_PROTOCOL_VERSION_5) {
504                 init_packet->msg.v2_msg.send_ndis_config.capability.sriov = 1;
505
506                 /* Teaming bit is needed to receive link speed updates */
507                 init_packet->msg.v2_msg.send_ndis_config.capability.teaming = 1;
508         }
509
510         trace_nvsp_send(ndev, init_packet);
511
512         ret = vmbus_sendpacket(device->channel, init_packet,
513                                 sizeof(struct nvsp_message),
514                                 (unsigned long)init_packet,
515                                 VM_PKT_DATA_INBAND, 0);
516
517         return ret;
518 }
519
520 static int netvsc_connect_vsp(struct hv_device *device,
521                               struct netvsc_device *net_device,
522                               const struct netvsc_device_info *device_info)
523 {
524         struct net_device *ndev = hv_get_drvdata(device);
525         static const u32 ver_list[] = {
526                 NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2,
527                 NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5,
528                 NVSP_PROTOCOL_VERSION_6, NVSP_PROTOCOL_VERSION_61
529         };
530         struct nvsp_message *init_packet;
531         int ndis_version, i, ret;
532
533         init_packet = &net_device->channel_init_pkt;
534
535         /* Negotiate the latest NVSP protocol supported */
536         for (i = ARRAY_SIZE(ver_list) - 1; i >= 0; i--)
537                 if (negotiate_nvsp_ver(device, net_device, init_packet,
538                                        ver_list[i])  == 0) {
539                         net_device->nvsp_version = ver_list[i];
540                         break;
541                 }
542
543         if (i < 0) {
544                 ret = -EPROTO;
545                 goto cleanup;
546         }
547
548         pr_debug("Negotiated NVSP version:%x\n", net_device->nvsp_version);
549
550         /* Send the ndis version */
551         memset(init_packet, 0, sizeof(struct nvsp_message));
552
553         if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_4)
554                 ndis_version = 0x00060001;
555         else
556                 ndis_version = 0x0006001e;
557
558         init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_NDIS_VER;
559         init_packet->msg.v1_msg.
560                 send_ndis_ver.ndis_major_ver =
561                                 (ndis_version & 0xFFFF0000) >> 16;
562         init_packet->msg.v1_msg.
563                 send_ndis_ver.ndis_minor_ver =
564                                 ndis_version & 0xFFFF;
565
566         trace_nvsp_send(ndev, init_packet);
567
568         /* Send the init request */
569         ret = vmbus_sendpacket(device->channel, init_packet,
570                                 sizeof(struct nvsp_message),
571                                 (unsigned long)init_packet,
572                                 VM_PKT_DATA_INBAND, 0);
573         if (ret != 0)
574                 goto cleanup;
575
576
577         ret = netvsc_init_buf(device, net_device, device_info);
578
579 cleanup:
580         return ret;
581 }
582
583 /*
584  * netvsc_device_remove - Callback when the root bus device is removed
585  */
586 void netvsc_device_remove(struct hv_device *device)
587 {
588         struct net_device *ndev = hv_get_drvdata(device);
589         struct net_device_context *net_device_ctx = netdev_priv(ndev);
590         struct netvsc_device *net_device
591                 = rtnl_dereference(net_device_ctx->nvdev);
592         int i;
593
594         /*
595          * Revoke receive buffer. If host is pre-Win2016 then tear down
596          * receive buffer GPADL. Do the same for send buffer.
597          */
598         netvsc_revoke_recv_buf(device, net_device, ndev);
599         if (vmbus_proto_version < VERSION_WIN10)
600                 netvsc_teardown_recv_gpadl(device, net_device, ndev);
601
602         netvsc_revoke_send_buf(device, net_device, ndev);
603         if (vmbus_proto_version < VERSION_WIN10)
604                 netvsc_teardown_send_gpadl(device, net_device, ndev);
605
606         RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
607
608         /* And disassociate NAPI context from device */
609         for (i = 0; i < net_device->num_chn; i++)
610                 netif_napi_del(&net_device->chan_table[i].napi);
611
612         /*
613          * At this point, no one should be accessing net_device
614          * except in here
615          */
616         netdev_dbg(ndev, "net device safe to remove\n");
617
618         /* Now, we can close the channel safely */
619         vmbus_close(device->channel);
620
621         /*
622          * If host is Win2016 or higher then we do the GPADL tear down
623          * here after VMBus is closed.
624         */
625         if (vmbus_proto_version >= VERSION_WIN10) {
626                 netvsc_teardown_recv_gpadl(device, net_device, ndev);
627                 netvsc_teardown_send_gpadl(device, net_device, ndev);
628         }
629
630         /* Release all resources */
631         free_netvsc_device_rcu(net_device);
632 }
633
/*
 * Outbound ring-buffer availability thresholds, in percent.
 *
 * A stopped TX queue is woken from the send-completion path once the
 * ring's available-to-write percentage rises above
 * RING_AVAIL_PERCENT_HIWATER.
 * NOTE(review): RING_AVAIL_PERCENT_LOWATER is presumably the threshold
 * below which the send path stops the queue - confirm against
 * netvsc_send_pkt().
 */
#define RING_AVAIL_PERCENT_HIWATER 20
#define RING_AVAIL_PERCENT_LOWATER 10
636
637 static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
638                                          u32 index)
639 {
640         sync_change_bit(index, net_device->send_section_map);
641 }
642
643 static void netvsc_send_tx_complete(struct net_device *ndev,
644                                     struct netvsc_device *net_device,
645                                     struct vmbus_channel *channel,
646                                     const struct vmpacket_descriptor *desc,
647                                     int budget)
648 {
649         struct sk_buff *skb = (struct sk_buff *)(unsigned long)desc->trans_id;
650         struct net_device_context *ndev_ctx = netdev_priv(ndev);
651         u16 q_idx = 0;
652         int queue_sends;
653
654         /* Notify the layer above us */
655         if (likely(skb)) {
656                 const struct hv_netvsc_packet *packet
657                         = (struct hv_netvsc_packet *)skb->cb;
658                 u32 send_index = packet->send_buf_index;
659                 struct netvsc_stats *tx_stats;
660
661                 if (send_index != NETVSC_INVALID_INDEX)
662                         netvsc_free_send_slot(net_device, send_index);
663                 q_idx = packet->q_idx;
664
665                 tx_stats = &net_device->chan_table[q_idx].tx_stats;
666
667                 u64_stats_update_begin(&tx_stats->syncp);
668                 tx_stats->packets += packet->total_packets;
669                 tx_stats->bytes += packet->total_bytes;
670                 u64_stats_update_end(&tx_stats->syncp);
671
672                 napi_consume_skb(skb, budget);
673         }
674
675         queue_sends =
676                 atomic_dec_return(&net_device->chan_table[q_idx].queue_sends);
677
678         if (unlikely(net_device->destroy)) {
679                 if (queue_sends == 0)
680                         wake_up(&net_device->wait_drain);
681         } else {
682                 struct netdev_queue *txq = netdev_get_tx_queue(ndev, q_idx);
683
684                 if (netif_tx_queue_stopped(txq) &&
685                     (hv_get_avail_to_write_percent(&channel->outbound) >
686                      RING_AVAIL_PERCENT_HIWATER || queue_sends < 1)) {
687                         netif_tx_wake_queue(txq);
688                         ndev_ctx->eth_stats.wake_queue++;
689                 }
690         }
691 }
692
693 static void netvsc_send_completion(struct net_device *ndev,
694                                    struct netvsc_device *net_device,
695                                    struct vmbus_channel *incoming_channel,
696                                    const struct vmpacket_descriptor *desc,
697                                    int budget)
698 {
699         const struct nvsp_message *nvsp_packet = hv_pkt_data(desc);
700
701         switch (nvsp_packet->hdr.msg_type) {
702         case NVSP_MSG_TYPE_INIT_COMPLETE:
703         case NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE:
704         case NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE:
705         case NVSP_MSG5_TYPE_SUBCHANNEL:
706                 /* Copy the response back */
707                 memcpy(&net_device->channel_init_pkt, nvsp_packet,
708                        sizeof(struct nvsp_message));
709                 complete(&net_device->channel_init_wait);
710                 break;
711
712         case NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE:
713                 netvsc_send_tx_complete(ndev, net_device, incoming_channel,
714                                         desc, budget);
715                 break;
716
717         default:
718                 netdev_err(ndev,
719                            "Unknown send completion type %d received!!\n",
720                            nvsp_packet->hdr.msg_type);
721         }
722 }
723
724 static u32 netvsc_get_next_send_section(struct netvsc_device *net_device)
725 {
726         unsigned long *map_addr = net_device->send_section_map;
727         unsigned int i;
728
729         for_each_clear_bit(i, map_addr, net_device->send_section_cnt) {
730                 if (sync_test_and_set_bit(i, map_addr) == 0)
731                         return i;
732         }
733
734         return NETVSC_INVALID_INDEX;
735 }
736
737 static void netvsc_copy_to_send_buf(struct netvsc_device *net_device,
738                                     unsigned int section_index,
739                                     u32 pend_size,
740                                     struct hv_netvsc_packet *packet,
741                                     struct rndis_message *rndis_msg,
742                                     struct hv_page_buffer *pb,
743                                     bool xmit_more)
744 {
745         char *start = net_device->send_buf;
746         char *dest = start + (section_index * net_device->send_section_size)
747                      + pend_size;
748         int i;
749         u32 padding = 0;
750         u32 page_count = packet->cp_partial ? packet->rmsg_pgcnt :
751                 packet->page_buf_cnt;
752         u32 remain;
753
754         /* Add padding */
755         remain = packet->total_data_buflen & (net_device->pkt_align - 1);
756         if (xmit_more && remain) {
757                 padding = net_device->pkt_align - remain;
758                 rndis_msg->msg_len += padding;
759                 packet->total_data_buflen += padding;
760         }
761
762         for (i = 0; i < page_count; i++) {
763                 char *src = phys_to_virt(pb[i].pfn << PAGE_SHIFT);
764                 u32 offset = pb[i].offset;
765                 u32 len = pb[i].len;
766
767                 memcpy(dest, (src + offset), len);
768                 dest += len;
769         }
770
771         if (padding)
772                 memset(dest, 0, padding);
773 }
774
775 static inline int netvsc_send_pkt(
776         struct hv_device *device,
777         struct hv_netvsc_packet *packet,
778         struct netvsc_device *net_device,
779         struct hv_page_buffer *pb,
780         struct sk_buff *skb)
781 {
782         struct nvsp_message nvmsg;
783         struct nvsp_1_message_send_rndis_packet *rpkt =
784                 &nvmsg.msg.v1_msg.send_rndis_pkt;
785         struct netvsc_channel * const nvchan =
786                 &net_device->chan_table[packet->q_idx];
787         struct vmbus_channel *out_channel = nvchan->channel;
788         struct net_device *ndev = hv_get_drvdata(device);
789         struct net_device_context *ndev_ctx = netdev_priv(ndev);
790         struct netdev_queue *txq = netdev_get_tx_queue(ndev, packet->q_idx);
791         u64 req_id;
792         int ret;
793         u32 ring_avail = hv_get_avail_to_write_percent(&out_channel->outbound);
794
795         nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
796         if (skb)
797                 rpkt->channel_type = 0;         /* 0 is RMC_DATA */
798         else
799                 rpkt->channel_type = 1;         /* 1 is RMC_CONTROL */
800
801         rpkt->send_buf_section_index = packet->send_buf_index;
802         if (packet->send_buf_index == NETVSC_INVALID_INDEX)
803                 rpkt->send_buf_section_size = 0;
804         else
805                 rpkt->send_buf_section_size = packet->total_data_buflen;
806
807         req_id = (ulong)skb;
808
809         if (out_channel->rescind)
810                 return -ENODEV;
811
812         trace_nvsp_send_pkt(ndev, out_channel, rpkt);
813
814         if (packet->page_buf_cnt) {
815                 if (packet->cp_partial)
816                         pb += packet->rmsg_pgcnt;
817
818                 ret = vmbus_sendpacket_pagebuffer(out_channel,
819                                                   pb, packet->page_buf_cnt,
820                                                   &nvmsg, sizeof(nvmsg),
821                                                   req_id);
822         } else {
823                 ret = vmbus_sendpacket(out_channel,
824                                        &nvmsg, sizeof(nvmsg),
825                                        req_id, VM_PKT_DATA_INBAND,
826                                        VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
827         }
828
829         if (ret == 0) {
830                 atomic_inc_return(&nvchan->queue_sends);
831
832                 if (ring_avail < RING_AVAIL_PERCENT_LOWATER) {
833                         netif_tx_stop_queue(txq);
834                         ndev_ctx->eth_stats.stop_queue++;
835                 }
836         } else if (ret == -EAGAIN) {
837                 netif_tx_stop_queue(txq);
838                 ndev_ctx->eth_stats.stop_queue++;
839                 if (atomic_read(&nvchan->queue_sends) < 1) {
840                         netif_tx_wake_queue(txq);
841                         ndev_ctx->eth_stats.wake_queue++;
842                         ret = -ENOSPC;
843                 }
844         } else {
845                 netdev_err(ndev,
846                            "Unable to send packet pages %u len %u, ret %d\n",
847                            packet->page_buf_cnt, packet->total_data_buflen,
848                            ret);
849         }
850
851         return ret;
852 }
853
854 /* Move packet out of multi send data (msd), and clear msd */
855 static inline void move_pkt_msd(struct hv_netvsc_packet **msd_send,
856                                 struct sk_buff **msd_skb,
857                                 struct multi_send_data *msdp)
858 {
859         *msd_skb = msdp->skb;
860         *msd_send = msdp->pkt;
861         msdp->skb = NULL;
862         msdp->pkt = NULL;
863         msdp->count = 0;
864 }
865
866 /* RCU already held by caller */
867 int netvsc_send(struct net_device *ndev,
868                 struct hv_netvsc_packet *packet,
869                 struct rndis_message *rndis_msg,
870                 struct hv_page_buffer *pb,
871                 struct sk_buff *skb)
872 {
873         struct net_device_context *ndev_ctx = netdev_priv(ndev);
874         struct netvsc_device *net_device
875                 = rcu_dereference_bh(ndev_ctx->nvdev);
876         struct hv_device *device = ndev_ctx->device_ctx;
877         int ret = 0;
878         struct netvsc_channel *nvchan;
879         u32 pktlen = packet->total_data_buflen, msd_len = 0;
880         unsigned int section_index = NETVSC_INVALID_INDEX;
881         struct multi_send_data *msdp;
882         struct hv_netvsc_packet *msd_send = NULL, *cur_send = NULL;
883         struct sk_buff *msd_skb = NULL;
884         bool try_batch, xmit_more;
885
886         /* If device is rescinded, return error and packet will get dropped. */
887         if (unlikely(!net_device || net_device->destroy))
888                 return -ENODEV;
889
890         nvchan = &net_device->chan_table[packet->q_idx];
891         packet->send_buf_index = NETVSC_INVALID_INDEX;
892         packet->cp_partial = false;
893
894         /* Send control message directly without accessing msd (Multi-Send
895          * Data) field which may be changed during data packet processing.
896          */
897         if (!skb)
898                 return netvsc_send_pkt(device, packet, net_device, pb, skb);
899
900         /* batch packets in send buffer if possible */
901         msdp = &nvchan->msd;
902         if (msdp->pkt)
903                 msd_len = msdp->pkt->total_data_buflen;
904
905         try_batch =  msd_len > 0 && msdp->count < net_device->max_pkt;
906         if (try_batch && msd_len + pktlen + net_device->pkt_align <
907             net_device->send_section_size) {
908                 section_index = msdp->pkt->send_buf_index;
909
910         } else if (try_batch && msd_len + packet->rmsg_size <
911                    net_device->send_section_size) {
912                 section_index = msdp->pkt->send_buf_index;
913                 packet->cp_partial = true;
914
915         } else if (pktlen + net_device->pkt_align <
916                    net_device->send_section_size) {
917                 section_index = netvsc_get_next_send_section(net_device);
918                 if (unlikely(section_index == NETVSC_INVALID_INDEX)) {
919                         ++ndev_ctx->eth_stats.tx_send_full;
920                 } else {
921                         move_pkt_msd(&msd_send, &msd_skb, msdp);
922                         msd_len = 0;
923                 }
924         }
925
926         /* Keep aggregating only if stack says more data is coming
927          * and not doing mixed modes send and not flow blocked
928          */
929         xmit_more = skb->xmit_more &&
930                 !packet->cp_partial &&
931                 !netif_xmit_stopped(netdev_get_tx_queue(ndev, packet->q_idx));
932
933         if (section_index != NETVSC_INVALID_INDEX) {
934                 netvsc_copy_to_send_buf(net_device,
935                                         section_index, msd_len,
936                                         packet, rndis_msg, pb, xmit_more);
937
938                 packet->send_buf_index = section_index;
939
940                 if (packet->cp_partial) {
941                         packet->page_buf_cnt -= packet->rmsg_pgcnt;
942                         packet->total_data_buflen = msd_len + packet->rmsg_size;
943                 } else {
944                         packet->page_buf_cnt = 0;
945                         packet->total_data_buflen += msd_len;
946                 }
947
948                 if (msdp->pkt) {
949                         packet->total_packets += msdp->pkt->total_packets;
950                         packet->total_bytes += msdp->pkt->total_bytes;
951                 }
952
953                 if (msdp->skb)
954                         dev_consume_skb_any(msdp->skb);
955
956                 if (xmit_more) {
957                         msdp->skb = skb;
958                         msdp->pkt = packet;
959                         msdp->count++;
960                 } else {
961                         cur_send = packet;
962                         msdp->skb = NULL;
963                         msdp->pkt = NULL;
964                         msdp->count = 0;
965                 }
966         } else {
967                 move_pkt_msd(&msd_send, &msd_skb, msdp);
968                 cur_send = packet;
969         }
970
971         if (msd_send) {
972                 int m_ret = netvsc_send_pkt(device, msd_send, net_device,
973                                             NULL, msd_skb);
974
975                 if (m_ret != 0) {
976                         netvsc_free_send_slot(net_device,
977                                               msd_send->send_buf_index);
978                         dev_kfree_skb_any(msd_skb);
979                 }
980         }
981
982         if (cur_send)
983                 ret = netvsc_send_pkt(device, cur_send, net_device, pb, skb);
984
985         if (ret != 0 && section_index != NETVSC_INVALID_INDEX)
986                 netvsc_free_send_slot(net_device, section_index);
987
988         return ret;
989 }
990
991 /* Send pending recv completions */
992 static int send_recv_completions(struct net_device *ndev,
993                                  struct netvsc_device *nvdev,
994                                  struct netvsc_channel *nvchan)
995 {
996         struct multi_recv_comp *mrc = &nvchan->mrc;
997         struct recv_comp_msg {
998                 struct nvsp_message_header hdr;
999                 u32 status;
1000         }  __packed;
1001         struct recv_comp_msg msg = {
1002                 .hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE,
1003         };
1004         int ret;
1005
1006         while (mrc->first != mrc->next) {
1007                 const struct recv_comp_data *rcd
1008                         = mrc->slots + mrc->first;
1009
1010                 msg.status = rcd->status;
1011                 ret = vmbus_sendpacket(nvchan->channel, &msg, sizeof(msg),
1012                                        rcd->tid, VM_PKT_COMP, 0);
1013                 if (unlikely(ret)) {
1014                         struct net_device_context *ndev_ctx = netdev_priv(ndev);
1015
1016                         ++ndev_ctx->eth_stats.rx_comp_busy;
1017                         return ret;
1018                 }
1019
1020                 if (++mrc->first == nvdev->recv_completion_cnt)
1021                         mrc->first = 0;
1022         }
1023
1024         /* receive completion ring has been emptied */
1025         if (unlikely(nvdev->destroy))
1026                 wake_up(&nvdev->wait_drain);
1027
1028         return 0;
1029 }
1030
1031 /* Count how many receive completions are outstanding */
1032 static void recv_comp_slot_avail(const struct netvsc_device *nvdev,
1033                                  const struct multi_recv_comp *mrc,
1034                                  u32 *filled, u32 *avail)
1035 {
1036         u32 count = nvdev->recv_completion_cnt;
1037
1038         if (mrc->next >= mrc->first)
1039                 *filled = mrc->next - mrc->first;
1040         else
1041                 *filled = (count - mrc->first) + mrc->next;
1042
1043         *avail = count - *filled - 1;
1044 }
1045
1046 /* Add receive complete to ring to send to host. */
1047 static void enq_receive_complete(struct net_device *ndev,
1048                                  struct netvsc_device *nvdev, u16 q_idx,
1049                                  u64 tid, u32 status)
1050 {
1051         struct netvsc_channel *nvchan = &nvdev->chan_table[q_idx];
1052         struct multi_recv_comp *mrc = &nvchan->mrc;
1053         struct recv_comp_data *rcd;
1054         u32 filled, avail;
1055
1056         recv_comp_slot_avail(nvdev, mrc, &filled, &avail);
1057
1058         if (unlikely(filled > NAPI_POLL_WEIGHT)) {
1059                 send_recv_completions(ndev, nvdev, nvchan);
1060                 recv_comp_slot_avail(nvdev, mrc, &filled, &avail);
1061         }
1062
1063         if (unlikely(!avail)) {
1064                 netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n",
1065                            q_idx, tid);
1066                 return;
1067         }
1068
1069         rcd = mrc->slots + mrc->next;
1070         rcd->tid = tid;
1071         rcd->status = status;
1072
1073         if (++mrc->next == nvdev->recv_completion_cnt)
1074                 mrc->next = 0;
1075 }
1076
1077 static int netvsc_receive(struct net_device *ndev,
1078                           struct netvsc_device *net_device,
1079                           struct vmbus_channel *channel,
1080                           const struct vmpacket_descriptor *desc,
1081                           const struct nvsp_message *nvsp)
1082 {
1083         struct net_device_context *net_device_ctx = netdev_priv(ndev);
1084         const struct vmtransfer_page_packet_header *vmxferpage_packet
1085                 = container_of(desc, const struct vmtransfer_page_packet_header, d);
1086         u16 q_idx = channel->offermsg.offer.sub_channel_index;
1087         char *recv_buf = net_device->recv_buf;
1088         u32 status = NVSP_STAT_SUCCESS;
1089         int i;
1090         int count = 0;
1091
1092         /* Make sure this is a valid nvsp packet */
1093         if (unlikely(nvsp->hdr.msg_type != NVSP_MSG1_TYPE_SEND_RNDIS_PKT)) {
1094                 netif_err(net_device_ctx, rx_err, ndev,
1095                           "Unknown nvsp packet type received %u\n",
1096                           nvsp->hdr.msg_type);
1097                 return 0;
1098         }
1099
1100         if (unlikely(vmxferpage_packet->xfer_pageset_id != NETVSC_RECEIVE_BUFFER_ID)) {
1101                 netif_err(net_device_ctx, rx_err, ndev,
1102                           "Invalid xfer page set id - expecting %x got %x\n",
1103                           NETVSC_RECEIVE_BUFFER_ID,
1104                           vmxferpage_packet->xfer_pageset_id);
1105                 return 0;
1106         }
1107
1108         count = vmxferpage_packet->range_cnt;
1109
1110         /* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */
1111         for (i = 0; i < count; i++) {
1112                 u32 offset = vmxferpage_packet->ranges[i].byte_offset;
1113                 u32 buflen = vmxferpage_packet->ranges[i].byte_count;
1114                 void *data;
1115                 int ret;
1116
1117                 if (unlikely(offset + buflen > net_device->recv_buf_size)) {
1118                         status = NVSP_STAT_FAIL;
1119                         netif_err(net_device_ctx, rx_err, ndev,
1120                                   "Packet offset:%u + len:%u too big\n",
1121                                   offset, buflen);
1122
1123                         continue;
1124                 }
1125
1126                 data = recv_buf + offset;
1127
1128                 trace_rndis_recv(ndev, q_idx, data);
1129
1130                 /* Pass it to the upper layer */
1131                 ret = rndis_filter_receive(ndev, net_device,
1132                                            channel, data, buflen);
1133
1134                 if (unlikely(ret != NVSP_STAT_SUCCESS))
1135                         status = NVSP_STAT_FAIL;
1136         }
1137
1138         enq_receive_complete(ndev, net_device, q_idx,
1139                              vmxferpage_packet->d.trans_id, status);
1140
1141         return count;
1142 }
1143
1144 static void netvsc_send_table(struct net_device *ndev,
1145                               const struct nvsp_message *nvmsg)
1146 {
1147         struct net_device_context *net_device_ctx = netdev_priv(ndev);
1148         u32 count, *tab;
1149         int i;
1150
1151         count = nvmsg->msg.v5_msg.send_table.count;
1152         if (count != VRSS_SEND_TAB_SIZE) {
1153                 netdev_err(ndev, "Received wrong send-table size:%u\n", count);
1154                 return;
1155         }
1156
1157         tab = (u32 *)((unsigned long)&nvmsg->msg.v5_msg.send_table +
1158                       nvmsg->msg.v5_msg.send_table.offset);
1159
1160         for (i = 0; i < count; i++)
1161                 net_device_ctx->tx_table[i] = tab[i];
1162 }
1163
1164 static void netvsc_send_vf(struct net_device *ndev,
1165                            const struct nvsp_message *nvmsg)
1166 {
1167         struct net_device_context *net_device_ctx = netdev_priv(ndev);
1168
1169         net_device_ctx->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated;
1170         net_device_ctx->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial;
1171 }
1172
1173 static  void netvsc_receive_inband(struct net_device *ndev,
1174                                    const struct nvsp_message *nvmsg)
1175 {
1176         switch (nvmsg->hdr.msg_type) {
1177         case NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE:
1178                 netvsc_send_table(ndev, nvmsg);
1179                 break;
1180
1181         case NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION:
1182                 netvsc_send_vf(ndev, nvmsg);
1183                 break;
1184         }
1185 }
1186
1187 static int netvsc_process_raw_pkt(struct hv_device *device,
1188                                   struct vmbus_channel *channel,
1189                                   struct netvsc_device *net_device,
1190                                   struct net_device *ndev,
1191                                   const struct vmpacket_descriptor *desc,
1192                                   int budget)
1193 {
1194         const struct nvsp_message *nvmsg = hv_pkt_data(desc);
1195
1196         trace_nvsp_recv(ndev, channel, nvmsg);
1197
1198         switch (desc->type) {
1199         case VM_PKT_COMP:
1200                 netvsc_send_completion(ndev, net_device, channel,
1201                                        desc, budget);
1202                 break;
1203
1204         case VM_PKT_DATA_USING_XFER_PAGES:
1205                 return netvsc_receive(ndev, net_device, channel,
1206                                       desc, nvmsg);
1207                 break;
1208
1209         case VM_PKT_DATA_INBAND:
1210                 netvsc_receive_inband(ndev, nvmsg);
1211                 break;
1212
1213         default:
1214                 netdev_err(ndev, "unhandled packet type %d, tid %llx\n",
1215                            desc->type, desc->trans_id);
1216                 break;
1217         }
1218
1219         return 0;
1220 }
1221
1222 static struct hv_device *netvsc_channel_to_device(struct vmbus_channel *channel)
1223 {
1224         struct vmbus_channel *primary = channel->primary_channel;
1225
1226         return primary ? primary->device_obj : channel->device_obj;
1227 }
1228
1229 /* Network processing softirq
1230  * Process data in incoming ring buffer from host
1231  * Stops when ring is empty or budget is met or exceeded.
1232  */
1233 int netvsc_poll(struct napi_struct *napi, int budget)
1234 {
1235         struct netvsc_channel *nvchan
1236                 = container_of(napi, struct netvsc_channel, napi);
1237         struct netvsc_device *net_device = nvchan->net_device;
1238         struct vmbus_channel *channel = nvchan->channel;
1239         struct hv_device *device = netvsc_channel_to_device(channel);
1240         struct net_device *ndev = hv_get_drvdata(device);
1241         int work_done = 0;
1242
1243         /* If starting a new interval */
1244         if (!nvchan->desc)
1245                 nvchan->desc = hv_pkt_iter_first(channel);
1246
1247         while (nvchan->desc && work_done < budget) {
1248                 work_done += netvsc_process_raw_pkt(device, channel, net_device,
1249                                                     ndev, nvchan->desc, budget);
1250                 nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc);
1251         }
1252
1253         /* If send of pending receive completions suceeded
1254          *   and did not exhaust NAPI budget this time
1255          *   and not doing busy poll
1256          * then re-enable host interrupts
1257          *     and reschedule if ring is not empty.
1258          */
1259         if (send_recv_completions(ndev, net_device, nvchan) == 0 &&
1260             work_done < budget &&
1261             napi_complete_done(napi, work_done) &&
1262             hv_end_read(&channel->inbound) &&
1263             napi_schedule_prep(napi)) {
1264                 hv_begin_read(&channel->inbound);
1265                 __napi_schedule(napi);
1266         }
1267
1268         /* Driver may overshoot since multiple packets per descriptor */
1269         return min(work_done, budget);
1270 }
1271
1272 /* Call back when data is available in host ring buffer.
1273  * Processing is deferred until network softirq (NAPI)
1274  */
1275 void netvsc_channel_cb(void *context)
1276 {
1277         struct netvsc_channel *nvchan = context;
1278         struct vmbus_channel *channel = nvchan->channel;
1279         struct hv_ring_buffer_info *rbi = &channel->inbound;
1280
1281         /* preload first vmpacket descriptor */
1282         prefetch(hv_get_ring_buffer(rbi) + rbi->priv_read_index);
1283
1284         if (napi_schedule_prep(&nvchan->napi)) {
1285                 /* disable interupts from host */
1286                 hv_begin_read(rbi);
1287
1288                 __napi_schedule_irqoff(&nvchan->napi);
1289         }
1290 }
1291
1292 /*
1293  * netvsc_device_add - Callback when the device belonging to this
1294  * driver is added
1295  */
1296 struct netvsc_device *netvsc_device_add(struct hv_device *device,
1297                                 const struct netvsc_device_info *device_info)
1298 {
1299         int i, ret = 0;
1300         struct netvsc_device *net_device;
1301         struct net_device *ndev = hv_get_drvdata(device);
1302         struct net_device_context *net_device_ctx = netdev_priv(ndev);
1303
1304         net_device = alloc_net_device();
1305         if (!net_device)
1306                 return ERR_PTR(-ENOMEM);
1307
1308         for (i = 0; i < VRSS_SEND_TAB_SIZE; i++)
1309                 net_device_ctx->tx_table[i] = 0;
1310
1311         /* Because the device uses NAPI, all the interrupt batching and
1312          * control is done via Net softirq, not the channel handling
1313          */
1314         set_channel_read_mode(device->channel, HV_CALL_ISR);
1315
1316         /* If we're reopening the device we may have multiple queues, fill the
1317          * chn_table with the default channel to use it before subchannels are
1318          * opened.
1319          * Initialize the channel state before we open;
1320          * we can be interrupted as soon as we open the channel.
1321          */
1322
1323         for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
1324                 struct netvsc_channel *nvchan = &net_device->chan_table[i];
1325
1326                 nvchan->channel = device->channel;
1327                 nvchan->net_device = net_device;
1328                 u64_stats_init(&nvchan->tx_stats.syncp);
1329                 u64_stats_init(&nvchan->rx_stats.syncp);
1330         }
1331
1332         /* Enable NAPI handler before init callbacks */
1333         netif_napi_add(ndev, &net_device->chan_table[0].napi,
1334                        netvsc_poll, NAPI_POLL_WEIGHT);
1335
1336         /* Open the channel */
1337         ret = vmbus_open(device->channel, netvsc_ring_bytes,
1338                          netvsc_ring_bytes,  NULL, 0,
1339                          netvsc_channel_cb, net_device->chan_table);
1340
1341         if (ret != 0) {
1342                 netdev_err(ndev, "unable to open channel: %d\n", ret);
1343                 goto cleanup;
1344         }
1345
1346         /* Channel is opened */
1347         netdev_dbg(ndev, "hv_netvsc channel opened successfully\n");
1348
1349         napi_enable(&net_device->chan_table[0].napi);
1350
1351         /* Connect with the NetVsp */
1352         ret = netvsc_connect_vsp(device, net_device, device_info);
1353         if (ret != 0) {
1354                 netdev_err(ndev,
1355                         "unable to connect to NetVSP - %d\n", ret);
1356                 goto close;
1357         }
1358
1359         /* Writing nvdev pointer unlocks netvsc_send(), make sure chn_table is
1360          * populated.
1361          */
1362         rcu_assign_pointer(net_device_ctx->nvdev, net_device);
1363
1364         return net_device;
1365
1366 close:
1367         RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
1368         napi_disable(&net_device->chan_table[0].napi);
1369
1370         /* Now, we can close the channel safely */
1371         vmbus_close(device->channel);
1372
1373 cleanup:
1374         netif_napi_del(&net_device->chan_table[0].napi);
1375         free_netvsc_device(&net_device->rcu);
1376
1377         return ERR_PTR(ret);
1378 }