netfs: fix test for whether we can skip read when writing beyond EOF
[linux-2.6-microblaze.git] / drivers / net / ethernet / mellanox / mlx5 / core / en / rep / neigh.c
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2020 Mellanox Technologies. */
3
4 #include <linux/refcount.h>
5 #include <linux/list.h>
6 #include <linux/rculist.h>
7 #include <linux/rtnetlink.h>
8 #include <linux/workqueue.h>
9 #include <linux/spinlock.h>
10 #include <linux/notifier.h>
11 #include <net/netevent.h>
12 #include <net/arp.h>
13 #include "neigh.h"
14 #include "tc.h"
15 #include "en_rep.h"
16 #include "fs_core.h"
17 #include "diag/en_rep_tracepoint.h"
18
19 static unsigned long mlx5e_rep_ipv6_interval(void)
20 {
21         if (IS_ENABLED(CONFIG_IPV6) && ipv6_stub->nd_tbl)
22                 return NEIGH_VAR(&ipv6_stub->nd_tbl->parms, DELAY_PROBE_TIME);
23
24         return ~0UL;
25 }
26
27 static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv)
28 {
29         unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
30         unsigned long ipv6_interval = mlx5e_rep_ipv6_interval();
31         struct net_device *netdev = rpriv->netdev;
32         struct mlx5e_priv *priv = netdev_priv(netdev);
33
34         rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval);
35         mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval);
36 }
37
38 void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv)
39 {
40         struct mlx5e_rep_priv *rpriv = priv->ppriv;
41         struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
42
43         mlx5_fc_queue_stats_work(priv->mdev,
44                                  &neigh_update->neigh_stats_work,
45                                  neigh_update->min_interval);
46 }
47
/* Take a reference on @nhe unless its refcount already dropped to zero
 * (i.e. the entry is being destroyed). Returns true if the reference
 * was taken.
 */
static bool mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe)
{
	return refcount_inc_not_zero(&nhe->refcnt);
}

/* Forward declaration: mlx5e_rep_neigh_entry_release() below needs it. */
static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe);
54
/* Drop a reference on @nhe. On the last reference: unlink the entry from
 * the hash table and list, then free it after an RCU grace period so
 * lockless readers walking the list stay safe.
 */
void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe)
{
	if (refcount_dec_and_test(&nhe->refcnt)) {
		mlx5e_rep_neigh_entry_remove(nhe);
		kfree_rcu(nhe, rcu);
	}
}
62
/* Advance an iteration over the representor's neigh entry list.
 *
 * Returns the next live entry after @nhe (or the first entry when
 * @nhe == NULL) with a reference held, and releases the caller's
 * reference on @nhe — so the caller only ever holds one entry at a
 * time. Returns NULL when the list is exhausted.
 *
 * The walk itself runs under RCU, which keeps it safe against
 * concurrent list_del_rcu() without taking encap_lock.
 */
static struct mlx5e_neigh_hash_entry *
mlx5e_get_next_nhe(struct mlx5e_rep_priv *rpriv,
		   struct mlx5e_neigh_hash_entry *nhe)
{
	struct mlx5e_neigh_hash_entry *next = NULL;

	rcu_read_lock();

	/* Keep advancing until an entry accepts a reference; entries whose
	 * refcount already hit zero are mid-destruction and are skipped.
	 */
	for (next = nhe ?
		     list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
					   &nhe->neigh_list,
					   struct mlx5e_neigh_hash_entry,
					   neigh_list) :
		     list_first_or_null_rcu(&rpriv->neigh_update.neigh_list,
					    struct mlx5e_neigh_hash_entry,
					    neigh_list);
	     next;
	     next = list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
					  &next->neigh_list,
					  struct mlx5e_neigh_hash_entry,
					  neigh_list))
		if (mlx5e_rep_neigh_entry_hold(next))
			break;

	rcu_read_unlock();

	/* Drop the reference on the previous iteration position. */
	if (nhe)
		mlx5e_rep_neigh_entry_release(nhe);

	return next;
}
94
95 static void mlx5e_rep_neigh_stats_work(struct work_struct *work)
96 {
97         struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv,
98                                                     neigh_update.neigh_stats_work.work);
99         struct net_device *netdev = rpriv->netdev;
100         struct mlx5e_priv *priv = netdev_priv(netdev);
101         struct mlx5e_neigh_hash_entry *nhe = NULL;
102
103         rtnl_lock();
104         if (!list_empty(&rpriv->neigh_update.neigh_list))
105                 mlx5e_rep_queue_neigh_stats_work(priv);
106
107         while ((nhe = mlx5e_get_next_nhe(rpriv, nhe)) != NULL)
108                 mlx5e_tc_update_neigh_used_value(nhe);
109
110         rtnl_unlock();
111 }
112
/* Deferred context for handling one NETEVENT_NEIGH_UPDATE event outside
 * of atomic (notifier) context. Both pointers carry references that are
 * dropped by mlx5e_release_neigh_update_work().
 */
struct neigh_update_work {
	struct work_struct work;
	struct neighbour *n;                /* held via neigh_hold() */
	struct mlx5e_neigh_hash_entry *nhe; /* held via entry refcount */
};
118
119 static void mlx5e_release_neigh_update_work(struct neigh_update_work *update_work)
120 {
121         neigh_release(update_work->n);
122         mlx5e_rep_neigh_entry_release(update_work->nhe);
123         kfree(update_work);
124 }
125
/* Work handler for a queued neighbour update: snapshot the neighbour's
 * state (hw address + validity) and propagate it to every encap entry
 * attached to the hash entry. Consumes the references carried by
 * @update_work.
 */
static void mlx5e_rep_neigh_update(struct work_struct *work)
{
	struct neigh_update_work *update_work = container_of(work, struct neigh_update_work,
							     work);
	struct mlx5e_neigh_hash_entry *nhe = update_work->nhe;
	struct neighbour *n = update_work->n;
	bool neigh_connected, same_dev;
	struct mlx5e_encap_entry *e;
	unsigned char ha[ETH_ALEN];
	struct mlx5e_priv *priv;
	u8 nud_state, dead;

	rtnl_lock();

	/* If these parameters are changed after we release the lock,
	 * we'll receive another event letting us know about it.
	 * We use this lock to avoid inconsistency between the neigh validity
	 * and it's hw address.
	 */
	read_lock_bh(&n->lock);
	memcpy(ha, n->ha, ETH_ALEN);
	nud_state = n->nud_state;
	dead = n->dead;
	/* The neigh may have moved to another device since the event was
	 * queued; only act if it still matches the hash entry's device.
	 */
	same_dev = READ_ONCE(nhe->neigh_dev) == n->dev;
	read_unlock_bh(&n->lock);

	neigh_connected = (nud_state & NUD_VALID) && !dead;

	trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected);

	if (!same_dev)
		goto out;

	list_for_each_entry(e, &nhe->encap_list, encap_list) {
		/* Skip encap entries that are already being torn down. */
		if (!mlx5e_encap_take(e))
			continue;

		priv = netdev_priv(e->out_dev);
		mlx5e_rep_update_flows(priv, e, neigh_connected, ha);
		mlx5e_encap_put(priv, e);
	}
out:
	rtnl_unlock();
	mlx5e_release_neigh_update_work(update_work);
}
171
/* Build a deferred work item for one neighbour update event.
 *
 * Runs in atomic (netevent notifier) context — hence GFP_ATOMIC and the
 * RCU-protected lookup. Takes references on both the neighbour and the
 * matching hash entry; both are dropped by
 * mlx5e_release_neigh_update_work() when the work completes.
 *
 * Returns NULL if allocation fails or no hash entry tracks this neigh.
 */
static struct neigh_update_work *mlx5e_alloc_neigh_update_work(struct mlx5e_priv *priv,
							       struct neighbour *n)
{
	struct neigh_update_work *update_work;
	struct mlx5e_neigh_hash_entry *nhe;
	struct mlx5e_neigh m_neigh = {};

	update_work = kzalloc(sizeof(*update_work), GFP_ATOMIC);
	if (WARN_ON(!update_work))
		return NULL;

	/* Lookup key: address family + destination IP. */
	m_neigh.family = n->ops->family;
	memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len);

	/* Obtain reference to nhe as last step in order not to release it in
	 * atomic context.
	 */
	rcu_read_lock();
	nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh);
	rcu_read_unlock();
	if (!nhe) {
		kfree(update_work);
		return NULL;
	}

	INIT_WORK(&update_work->work, mlx5e_rep_neigh_update);
	neigh_hold(n);
	update_work->n = n;
	update_work->nhe = nhe;

	return update_work;
}
204
/* Netevent notifier callback; runs in atomic context.
 *
 * NETEVENT_NEIGH_UPDATE: defer the handling to the representor's
 * workqueue — the actual flow update needs to sleep.
 *
 * NETEVENT_DELAY_PROBE_TIME_UPDATE: shrink the flow-counter sampling
 * interval if a tracked device's probe time became smaller than the
 * current minimum.
 */
static int mlx5e_rep_netevent_event(struct notifier_block *nb,
				    unsigned long event, void *ptr)
{
	struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
						    neigh_update.netevent_nb);
	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
	struct net_device *netdev = rpriv->netdev;
	struct mlx5e_priv *priv = netdev_priv(netdev);
	struct mlx5e_neigh_hash_entry *nhe = NULL;
	struct neigh_update_work *update_work;
	struct neigh_parms *p;
	struct neighbour *n;
	bool found = false;

	switch (event) {
	case NETEVENT_NEIGH_UPDATE:
		n = ptr;
		/* Only ARP (IPv4) and, when enabled, ND (IPv6) matter. */
#if IS_ENABLED(CONFIG_IPV6)
		if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl)
#else
		if (n->tbl != &arp_tbl)
#endif
			return NOTIFY_DONE;

		update_work = mlx5e_alloc_neigh_update_work(priv, n);
		if (!update_work)
			return NOTIFY_DONE;

		queue_work(priv->wq, &update_work->work);
		break;

	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
		p = ptr;

		/* We check the device is present since we don't care about
		 * changes in the default table, we only care about changes
		 * done per device delay prob time parameter.
		 */
#if IS_ENABLED(CONFIG_IPV6)
		if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl))
#else
		if (!p->dev || p->tbl != &arp_tbl)
#endif
			return NOTIFY_DONE;

		/* Only react if a tracked neigh entry lives on this device. */
		rcu_read_lock();
		list_for_each_entry_rcu(nhe, &neigh_update->neigh_list,
					neigh_list) {
			if (p->dev == READ_ONCE(nhe->neigh_dev)) {
				found = true;
				break;
			}
		}
		rcu_read_unlock();
		if (!found)
			return NOTIFY_DONE;

		/* min_interval only ever shrinks here; it is re-derived from
		 * scratch in mlx5e_rep_neigh_update_init_interval().
		 */
		neigh_update->min_interval = min_t(unsigned long,
						   NEIGH_VAR(p, DELAY_PROBE_TIME),
						   neigh_update->min_interval);
		mlx5_fc_update_sampling_interval(priv->mdev,
						 neigh_update->min_interval);
		break;
	}
	return NOTIFY_DONE;
}
271
/* rhashtable of neigh hash entries, keyed by the whole struct
 * mlx5e_neigh (address family + destination IP).
 */
static const struct rhashtable_params mlx5e_neigh_ht_params = {
	.head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node),
	.key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh),
	.key_len = sizeof(struct mlx5e_neigh),
	.automatic_shrinking = true,
};
278
279 int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv)
280 {
281         struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
282         int err;
283
284         err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params);
285         if (err)
286                 goto out_err;
287
288         INIT_LIST_HEAD(&neigh_update->neigh_list);
289         mutex_init(&neigh_update->encap_lock);
290         INIT_DELAYED_WORK(&neigh_update->neigh_stats_work,
291                           mlx5e_rep_neigh_stats_work);
292         mlx5e_rep_neigh_update_init_interval(rpriv);
293
294         neigh_update->netevent_nb.notifier_call = mlx5e_rep_netevent_event;
295         err = register_netevent_notifier(&neigh_update->netevent_nb);
296         if (err)
297                 goto out_notifier;
298         return 0;
299
300 out_notifier:
301         neigh_update->netevent_nb.notifier_call = NULL;
302         rhashtable_destroy(&neigh_update->neigh_ht);
303 out_err:
304         netdev_warn(rpriv->netdev,
305                     "Failed to initialize neighbours handling for vport %d\n",
306                     rpriv->rep->vport);
307         return err;
308 }
309
/* Tear down the neigh update machinery set up by mlx5e_rep_neigh_init().
 * Safe to call when init failed or never ran (notifier_call is NULL).
 */
void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv)
{
	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
	struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);

	if (!rpriv->neigh_update.netevent_nb.notifier_call)
		return;

	/* Stop new events first, then drain work already queued. */
	unregister_netevent_notifier(&neigh_update->netevent_nb);

	flush_workqueue(priv->wq); /* flush neigh update works */

	cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work);

	mutex_destroy(&neigh_update->encap_lock);
	rhashtable_destroy(&neigh_update->neigh_ht);
}
327
328 static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv,
329                                         struct mlx5e_neigh_hash_entry *nhe)
330 {
331         struct mlx5e_rep_priv *rpriv = priv->ppriv;
332         int err;
333
334         err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht,
335                                      &nhe->rhash_node,
336                                      mlx5e_neigh_ht_params);
337         if (err)
338                 return err;
339
340         list_add_rcu(&nhe->neigh_list, &rpriv->neigh_update.neigh_list);
341
342         return err;
343 }
344
/* Unlink @nhe from the representor's list and hash table. Called from
 * mlx5e_rep_neigh_entry_release() on the last reference drop; freeing is
 * done by the caller via kfree_rcu().
 */
static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe)
{
	struct mlx5e_rep_priv *rpriv = nhe->priv->ppriv;

	/* encap_lock serializes against other writers; RCU readers may keep
	 * traversing the entry until a grace period elapses.
	 */
	mutex_lock(&rpriv->neigh_update.encap_lock);

	list_del_rcu(&nhe->neigh_list);

	rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht,
			       &nhe->rhash_node,
			       mlx5e_neigh_ht_params);
	mutex_unlock(&rpriv->neigh_update.encap_lock);
}
358
359 /* This function must only be called under the representor's encap_lock or
360  * inside rcu read lock section.
361  */
362 struct mlx5e_neigh_hash_entry *
363 mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
364                              struct mlx5e_neigh *m_neigh)
365 {
366         struct mlx5e_rep_priv *rpriv = priv->ppriv;
367         struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
368         struct mlx5e_neigh_hash_entry *nhe;
369
370         nhe = rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh,
371                                      mlx5e_neigh_ht_params);
372         return nhe && mlx5e_rep_neigh_entry_hold(nhe) ? nhe : NULL;
373 }
374
375 int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
376                                  struct mlx5e_neigh *m_neigh,
377                                  struct net_device *neigh_dev,
378                                  struct mlx5e_neigh_hash_entry **nhe)
379 {
380         int err;
381
382         *nhe = kzalloc(sizeof(**nhe), GFP_KERNEL);
383         if (!*nhe)
384                 return -ENOMEM;
385
386         (*nhe)->priv = priv;
387         memcpy(&(*nhe)->m_neigh, m_neigh, sizeof(*m_neigh));
388         spin_lock_init(&(*nhe)->encap_list_lock);
389         INIT_LIST_HEAD(&(*nhe)->encap_list);
390         refcount_set(&(*nhe)->refcnt, 1);
391         WRITE_ONCE((*nhe)->neigh_dev, neigh_dev);
392
393         err = mlx5e_rep_neigh_entry_insert(priv, *nhe);
394         if (err)
395                 goto out_free;
396         return 0;
397
398 out_free:
399         kfree(*nhe);
400         return err;
401 }