drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c

   1 /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
   2 /* Copyright (c) 2018 Mellanox Technologies. */
   3
   4 #include <net/inet_ecn.h>
   5 #include <net/vxlan.h>
   6 #include <net/gre.h>
   7 #include <net/geneve.h>
   8 #include <net/bareudp.h>
   9 #include "en/tc_tun.h"
  10 #include "en/tc_priv.h"
  11 #include "en_tc.h"
  12 #include "rep/tc.h"
  13 #include "rep/neigh.h"
  14 #include "lag/lag.h"
  15 #include "lag/mp.h"
  16
/* Input and output of the tunnel route lookup helpers in this file
 * (mlx5e_route_lookup_ipv4_get() / mlx5e_route_lookup_ipv6_get()).
 */
struct mlx5e_tc_tun_route_attr {
        /* egress device chosen by get_route_and_out_devs(); the lookup
         * helpers do not take a reference on it
         */
        struct net_device *out_dev;
        /* device the route resolved to; the lookup helpers dev_hold() it
         * and the cleanup helper dev_put()s it
         */
        struct net_device *route_dev;
        /* flow key handed to the route lookup; callers fill fl4 or fl6
         * according to the tunnel's IP version
         */
        union {
                struct flowi4 fl4;
                struct flowi6 fl6;
        } fl;
        /* neighbour entry for the destination address; released with
         * neigh_release() by the cleanup helper
         */
        struct neighbour *n;
        /* TTL / hop limit; when zero on entry the lookup helpers replace
         * it with the hop limit of the resolved route
         */
        u8 ttl;
};
  27
/* Declare a zero-initialised struct mlx5e_tc_tun_route_attr called @name. */
#define TC_TUN_ROUTE_ATTR_INIT(name) struct mlx5e_tc_tun_route_attr name = {}
  29
  30 static void mlx5e_tc_tun_route_attr_cleanup(struct mlx5e_tc_tun_route_attr *attr)
  31 {
  32         if (attr->n)
  33                 neigh_release(attr->n);
  34         if (attr->route_dev)
  35                 dev_put(attr->route_dev);
  36 }
  37
  38 struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev)
  39 {
  40         if (netif_is_vxlan(tunnel_dev))
  41                 return &vxlan_tunnel;
  42         else if (netif_is_geneve(tunnel_dev))
  43                 return &geneve_tunnel;
  44         else if (netif_is_gretap(tunnel_dev) ||
  45                  netif_is_ip6gretap(tunnel_dev))
  46                 return &gre_tunnel;
  47         else if (netif_is_bareudp(tunnel_dev))
  48                 return &mplsoudp_tunnel;
  49         else
  50                 return NULL;
  51 }
  52
  53 static int get_route_and_out_devs(struct mlx5e_priv *priv,
  54                                   struct net_device *dev,
  55                                   struct net_device **route_dev,
  56                                   struct net_device **out_dev)
  57 {
  58         struct net_device *uplink_dev, *uplink_upper, *real_dev;
  59         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
  60         bool dst_is_lag_dev;
  61
  62         real_dev = is_vlan_dev(dev) ? vlan_dev_real_dev(dev) : dev;
  63         uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
  64
  65         rcu_read_lock();
  66         uplink_upper = netdev_master_upper_dev_get_rcu(uplink_dev);
  67         /* mlx5_lag_is_sriov() is a blocking function which can't be called
  68          * while holding rcu read lock. Take the net_device for correctness
  69          * sake.
  70          */
  71         if (uplink_upper)
  72                 dev_hold(uplink_upper);
  73         rcu_read_unlock();
  74
  75         dst_is_lag_dev = (uplink_upper &&
  76                           netif_is_lag_master(uplink_upper) &&
  77                           real_dev == uplink_upper &&
  78                           mlx5_lag_is_sriov(priv->mdev));
  79         if (uplink_upper)
  80                 dev_put(uplink_upper);
  81
  82         /* if the egress device isn't on the same HW e-switch or
  83          * it's a LAG device, use the uplink
  84          */
  85         *route_dev = dev;
  86         if (!netdev_port_same_parent_id(priv->netdev, real_dev) ||
  87             dst_is_lag_dev || is_vlan_dev(*route_dev) ||
  88             netif_is_ovs_master(*route_dev))
  89                 *out_dev = uplink_dev;
  90         else if (mlx5e_eswitch_rep(dev) &&
  91                  mlx5e_is_valid_eswitch_fwd_dev(priv, dev))
  92                 *out_dev = *route_dev;
  93         else
  94                 return -EOPNOTSUPP;
  95
  96         if (!(mlx5e_eswitch_rep(*out_dev) &&
  97               mlx5e_is_uplink_rep(netdev_priv(*out_dev))))
  98                 return -EOPNOTSUPP;
  99
 100         if (mlx5e_eswitch_uplink_rep(priv->netdev) && *out_dev != priv->netdev)
 101                 return -EOPNOTSUPP;
 102
 103         return 0;
 104 }
 105
/* Resolve the IPv4 route described by attr->fl.fl4 in @dev's network
 * namespace and collect what is needed to build an encap header.
 *
 * On success returns 0 with attr->route_dev (reference held), attr->out_dev
 * and attr->n (neighbour reference held) filled in; attr->ttl is set from
 * the route's hop limit if it was zero. Release the result with
 * mlx5e_route_lookup_ipv4_put().
 *
 * On failure returns a negative errno and keeps no references: the error
 * from ip_route_output_key(), -ENETUNREACH for a non-unicast route or a
 * multipath-LAG route whose gateway is not IPv4, the error from
 * get_route_and_out_devs(), -ENOMEM when the neighbour lookup fails, or
 * -EOPNOTSUPP when CONFIG_INET is disabled.
 */
static int mlx5e_route_lookup_ipv4_get(struct mlx5e_priv *priv,
                                       struct net_device *dev,
                                       struct mlx5e_tc_tun_route_attr *attr)
{
        struct net_device *route_dev;
        struct net_device *out_dev;
        struct neighbour *n;
        struct rtable *rt;

#if IS_ENABLED(CONFIG_INET)
        struct mlx5_core_dev *mdev = priv->mdev;
        struct net_device *uplink_dev;
        int ret;

        if (mlx5_lag_is_multipath(mdev)) {
                struct mlx5_eswitch *esw = mdev->priv.eswitch;

                /* in multipath LAG mode the lookup is bound to the uplink */
                uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
                attr->fl.fl4.flowi4_oif = uplink_dev->ifindex;
        } else {
                struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(dev);

                /* otherwise bind it to the tunnel's remote ifindex, when
                 * the tunnel type provides one
                 */
                if (tunnel && tunnel->get_remote_ifindex)
                        attr->fl.fl4.flowi4_oif = tunnel->get_remote_ifindex(dev);
        }

        rt = ip_route_output_key(dev_net(dev), &attr->fl.fl4);
        if (IS_ERR(rt))
                return PTR_ERR(rt);

        if (rt->rt_type != RTN_UNICAST) {
                ret = -ENETUNREACH;
                goto err_rt_release;
        }

        if (mlx5_lag_is_multipath(mdev) && rt->rt_gw_family != AF_INET) {
                ret = -ENETUNREACH;
                goto err_rt_release;
        }
#else
        return -EOPNOTSUPP;
#endif

        ret = get_route_and_out_devs(priv, rt->dst.dev, &route_dev, &out_dev);
        if (ret < 0)
                goto err_rt_release;
        /* keep route_dev alive after the route itself is released below */
        dev_hold(route_dev);

        /* a caller-supplied TTL wins; zero means "use the route's default" */
        if (!attr->ttl)
                attr->ttl = ip4_dst_hoplimit(&rt->dst);
        n = dst_neigh_lookup(&rt->dst, &attr->fl.fl4.daddr);
        if (!n) {
                ret = -ENOMEM;
                goto err_dev_release;
        }

        /* the route is no longer needed; only the device and neighbour
         * references are handed back to the caller
         */
        ip_rt_put(rt);
        attr->route_dev = route_dev;
        attr->out_dev = out_dev;
        attr->n = n;
        return 0;

err_dev_release:
        dev_put(route_dev);
err_rt_release:
        ip_rt_put(rt);
        return ret;
}
 174
/* Release the neighbour and route device references that
 * mlx5e_route_lookup_ipv4_get() stored in @attr.
 */
static void mlx5e_route_lookup_ipv4_put(struct mlx5e_tc_tun_route_attr *attr)
{
        mlx5e_tc_tun_route_attr_cleanup(attr);
}
 179
 180 static const char *mlx5e_netdev_kind(struct net_device *dev)
 181 {
 182         if (dev->rtnl_link_ops)
 183                 return dev->rtnl_link_ops->kind;
 184         else
 185                 return "unknown";
 186 }
 187
 188 static int mlx5e_gen_ip_tunnel_header(char buf[], __u8 *ip_proto,
 189                                       struct mlx5e_encap_entry *e)
 190 {
 191         if (!e->tunnel) {
 192                 pr_warn("mlx5: Cannot generate tunnel header for this tunnel\n");
 193                 return -EOPNOTSUPP;
 194         }
 195
 196         return e->tunnel->generate_ip_tun_hdr(buf, ip_proto, e);
 197 }
 198
 199 static char *gen_eth_tnl_hdr(char *buf, struct net_device *dev,
 200                              struct mlx5e_encap_entry *e,
 201                              u16 proto)
 202 {
 203         struct ethhdr *eth = (struct ethhdr *)buf;
 204         char *ip;
 205
 206         ether_addr_copy(eth->h_dest, e->h_dest);
 207         ether_addr_copy(eth->h_source, dev->dev_addr);
 208         if (is_vlan_dev(dev)) {
 209                 struct vlan_hdr *vlan = (struct vlan_hdr *)
 210                                         ((char *)eth + ETH_HLEN);
 211                 ip = (char *)vlan + VLAN_HLEN;
 212                 eth->h_proto = vlan_dev_vlan_proto(dev);
 213                 vlan->h_vlan_TCI = htons(vlan_dev_vlan_id(dev));
 214                 vlan->h_vlan_encapsulated_proto = htons(proto);
 215         } else {
 216                 eth->h_proto = htons(proto);
 217                 ip = (char *)eth + ETH_HLEN;
 218         }
 219
 220         return ip;
 221 }
 222
 223 int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
 224                                     struct net_device *mirred_dev,
 225                                     struct mlx5e_encap_entry *e)
 226 {
 227         int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
 228         const struct ip_tunnel_key *tun_key = &e->tun_info->key;
 229         struct mlx5_pkt_reformat_params reformat_params;
 230         struct mlx5e_neigh m_neigh = {};
 231         TC_TUN_ROUTE_ATTR_INIT(attr);
 232         int ipv4_encap_size;
 233         char *encap_header;
 234         struct iphdr *ip;
 235         u8 nud_state;
 236         int err;
 237
 238         /* add the IP fields */
 239         attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK;
 240         attr.fl.fl4.daddr = tun_key->u.ipv4.dst;
 241         attr.fl.fl4.saddr = tun_key->u.ipv4.src;
 242         attr.ttl = tun_key->ttl;
 243
 244         err = mlx5e_route_lookup_ipv4_get(priv, mirred_dev, &attr);
 245         if (err)
 246                 return err;
 247
 248         ipv4_encap_size =
 249                 (is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
 250                 sizeof(struct iphdr) +
 251                 e->tunnel->calc_hlen(e);
 252
 253         if (max_encap_size < ipv4_encap_size) {
 254                 mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
 255                                ipv4_encap_size, max_encap_size);
 256                 err = -EOPNOTSUPP;
 257                 goto release_neigh;
 258         }
 259
 260         encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL);
 261         if (!encap_header) {
 262                 err = -ENOMEM;
 263                 goto release_neigh;
 264         }
 265
 266         m_neigh.family = attr.n->ops->family;
 267         memcpy(&m_neigh.dst_ip, attr.n->primary_key, attr.n->tbl->key_len);
 268         e->out_dev = attr.out_dev;
 269         e->route_dev_ifindex = attr.route_dev->ifindex;
 270
 271         /* It's important to add the neigh to the hash table before checking
 272          * the neigh validity state. So if we'll get a notification, in case the
 273          * neigh changes it's validity state, we would find the relevant neigh
 274          * in the hash.
 275          */
 276         err = mlx5e_rep_encap_entry_attach(netdev_priv(attr.out_dev), e, &m_neigh, attr.n->dev);
 277         if (err)
 278                 goto free_encap;
 279
 280         read_lock_bh(&attr.n->lock);
 281         nud_state = attr.n->nud_state;
 282         ether_addr_copy(e->h_dest, attr.n->ha);
 283         read_unlock_bh(&attr.n->lock);
 284
 285         /* add ethernet header */
 286         ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e,
 287                                              ETH_P_IP);
 288
 289         /* add ip header */
 290         ip->tos = tun_key->tos;
 291         ip->version = 0x4;
 292         ip->ihl = 0x5;
 293         ip->ttl = attr.ttl;
 294         ip->daddr = attr.fl.fl4.daddr;
 295         ip->saddr = attr.fl.fl4.saddr;
 296
 297         /* add tunneling protocol header */
 298         err = mlx5e_gen_ip_tunnel_header((char *)ip + sizeof(struct iphdr),
 299                                          &ip->protocol, e);
 300         if (err)
 301                 goto destroy_neigh_entry;
 302
 303         e->encap_size = ipv4_encap_size;
 304         e->encap_header = encap_header;
 305
 306         if (!(nud_state & NUD_VALID)) {
 307                 neigh_event_send(attr.n, NULL);
 308                 /* the encap entry will be made valid on neigh update event
 309                  * and not used before that.
 310                  */
 311                 goto release_neigh;
 312         }
 313
 314         memset(&reformat_params, 0, sizeof(reformat_params));
 315         reformat_params.type = e->reformat_type;
 316         reformat_params.size = ipv4_encap_size;
 317         reformat_params.data = encap_header;
 318         e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params,
 319                                                      MLX5_FLOW_NAMESPACE_FDB);
 320         if (IS_ERR(e->pkt_reformat)) {
 321                 err = PTR_ERR(e->pkt_reformat);
 322                 goto destroy_neigh_entry;
 323         }
 324
 325         e->flags |= MLX5_ENCAP_ENTRY_VALID;
 326         mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
 327         mlx5e_route_lookup_ipv4_put(&attr);
 328         return err;
 329
 330 destroy_neigh_entry:
 331         mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
 332 free_encap:
 333         kfree(encap_header);
 334 release_neigh:
 335         mlx5e_route_lookup_ipv4_put(&attr);
 336         return err;
 337 }
 338
 339 int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv,
 340                                     struct net_device *mirred_dev,
 341                                     struct mlx5e_encap_entry *e)
 342 {
 343         int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
 344         const struct ip_tunnel_key *tun_key = &e->tun_info->key;
 345         struct mlx5_pkt_reformat_params reformat_params;
 346         TC_TUN_ROUTE_ATTR_INIT(attr);
 347         int ipv4_encap_size;
 348         char *encap_header;
 349         struct iphdr *ip;
 350         u8 nud_state;
 351         int err;
 352
 353         /* add the IP fields */
 354         attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK;
 355         attr.fl.fl4.daddr = tun_key->u.ipv4.dst;
 356         attr.fl.fl4.saddr = tun_key->u.ipv4.src;
 357         attr.ttl = tun_key->ttl;
 358
 359         err = mlx5e_route_lookup_ipv4_get(priv, mirred_dev, &attr);
 360         if (err)
 361                 return err;
 362
 363         ipv4_encap_size =
 364                 (is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
 365                 sizeof(struct iphdr) +
 366                 e->tunnel->calc_hlen(e);
 367
 368         if (max_encap_size < ipv4_encap_size) {
 369                 mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
 370                                ipv4_encap_size, max_encap_size);
 371                 err = -EOPNOTSUPP;
 372                 goto release_neigh;
 373         }
 374
 375         encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL);
 376         if (!encap_header) {
 377                 err = -ENOMEM;
 378                 goto release_neigh;
 379         }
 380
 381         e->route_dev_ifindex = attr.route_dev->ifindex;
 382
 383         read_lock_bh(&attr.n->lock);
 384         nud_state = attr.n->nud_state;
 385         ether_addr_copy(e->h_dest, attr.n->ha);
 386         WRITE_ONCE(e->nhe->neigh_dev, attr.n->dev);
 387         read_unlock_bh(&attr.n->lock);
 388
 389         /* add ethernet header */
 390         ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e,
 391                                              ETH_P_IP);
 392
 393         /* add ip header */
 394         ip->tos = tun_key->tos;
 395         ip->version = 0x4;
 396         ip->ihl = 0x5;
 397         ip->ttl = attr.ttl;
 398         ip->daddr = attr.fl.fl4.daddr;
 399         ip->saddr = attr.fl.fl4.saddr;
 400
 401         /* add tunneling protocol header */
 402         err = mlx5e_gen_ip_tunnel_header((char *)ip + sizeof(struct iphdr),
 403                                          &ip->protocol, e);
 404         if (err)
 405                 goto free_encap;
 406
 407         e->encap_size = ipv4_encap_size;
 408         kfree(e->encap_header);
 409         e->encap_header = encap_header;
 410
 411         if (!(nud_state & NUD_VALID)) {
 412                 neigh_event_send(attr.n, NULL);
 413                 /* the encap entry will be made valid on neigh update event
 414                  * and not used before that.
 415                  */
 416                 goto release_neigh;
 417         }
 418
 419         memset(&reformat_params, 0, sizeof(reformat_params));
 420         reformat_params.type = e->reformat_type;
 421         reformat_params.size = ipv4_encap_size;
 422         reformat_params.data = encap_header;
 423         e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params,
 424                                                      MLX5_FLOW_NAMESPACE_FDB);
 425         if (IS_ERR(e->pkt_reformat)) {
 426                 err = PTR_ERR(e->pkt_reformat);
 427                 goto free_encap;
 428         }
 429
 430         e->flags |= MLX5_ENCAP_ENTRY_VALID;
 431         mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
 432         mlx5e_route_lookup_ipv4_put(&attr);
 433         return err;
 434
 435 free_encap:
 436         kfree(encap_header);
 437 release_neigh:
 438         mlx5e_route_lookup_ipv4_put(&attr);
 439         return err;
 440 }
 441
 442 #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
 443 static int mlx5e_route_lookup_ipv6_get(struct mlx5e_priv *priv,
 444                                        struct net_device *dev,
 445                                        struct mlx5e_tc_tun_route_attr *attr)
 446 {
 447         struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(dev);
 448         struct net_device *route_dev;
 449         struct net_device *out_dev;
 450         struct dst_entry *dst;
 451         struct neighbour *n;
 452         int ret;
 453
 454         if (tunnel && tunnel->get_remote_ifindex)
 455                 attr->fl.fl6.flowi6_oif = tunnel->get_remote_ifindex(dev);
 456         dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(dev), NULL, &attr->fl.fl6,
 457                                               NULL);
 458         if (IS_ERR(dst))
 459                 return PTR_ERR(dst);
 460
 461         if (!attr->ttl)
 462                 attr->ttl = ip6_dst_hoplimit(dst);
 463
 464         ret = get_route_and_out_devs(priv, dst->dev, &route_dev, &out_dev);
 465         if (ret < 0)
 466                 goto err_dst_release;
 467
 468         dev_hold(route_dev);
 469         n = dst_neigh_lookup(dst, &attr->fl.fl6.daddr);
 470         if (!n) {
 471                 ret = -ENOMEM;
 472                 goto err_dev_release;
 473         }
 474
 475         dst_release(dst);
 476         attr->out_dev = out_dev;
 477         attr->route_dev = route_dev;
 478         attr->n = n;
 479         return 0;
 480
 481 err_dev_release:
 482         dev_put(route_dev);
 483 err_dst_release:
 484         dst_release(dst);
 485         return ret;
 486 }
 487
/* Release the neighbour and route device references that
 * mlx5e_route_lookup_ipv6_get() stored in @attr.
 */
static void mlx5e_route_lookup_ipv6_put(struct mlx5e_tc_tun_route_attr *attr)
{
        mlx5e_tc_tun_route_attr_cleanup(attr);
}
 492
 493 int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
 494                                     struct net_device *mirred_dev,
 495                                     struct mlx5e_encap_entry *e)
 496 {
 497         int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
 498         const struct ip_tunnel_key *tun_key = &e->tun_info->key;
 499         struct mlx5_pkt_reformat_params reformat_params;
 500         struct mlx5e_neigh m_neigh = {};
 501         TC_TUN_ROUTE_ATTR_INIT(attr);
 502         struct ipv6hdr *ip6h;
 503         int ipv6_encap_size;
 504         char *encap_header;
 505         u8 nud_state;
 506         int err;
 507
 508         attr.ttl = tun_key->ttl;
 509         attr.fl.fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
 510         attr.fl.fl6.daddr = tun_key->u.ipv6.dst;
 511         attr.fl.fl6.saddr = tun_key->u.ipv6.src;
 512
 513         err = mlx5e_route_lookup_ipv6_get(priv, mirred_dev, &attr);
 514         if (err)
 515                 return err;
 516
 517         ipv6_encap_size =
 518                 (is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
 519                 sizeof(struct ipv6hdr) +
 520                 e->tunnel->calc_hlen(e);
 521
 522         if (max_encap_size < ipv6_encap_size) {
 523                 mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
 524                                ipv6_encap_size, max_encap_size);
 525                 err = -EOPNOTSUPP;
 526                 goto release_neigh;
 527         }
 528
 529         encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL);
 530         if (!encap_header) {
 531                 err = -ENOMEM;
 532                 goto release_neigh;
 533         }
 534
 535         m_neigh.family = attr.n->ops->family;
 536         memcpy(&m_neigh.dst_ip, attr.n->primary_key, attr.n->tbl->key_len);
 537         e->out_dev = attr.out_dev;
 538         e->route_dev_ifindex = attr.route_dev->ifindex;
 539
 540         /* It's important to add the neigh to the hash table before checking
 541          * the neigh validity state. So if we'll get a notification, in case the
 542          * neigh changes it's validity state, we would find the relevant neigh
 543          * in the hash.
 544          */
 545         err = mlx5e_rep_encap_entry_attach(netdev_priv(attr.out_dev), e, &m_neigh, attr.n->dev);
 546         if (err)
 547                 goto free_encap;
 548
 549         read_lock_bh(&attr.n->lock);
 550         nud_state = attr.n->nud_state;
 551         ether_addr_copy(e->h_dest, attr.n->ha);
 552         read_unlock_bh(&attr.n->lock);
 553
 554         /* add ethernet header */
 555         ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e,
 556                                                  ETH_P_IPV6);
 557
 558         /* add ip header */
 559         ip6_flow_hdr(ip6h, tun_key->tos, 0);
 560         /* the HW fills up ipv6 payload len */
 561         ip6h->hop_limit   = attr.ttl;
 562         ip6h->daddr       = attr.fl.fl6.daddr;
 563         ip6h->saddr       = attr.fl.fl6.saddr;
 564
 565         /* add tunneling protocol header */
 566         err = mlx5e_gen_ip_tunnel_header((char *)ip6h + sizeof(struct ipv6hdr),
 567                                          &ip6h->nexthdr, e);
 568         if (err)
 569                 goto destroy_neigh_entry;
 570
 571         e->encap_size = ipv6_encap_size;
 572         e->encap_header = encap_header;
 573
 574         if (!(nud_state & NUD_VALID)) {
 575                 neigh_event_send(attr.n, NULL);
 576                 /* the encap entry will be made valid on neigh update event
 577                  * and not used before that.
 578                  */
 579                 goto release_neigh;
 580         }
 581
 582         memset(&reformat_params, 0, sizeof(reformat_params));
 583         reformat_params.type = e->reformat_type;
 584         reformat_params.size = ipv6_encap_size;
 585         reformat_params.data = encap_header;
 586         e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params,
 587                                                      MLX5_FLOW_NAMESPACE_FDB);
 588         if (IS_ERR(e->pkt_reformat)) {
 589                 err = PTR_ERR(e->pkt_reformat);
 590                 goto destroy_neigh_entry;
 591         }
 592
 593         e->flags |= MLX5_ENCAP_ENTRY_VALID;
 594         mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
 595         mlx5e_route_lookup_ipv6_put(&attr);
 596         return err;
 597
 598 destroy_neigh_entry:
 599         mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
 600 free_encap:
 601         kfree(encap_header);
 602 release_neigh:
 603         mlx5e_route_lookup_ipv6_put(&attr);
 604         return err;
 605 }
 606
 607 int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
 608                                     struct net_device *mirred_dev,
 609                                     struct mlx5e_encap_entry *e)
 610 {
 611         int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
 612         const struct ip_tunnel_key *tun_key = &e->tun_info->key;
 613         struct mlx5_pkt_reformat_params reformat_params;
 614         TC_TUN_ROUTE_ATTR_INIT(attr);
 615         struct ipv6hdr *ip6h;
 616         int ipv6_encap_size;
 617         char *encap_header;
 618         u8 nud_state;
 619         int err;
 620
 621         attr.ttl = tun_key->ttl;
 622
 623         attr.fl.fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
 624         attr.fl.fl6.daddr = tun_key->u.ipv6.dst;
 625         attr.fl.fl6.saddr = tun_key->u.ipv6.src;
 626
 627         err = mlx5e_route_lookup_ipv6_get(priv, mirred_dev, &attr);
 628         if (err)
 629                 return err;
 630
 631         ipv6_encap_size =
 632                 (is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
 633                 sizeof(struct ipv6hdr) +
 634                 e->tunnel->calc_hlen(e);
 635
 636         if (max_encap_size < ipv6_encap_size) {
 637                 mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
 638                                ipv6_encap_size, max_encap_size);
 639                 err = -EOPNOTSUPP;
 640                 goto release_neigh;
 641         }
 642
 643         encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL);
 644         if (!encap_header) {
 645                 err = -ENOMEM;
 646                 goto release_neigh;
 647         }
 648
 649         e->route_dev_ifindex = attr.route_dev->ifindex;
 650
 651         read_lock_bh(&attr.n->lock);
 652         nud_state = attr.n->nud_state;
 653         ether_addr_copy(e->h_dest, attr.n->ha);
 654         WRITE_ONCE(e->nhe->neigh_dev, attr.n->dev);
 655         read_unlock_bh(&attr.n->lock);
 656
 657         /* add ethernet header */
 658         ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e,
 659                                                  ETH_P_IPV6);
 660
 661         /* add ip header */
 662         ip6_flow_hdr(ip6h, tun_key->tos, 0);
 663         /* the HW fills up ipv6 payload len */
 664         ip6h->hop_limit   = attr.ttl;
 665         ip6h->daddr       = attr.fl.fl6.daddr;
 666         ip6h->saddr       = attr.fl.fl6.saddr;
 667
 668         /* add tunneling protocol header */
 669         err = mlx5e_gen_ip_tunnel_header((char *)ip6h + sizeof(struct ipv6hdr),
 670                                          &ip6h->nexthdr, e);
 671         if (err)
 672                 goto free_encap;
 673
 674         e->encap_size = ipv6_encap_size;
 675         kfree(e->encap_header);
 676         e->encap_header = encap_header;
 677
 678         if (!(nud_state & NUD_VALID)) {
 679                 neigh_event_send(attr.n, NULL);
 680                 /* the encap entry will be made valid on neigh update event
 681                  * and not used before that.
 682                  */
 683                 goto release_neigh;
 684         }
 685
 686         memset(&reformat_params, 0, sizeof(reformat_params));
 687         reformat_params.type = e->reformat_type;
 688         reformat_params.size = ipv6_encap_size;
 689         reformat_params.data = encap_header;
 690         e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params,
 691                                                      MLX5_FLOW_NAMESPACE_FDB);
 692         if (IS_ERR(e->pkt_reformat)) {
 693                 err = PTR_ERR(e->pkt_reformat);
 694                 goto free_encap;
 695         }
 696
 697         e->flags |= MLX5_ENCAP_ENTRY_VALID;
 698         mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
 699         mlx5e_route_lookup_ipv6_put(&attr);
 700         return err;
 701
 702 free_encap:
 703         kfree(encap_header);
 704 release_neigh:
 705         mlx5e_route_lookup_ipv6_put(&attr);
 706         return err;
 707 }
 708 #endif
 709
/* Look up the route back towards the tunnel peer of a decap flow and record
 * in flow_attr->esw_attr what the route resolves to.
 *
 * The lookup uses the flow's rx tunnel addresses with source and
 * destination swapped. If flow_attr->tun_ip_version is neither 4 nor 6
 * (or is 6 while IPv6 support is compiled out) nothing is done and 0 is
 * returned.
 *
 * Depending on the resolved route device:
 *  - if it uses mlx5e_netdev_ops and mlx5e_tc_is_vf_tunnel() accepts the
 *    out/route device pair, the route vport is queried and stored as
 *    rx_tun_attr->decap_vport, together with the VNI read from @spec;
 *  - else if it is an OVS master and internal ports are supported, an
 *    ingress internal port is obtained and stored in esw_attr->int_port.
 *
 * Returns 0 on success or a negative errno from the route lookup, the vport
 * query or the internal port lookup. The route references are always
 * released before returning.
 */
int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv,
                              struct mlx5_flow_spec *spec,
                              struct mlx5_flow_attr *flow_attr,
                              struct net_device *filter_dev)
{
        struct mlx5_esw_flow_attr *esw_attr = flow_attr->esw_attr;
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5e_tc_int_port *int_port;
        TC_TUN_ROUTE_ATTR_INIT(attr);
        u16 vport_num;
        int err = 0;

        if (flow_attr->tun_ip_version == 4) {
                /* Addresses are swapped for decap */
                attr.fl.fl4.saddr = esw_attr->rx_tun_attr->dst_ip.v4;
                attr.fl.fl4.daddr = esw_attr->rx_tun_attr->src_ip.v4;
                err = mlx5e_route_lookup_ipv4_get(priv, filter_dev, &attr);
        }
#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
        else if (flow_attr->tun_ip_version == 6) {
                /* Addresses are swapped for decap */
                attr.fl.fl6.saddr = esw_attr->rx_tun_attr->dst_ip.v6;
                attr.fl.fl6.daddr = esw_attr->rx_tun_attr->src_ip.v6;
                err = mlx5e_route_lookup_ipv6_get(priv, filter_dev, &attr);
        }
#endif
        else
                /* no (supported) tunnel IP version: nothing to look up */
                return 0;

        /* a failed lookup holds no references, so there is nothing to put */
        if (err)
                return err;

        if (attr.route_dev->netdev_ops == &mlx5e_netdev_ops &&
            mlx5e_tc_is_vf_tunnel(attr.out_dev, attr.route_dev)) {
                err = mlx5e_tc_query_route_vport(attr.out_dev, attr.route_dev, &vport_num);
                if (err)
                        goto out;

                esw_attr->rx_tun_attr->vni = MLX5_GET(fte_match_param, spec->match_value,
                                                      misc_parameters.vxlan_vni);
                esw_attr->rx_tun_attr->decap_vport = vport_num;
        } else if (netif_is_ovs_master(attr.route_dev) && mlx5e_tc_int_port_supported(esw)) {
                int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv),
                                                 attr.route_dev->ifindex,
                                                 MLX5E_TC_INT_PORT_INGRESS);
                if (IS_ERR(int_port)) {
                        err = PTR_ERR(int_port);
                        goto out;
                }
                esw_attr->int_port = int_port;
        }

out:
        /* release with the helper matching the lookup that was performed */
        if (flow_attr->tun_ip_version == 4)
                mlx5e_route_lookup_ipv4_put(&attr);
#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
        else if (flow_attr->tun_ip_version == 6)
                mlx5e_route_lookup_ipv6_put(&attr);
#endif
        return err;
}
 771
 772 bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv,
 773                                     struct net_device *netdev)
 774 {
 775         struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(netdev);
 776
 777         if (tunnel && tunnel->can_offload(priv))
 778                 return true;
 779         else
 780                 return false;
 781 }
 782
 783 int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev,
 784                                  struct mlx5e_priv *priv,
 785                                  struct mlx5e_encap_entry *e,
 786                                  struct netlink_ext_ack *extack)
 787 {
 788         struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(tunnel_dev);
 789
 790         if (!tunnel) {
 791                 e->reformat_type = -1;
 792                 return -EOPNOTSUPP;
 793         }
 794
 795         return tunnel->init_encap_attr(tunnel_dev, priv, e, extack);
 796 }
 797
 798 int mlx5e_tc_tun_parse(struct net_device *filter_dev,
 799                        struct mlx5e_priv *priv,
 800                        struct mlx5_flow_spec *spec,
 801                        struct flow_cls_offload *f,
 802                        u8 *match_level)
 803 {
 804         struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev);
 805         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
 806         void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
 807                                        outer_headers);
 808         void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
 809                                        outer_headers);
 810         struct netlink_ext_ack *extack = f->common.extack;
 811         int err = 0;
 812
 813         if (!tunnel) {
 814                 netdev_warn(priv->netdev,
 815                             "decapsulation offload is not supported for %s net device\n",
 816                             mlx5e_netdev_kind(filter_dev));
 817                 err = -EOPNOTSUPP;
 818                 goto out;
 819         }
 820
 821         *match_level = tunnel->match_level;
 822
 823         if (tunnel->parse_udp_ports) {
 824                 err = tunnel->parse_udp_ports(priv, spec, f,
 825                                               headers_c, headers_v);
 826                 if (err)
 827                         goto out;
 828         }
 829
 830         if (tunnel->parse_tunnel) {
 831                 err = tunnel->parse_tunnel(priv, spec, f,
 832                                            headers_c, headers_v);
 833                 if (err)
 834                         goto out;
 835         }
 836
 837         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
 838                 struct flow_dissector_key_basic key_basic = {};
 839                 struct flow_dissector_key_basic mask_basic = {
 840                         .n_proto = htons(0xFFFF),
 841                 };
 842                 struct flow_match_basic match_basic = {
 843                         .key = &key_basic, .mask = &mask_basic,
 844                 };
 845                 struct flow_match_control match;
 846                 u16 addr_type;
 847
 848                 flow_rule_match_enc_control(rule, &match);
 849                 addr_type = match.key->addr_type;
 850
 851                 /* For tunnel addr_type used same key id`s as for non-tunnel */
 852                 if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
 853                         struct flow_match_ipv4_addrs match;
 854
 855                         flow_rule_match_enc_ipv4_addrs(rule, &match);
 856                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 857                                  src_ipv4_src_ipv6.ipv4_layout.ipv4,
 858                                  ntohl(match.mask->src));
 859                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 860                                  src_ipv4_src_ipv6.ipv4_layout.ipv4,
 861                                  ntohl(match.key->src));
 862
 863                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 864                                  dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
 865                                  ntohl(match.mask->dst));
 866                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 867                                  dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
 868                                  ntohl(match.key->dst));
 869
 870                         key_basic.n_proto = htons(ETH_P_IP);
 871                         mlx5e_tc_set_ethertype(priv->mdev, &match_basic, true,
 872                                                headers_c, headers_v);
 873                 } else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
 874                         struct flow_match_ipv6_addrs match;
 875
 876                         flow_rule_match_enc_ipv6_addrs(rule, &match);
 877                         memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 878                                             src_ipv4_src_ipv6.ipv6_layout.ipv6),
 879                                &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
 880                                                                    ipv6));
 881                         memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 882                                             src_ipv4_src_ipv6.ipv6_layout.ipv6),
 883                                &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
 884                                                                   ipv6));
 885
 886                         memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 887                                             dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
 888                                &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
 889                                                                    ipv6));
 890                         memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 891                                             dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
 892                                &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
 893                                                                   ipv6));
 894
 895                         key_basic.n_proto = htons(ETH_P_IPV6);
 896                         mlx5e_tc_set_ethertype(priv->mdev, &match_basic, true,
 897                                                headers_c, headers_v);
 898                 }
 899         }
 900
 901         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
 902                 struct flow_match_ip match;
 903
 904                 flow_rule_match_enc_ip(rule, &match);
 905                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
 906                          match.mask->tos & 0x3);
 907                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
 908                          match.key->tos & 0x3);
 909
 910                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
 911                          match.mask->tos >> 2);
 912                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
 913                          match.key->tos  >> 2);
 914
 915                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
 916                          match.mask->ttl);
 917                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
 918                          match.key->ttl);
 919
 920                 if (match.mask->ttl &&
 921                     !MLX5_CAP_ESW_FLOWTABLE_FDB
 922                         (priv->mdev,
 923                          ft_field_support.outer_ipv4_ttl)) {
 924                         NL_SET_ERR_MSG_MOD(extack,
 925                                            "Matching on TTL is not supported");
 926                         err = -EOPNOTSUPP;
 927                         goto out;
 928                 }
 929         }
 930
 931         /* let software handle IP fragments */
 932         MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
 933         MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0);
 934
 935         return 0;
 936
 937 out:
 938         return err;
 939 }
 940
 941 int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv,
 942                                  struct mlx5_flow_spec *spec,
 943                                  struct flow_cls_offload *f,
 944                                  void *headers_c,
 945                                  void *headers_v)
 946 {
 947         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
 948         struct netlink_ext_ack *extack = f->common.extack;
 949         struct flow_match_ports enc_ports;
 950
 951         /* Full udp dst port must be given */
 952
 953         if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
 954                 NL_SET_ERR_MSG_MOD(extack,
 955                                    "UDP tunnel decap filter must include enc_dst_port condition");
 956                 netdev_warn(priv->netdev,
 957                             "UDP tunnel decap filter must include enc_dst_port condition\n");
 958                 return -EOPNOTSUPP;
 959         }
 960
 961         flow_rule_match_enc_ports(rule, &enc_ports);
 962
 963         if (memchr_inv(&enc_ports.mask->dst, 0xff,
 964                        sizeof(enc_ports.mask->dst))) {
 965                 NL_SET_ERR_MSG_MOD(extack,
 966                                    "UDP tunnel decap filter must match enc_dst_port fully");
 967                 netdev_warn(priv->netdev,
 968                             "UDP tunnel decap filter must match enc_dst_port fully\n");
 969                 return -EOPNOTSUPP;
 970         }
 971
 972         /* match on UDP protocol and dst port number */
 973
 974         MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
 975         MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP);
 976
 977         MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
 978                  ntohs(enc_ports.mask->dst));
 979         MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
 980                  ntohs(enc_ports.key->dst));
 981
 982         /* UDP src port on outer header is generated by HW,
 983          * so it is probably a bad idea to request matching it.
 984          * Nonetheless, it is allowed.
 985          */
 986
 987         MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
 988                  ntohs(enc_ports.mask->src));
 989         MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
 990                  ntohs(enc_ports.key->src));
 991
 992         return 0;
 993 }