2 * Copyright (c) 2015, Mellanox Technologies, Ltd. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 #ifndef __MLX5_ESWITCH_H__
34 #define __MLX5_ESWITCH_H__
36 #include <linux/if_ether.h>
37 #include <linux/if_link.h>
38 #include <net/devlink.h>
39 #include <linux/mlx5/device.h>
40 #include <linux/mlx5/eswitch.h>
41 #include <linux/mlx5/vport.h>
42 #include <linux/mlx5/fs.h>
45 #ifdef CONFIG_MLX5_ESWITCH
/*
 * Upper bound on UC list entries per vport: two raised to the
 * log_max_current_uc_list general capability read via MLX5_CAP_GEN().
 * The shift is performed on a plain int, so the capability value must be
 * smaller than the bit width of int.
 */
#define MLX5_MAX_UC_PER_VPORT(dev) \
	(1 << MLX5_CAP_GEN(dev, log_max_current_uc_list))
/*
 * Upper bound on MC list entries per vport: two raised to the
 * log_max_current_mc_list general capability read via MLX5_CAP_GEN().
 * The shift is performed on a plain int, so the capability value must be
 * smaller than the bit width of int.
 */
#define MLX5_MAX_MC_PER_VPORT(dev) \
	(1 << MLX5_CAP_GEN(dev, log_max_current_mc_list))
/* Lowest bandwidth share MLX5_RATE_TO_BW_SHARE() will ever produce. */
#define MLX5_MIN_BW_SHARE 1

/*
 * Convert @rate into a bandwidth share: divide by @divider, then clamp the
 * quotient to the range [MLX5_MIN_BW_SHARE, @limit] using u32 arithmetic.
 * @divider must be non-zero; the macro does not guard the division.
 */
#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \
	min_t(u32, max_t(u32, (rate) / (divider), MLX5_MIN_BW_SHARE), (limit))
/*
 * Evaluates to the fdb_multi_path_to_table E-Switch flow-table capability
 * of @dev, as read through MLX5_CAP_ESW_FLOWTABLE().
 */
#define mlx5_esw_has_fwd_fdb(dev) \
	MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_to_table)
/*
 * FDB chain/priority limits used when CONFIG_MLX5_ESWITCH is enabled.
 * The slow path chain is numbered one past the highest regular chain.
 */
#define FDB_MAX_CHAIN 3
#define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1)
#define FDB_MAX_PRIO 16
/*
 * Ingress ACL bookkeeping for a vport: the ACL flow table, the flow groups
 * created in it, the rule handles installed (metadata modify, allow, drop)
 * and a flow counter for dropped traffic (judging by the member names).
 * NOTE(review): this copy of the struct ends at drop_counter; the closing
 * brace is not present here.
 */
65 struct vport_ingress {
66 struct mlx5_flow_table *acl;
67 struct mlx5_flow_group *allow_untagged_spoofchk_grp;
68 struct mlx5_flow_group *allow_spoofchk_only_grp;
69 struct mlx5_flow_group *allow_untagged_only_grp;
70 struct mlx5_flow_group *drop_grp;
71 int modify_metadata_id;
72 struct mlx5_flow_handle *modify_metadata_rule;
73 struct mlx5_flow_handle *allow_rule;
74 struct mlx5_flow_handle *drop_rule;
75 struct mlx5_fc *drop_counter;
79 struct mlx5_flow_table *acl;
80 struct mlx5_flow_group *allowed_vlans_grp;
81 struct mlx5_flow_group *drop_grp;
82 struct mlx5_flow_handle *allowed_vlan;
83 struct mlx5_flow_handle *drop_rule;
84 struct mlx5_fc *drop_counter;
87 struct mlx5_vport_drop_stats {
92 struct mlx5_vport_info {
105 struct mlx5_core_dev *dev;
107 struct hlist_head uc_list[MLX5_L2_ADDR_HASH_SIZE];
108 struct hlist_head mc_list[MLX5_L2_ADDR_HASH_SIZE];
109 struct mlx5_flow_handle *promisc_rule;
110 struct mlx5_flow_handle *allmulti_rule;
111 struct work_struct vport_change_handler;
113 struct vport_ingress ingress;
114 struct vport_egress egress;
116 struct mlx5_vport_info info;
128 enum offloads_fdb_flags {
129 ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED = BIT(0),
132 extern const unsigned int ESW_POOLS[4];
/*
 * PRIO_LEVELS sizes the innermost dimension of the fdb_prio table declared
 * further down in this struct.
 * The members visible here are a flow table with address/allmulti/promisc
 * flow groups plus a VEPA flow table with its uplink and star rules.
 * NOTE(review): lines between the struct header and the first member are
 * missing from this copy, so any enclosing union/sub-struct is not shown.
 */
134 #define PRIO_LEVELS 2
135 struct mlx5_eswitch_fdb {
138 struct mlx5_flow_table *fdb;
139 struct mlx5_flow_group *addr_grp;
140 struct mlx5_flow_group *allmulti_grp;
141 struct mlx5_flow_group *promisc_grp;
142 struct mlx5_flow_table *vepa_fdb;
143 struct mlx5_flow_handle *vepa_uplink_rule;
144 struct mlx5_flow_handle *vepa_star_rule;
/*
 * FDB state for offloads: the slow path table with its send-to-vport,
 * peer-miss and miss groups/rules, a vlan push/pop reference count, and the
 * fdb_prio table indexed by [chain][prio][level], guarded by fdb_prio_lock.
 * fdb_left has one slot per entry of ESW_POOLS.
 * NOTE(review): the opener of the anonymous struct closed by "} fdb_prio"
 * and the closing brace of offloads_fdb are missing from this copy.
 */
147 struct offloads_fdb {
148 struct mlx5_flow_table *slow_fdb;
149 struct mlx5_flow_group *send_to_vport_grp;
150 struct mlx5_flow_group *peer_miss_grp;
151 struct mlx5_flow_handle **peer_miss_rules;
152 struct mlx5_flow_group *miss_grp;
153 struct mlx5_flow_handle *miss_rule_uni;
154 struct mlx5_flow_handle *miss_rule_multi;
155 int vlan_push_pop_refcount;
158 struct mlx5_flow_table *fdb;
160 } fdb_prio[FDB_MAX_CHAIN + 1][FDB_MAX_PRIO + 1][PRIO_LEVELS];
161 /* Protects fdb_prio table */
162 struct mutex fdb_prio_lock;
164 int fdb_left[ARRAY_SIZE(ESW_POOLS)];
/*
 * Offloads state embedded in struct mlx5_eswitch (member "offloads"):
 * the offloads flow table and vport RX group, the vport representor array
 * walked by the mlx5_esw_for_*_rep iterators, the peer flow list with its
 * mutex, three 8-bit hashtables (encap, mod_hdr, termtbl), the per rep-type
 * ops table and the devlink encap mode.
 * NOTE(review): some member lines and the closing brace are missing from
 * this copy.
 */
170 struct mlx5_esw_offload {
171 struct mlx5_flow_table *ft_offloads;
172 struct mlx5_flow_group *vport_rx_group;
173 struct mlx5_eswitch_rep *vport_reps;
174 struct list_head peer_flows;
175 struct mutex peer_mutex;
176 DECLARE_HASHTABLE(encap_tbl, 8);
177 DECLARE_HASHTABLE(mod_hdr_tbl, 8);
178 DECLARE_HASHTABLE(termtbl_tbl, 8);
179 struct mutex termtbl_mutex; /* protects termtbl hash */
180 const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES];
183 enum devlink_eswitch_encap_mode encap;
/*
 * Embeds an l2addr_node as its first member and carries the handle of the
 * rule that forwards to the uplink.
 * NOTE(review): further members and the closing brace are missing from this
 * copy.
 */
186 /* E-Switch MC FDB table hash node */
187 struct esw_mc_addr { /* SRIOV only */
188 struct l2addr_node node;
189 struct mlx5_flow_handle *uplink_rule; /* Forward to uplink rule */
/*
 * Work item that pairs a work_struct with the E-Switch it belongs to, so a
 * handler given the work pointer can reach the owning mlx5_eswitch
 * (presumably via container_of — confirm against the handler).
 * NOTE(review): the closing brace is missing from this copy.
 */
193 struct mlx5_host_work {
194 struct work_struct work;
195 struct mlx5_eswitch *esw;
198 struct mlx5_esw_functions {
204 MLX5_ESWITCH_VPORT_MATCH_METADATA = BIT(0),
/*
 * Top-level E-Switch object. Visible members: the owning core device, the
 * FDB table state, the multicast address hash table, a workqueue, the vport
 * array walked by the mlx5_esw_for_*_vport iterators, state_lock, the
 * promiscuous multicast entry, the offloads state, first_host_vport (start
 * index of the host-function iterators) and the esw_funcs state.
 * NOTE(review): several members referenced elsewhere in this header (for
 * example total_vports), the terminator of the state_lock comment and the
 * closing brace are missing from this copy.
 */
207 struct mlx5_eswitch {
208 struct mlx5_core_dev *dev;
210 struct mlx5_eswitch_fdb fdb_table;
211 struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE];
212 struct workqueue_struct *work_queue;
213 struct mlx5_vport *vports;
217 /* Synchronize between vport change events
218 * and async SRIOV admin state changes
220 struct mutex state_lock;
221 struct esw_mc_addr mc_promisc;
228 struct mlx5_esw_offload offloads;
232 u16 first_host_vport;
233 struct mlx5_esw_functions esw_funcs;
/*
 * Declarations only: esw_offloads_* init/cleanup entry points (with and
 * without reps) followed by the per-vport ingress/egress ACL helpers
 * (enable, disable, rule cleanup, and removal of the ingress metadata
 * modify rule). The init/enable variants return int; the cleanup/disable
 * variants return void.
 */
236 void esw_offloads_cleanup(struct mlx5_eswitch *esw);
237 int esw_offloads_init(struct mlx5_eswitch *esw);
238 void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw);
239 int esw_offloads_init_reps(struct mlx5_eswitch *esw);
240 void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw,
241 struct mlx5_vport *vport);
242 int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw,
243 struct mlx5_vport *vport);
244 void esw_vport_cleanup_egress_rules(struct mlx5_eswitch *esw,
245 struct mlx5_vport *vport);
246 int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw,
247 struct mlx5_vport *vport);
248 void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw,
249 struct mlx5_vport *vport);
250 void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw,
251 struct mlx5_vport *vport);
252 void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
253 struct mlx5_vport *vport);
/*
 * Core E-Switch API declarations: lifecycle (init/cleanup/enable/disable),
 * per-vport setters (mac, state, vlan, spoofchk, trust, rate), VEPA
 * set/get, per-vport config/stats getters, send-to-vport rule removal and
 * raw modify/query of the esw vport context.
 * Inline stubs for init/cleanup/enable/disable exist in the
 * !CONFIG_MLX5_ESWITCH branch at the end of this header.
 * NOTE(review): mlx5_eswitch_get_vport_stats is shown here with one line of
 * its parameter list missing.
 */
256 int mlx5_eswitch_init(struct mlx5_core_dev *dev);
257 void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw);
258 int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode);
259 void mlx5_eswitch_disable(struct mlx5_eswitch *esw);
260 int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
261 u16 vport, u8 mac[ETH_ALEN]);
262 int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
263 u16 vport, int link_state);
264 int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
265 u16 vport, u16 vlan, u8 qos);
266 int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw,
267 u16 vport, bool spoofchk);
268 int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
269 u16 vport_num, bool setting);
270 int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport,
271 u32 max_rate, u32 min_rate);
272 int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting);
273 int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting);
274 int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
275 u16 vport, struct ifla_vf_info *ivi);
276 int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
278 struct ifla_vf_stats *vf_stats);
279 void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule);
281 int mlx5_eswitch_modify_esw_vport_context(struct mlx5_eswitch *esw, u16 vport,
282 void *in, int inlen);
283 int mlx5_eswitch_query_esw_vport_context(struct mlx5_eswitch *esw, u16 vport,
284 void *out, int outlen);
/*
 * Forward declarations of the flow spec / flow attr / termination-table
 * handle types, followed by declarations for termination-table rules,
 * offloaded and forward rule add/delete, chain/prio capability queries and
 * vport RX rule creation. The add/create functions whose return type is
 * visible return a struct mlx5_flow_handle pointer.
 * NOTE(review): in this copy the return-type lines of
 * mlx5_eswitch_termtbl_required, mlx5_eswitch_termtbl_put, the two del_*
 * functions and the three prios/range queries are missing, as is the tail
 * of the mlx5_eswitch_add_termtbl_rule parameter list.
 */
286 struct mlx5_flow_spec;
287 struct mlx5_esw_flow_attr;
288 struct mlx5_termtbl_handle;
291 mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw,
292 struct mlx5_flow_act *flow_act,
293 struct mlx5_flow_spec *spec);
295 struct mlx5_flow_handle *
296 mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw,
297 struct mlx5_flow_table *ft,
298 struct mlx5_flow_spec *spec,
299 struct mlx5_esw_flow_attr *attr,
300 struct mlx5_flow_act *flow_act,
301 struct mlx5_flow_destination *dest,
305 mlx5_eswitch_termtbl_put(struct mlx5_eswitch *esw,
306 struct mlx5_termtbl_handle *tt);
308 struct mlx5_flow_handle *
309 mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
310 struct mlx5_flow_spec *spec,
311 struct mlx5_esw_flow_attr *attr);
312 struct mlx5_flow_handle *
313 mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
314 struct mlx5_flow_spec *spec,
315 struct mlx5_esw_flow_attr *attr);
317 mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw,
318 struct mlx5_flow_handle *rule,
319 struct mlx5_esw_flow_attr *attr);
321 mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw,
322 struct mlx5_flow_handle *rule,
323 struct mlx5_esw_flow_attr *attr);
326 mlx5_eswitch_prios_supported(struct mlx5_eswitch *esw);
329 mlx5_eswitch_get_prio_range(struct mlx5_eswitch *esw);
332 mlx5_eswitch_get_chain_range(struct mlx5_eswitch *esw);
334 struct mlx5_flow_handle *
335 mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport,
336 struct mlx5_flow_destination *dest);
339 SET_VLAN_STRIP = BIT(0),
340 SET_VLAN_INSERT = BIT(1)
/*
 * Flow match levels (none, L2, L3, L4). Each enumerator is defined as the
 * MLX5_INLINE_MODE_* constant of the corresponding layer, so the two sets
 * of values are numerically interchangeable.
 * NOTE(review): the closing brace is missing from this copy.
 */
343 enum mlx5_flow_match_level {
344 MLX5_MATCH_NONE = MLX5_INLINE_MODE_NONE,
345 MLX5_MATCH_L2 = MLX5_INLINE_MODE_L2,
346 MLX5_MATCH_L3 = MLX5_INLINE_MODE_IP,
347 MLX5_MATCH_L4 = MLX5_INLINE_MODE_TCP_UDP,
/*
 * Current maximum for flow based vport multicasting; also the length of the
 * dests[] array in struct mlx5_esw_flow_attr.
 */
#define MLX5_MAX_FLOW_FWD_VPORTS 2
354 MLX5_ESW_DEST_ENCAP = BIT(0),
355 MLX5_ESW_DEST_ENCAP_VALID = BIT(1),
/*
 * Attributes of an offloaded E-Switch flow as passed to the
 * mlx5_eswitch_add_* / del_* rule functions and the vlan action helpers.
 * Visible members: the ingress rep and devices, per-VLAN-depth
 * proto/vid/prio arrays (MLX5_FS_VLAN_DEPTH entries each), up to
 * MLX5_MAX_FLOW_FWD_VPORTS destinations (rep, device, termination table),
 * the tunnel match level, a flow counter and the TC parse attributes.
 * NOTE(review): several members, the opener of the dests[] element struct
 * and the closing brace are missing from this copy.
 */
358 struct mlx5_esw_flow_attr {
359 struct mlx5_eswitch_rep *in_rep;
360 struct mlx5_core_dev *in_mdev;
361 struct mlx5_core_dev *counter_dev;
367 __be16 vlan_proto[MLX5_FS_VLAN_DEPTH];
368 u16 vlan_vid[MLX5_FS_VLAN_DEPTH];
369 u8 vlan_prio[MLX5_FS_VLAN_DEPTH];
374 struct mlx5_eswitch_rep *rep;
375 struct mlx5_core_dev *mdev;
377 struct mlx5_termtbl_handle *termtbl;
378 } dests[MLX5_MAX_FLOW_FWD_VPORTS];
381 u8 tunnel_match_level;
382 struct mlx5_fc *counter;
386 struct mlx5e_tc_flow_parse_attr *parse_attr;
/*
 * Declarations of the devlink-facing eswitch mode / inline-mode /
 * encap-mode setters and getters (setters additionally take a
 * netlink_ext_ack), the uplink private-data accessor, and the vlan action
 * add/delete helpers that operate on a struct mlx5_esw_flow_attr.
 * __mlx5_eswitch_set_vport_vlan takes an extra set_flags byte compared with
 * mlx5_eswitch_set_vport_vlan.
 */
389 int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
390 struct netlink_ext_ack *extack);
391 int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode);
392 int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
393 struct netlink_ext_ack *extack);
394 int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode);
395 int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode);
396 int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
397 enum devlink_eswitch_encap_mode encap,
398 struct netlink_ext_ack *extack);
399 int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink,
400 enum devlink_eswitch_encap_mode *encap);
401 void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type);
403 int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
404 struct mlx5_esw_flow_attr *attr);
405 int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
406 struct mlx5_esw_flow_attr *attr);
407 int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
408 u16 vport, u16 vlan, u8 qos, u8 set_flags);
/*
 * Reports whether the FDB supports vlan push/pop actions. The visible code
 * requires both pop_vlan and push_vlan capabilities, and on the final
 * return path additionally requires pop_vlan_2 and push_vlan_2.
 * NOTE(review): the second parameter, the braces and the statements between
 * the two visible expressions are missing from this copy, so the condition
 * under which the "_2" capabilities are consulted cannot be seen here.
 */
410 static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev,
413 bool ret = MLX5_CAP_ESW_FLOWTABLE_FDB(dev, pop_vlan) &&
414 MLX5_CAP_ESW_FLOWTABLE_FDB(dev, push_vlan);
419 return ret && MLX5_CAP_ESW_FLOWTABLE_FDB(dev, pop_vlan_2) &&
420 MLX5_CAP_ESW_FLOWTABLE_FDB(dev, push_vlan_2);
/*
 * Declarations: LAG and multipath prerequisite checks over a pair of core
 * devices, and the functions query that returns a const u32 pointer.
 * The !CONFIG_MLX5_ESWITCH branch of this header provides inline stubs for
 * mlx5_esw_lag_prereq (returns true) and mlx5_esw_query_functions
 * (returns ERR_PTR(-EOPNOTSUPP)).
 */
423 bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0,
424 struct mlx5_core_dev *dev1);
425 bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0,
426 struct mlx5_core_dev *dev1);
428 const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev);
/* Debug mask bit passed to mlx5_core_dbg_mask() by esw_debug(). */
#define MLX5_DEBUG_ESWITCH_MASK BIT(3)

/*
 * E-Switch logging helpers.
 *
 * esw_info() and esw_warn() take a core device pointer, log through its
 * ->device member with dev_info()/dev_warn() and prepend "E-Switch: " to
 * the format string, which must therefore be a string literal.
 * esw_debug() forwards to mlx5_core_dbg_mask() with the E-Switch mask bit
 * and adds no prefix of its own.
 */
#define esw_info(__dev, format, ...) \
	dev_info((__dev)->device, "E-Switch: " format, ##__VA_ARGS__)

#define esw_warn(__dev, format, ...) \
	dev_warn((__dev)->device, "E-Switch: " format, ##__VA_ARGS__)

#define esw_debug(dev, format, ...) \
	mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__)
/*
 * Selects the vport number of the eswitch manager function:
 * MLX5_VPORT_ECPF when mlx5_core_is_ecpf_esw_manager(dev) is true,
 * MLX5_VPORT_PF otherwise.
 * NOTE(review): the braces around the body are missing from this copy.
 */
441 /* The returned number is valid only when the dev is eswitch manager. */
442 static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev)
444 return mlx5_core_is_ecpf_esw_manager(dev) ?
445 MLX5_VPORT_ECPF : MLX5_VPORT_PF;
/*
 * Returns the first vport number that belongs to the host:
 * MLX5_VPORT_PF when mlx5_core_is_ecpf_esw_manager(dev) is true (the host
 * PF is then just another function), MLX5_VPORT_FIRST_VF otherwise.
 * NOTE(review): the braces around the body are missing from this copy.
 */
448 static inline u16 mlx5_eswitch_first_host_vport_num(struct mlx5_core_dev *dev)
450 return mlx5_core_is_ecpf_esw_manager(dev) ?
451 MLX5_VPORT_PF : MLX5_VPORT_FIRST_VF;
/*
 * True when @dev should handle "functions changed" events: either the
 * esw_functions_changed E-Switch capability is set or @dev is the ECPF
 * eswitch manager. The !CONFIG_MLX5_ESWITCH stub returns false.
 * NOTE(review): the braces and the terminator of the in-body comment are
 * missing from this copy.
 */
454 static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev)
456 /* Ideally device should have the functions changed supported
457 * capability regardless of it being ECPF or PF wherever such
458 * event should be processed such as on eswitch manager device.
459 * However, some ECPF based device might not have this capability
460 * set. Hence OR for ECPF check to cover such device.
462 return MLX5_CAP_ESW(dev, esw_functions_changed) ||
463 mlx5_core_is_ecpf_esw_manager(dev);
/*
 * Array index of the uplink vport: esw->total_vports - 1.
 * NOTE(review): the braces around the body are missing from this copy.
 */
466 static inline int mlx5_eswitch_uplink_idx(struct mlx5_eswitch *esw)
468 /* Uplink always locate at the last element of the array.*/
469 return esw->total_vports - 1;
/*
 * Array index reserved for the ECPF vport: esw->total_vports - 2, i.e. the
 * slot immediately before the uplink index.
 * NOTE(review): the braces around the body are missing from this copy.
 */
472 static inline int mlx5_eswitch_ecpf_idx(struct mlx5_eswitch *esw)
474 return esw->total_vports - 2;
/*
 * Maps a vport number to its array index. MLX5_VPORT_ECPF maps to
 * mlx5_eswitch_ecpf_idx() (with a warning when the ECPF vport does not
 * exist, but the index is returned regardless) and MLX5_VPORT_UPLINK maps
 * to mlx5_eswitch_uplink_idx().
 * NOTE(review): the second parameter line, the braces and the final return
 * for all other vport numbers are missing from this copy — presumably the
 * number is returned unchanged; confirm against the full source.
 */
477 static inline int mlx5_eswitch_vport_num_to_index(struct mlx5_eswitch *esw,
480 if (vport_num == MLX5_VPORT_ECPF) {
481 if (!mlx5_ecpf_vport_exists(esw->dev))
482 esw_warn(esw->dev, "ECPF vport doesn't exist!\n");
483 return mlx5_eswitch_ecpf_idx(esw);
486 if (vport_num == MLX5_VPORT_UPLINK)
487 return mlx5_eswitch_uplink_idx(esw);
/*
 * Inverse of mlx5_eswitch_vport_num_to_index(): the ECPF index yields
 * MLX5_VPORT_ECPF only when mlx5_ecpf_vport_exists() is true, and the
 * uplink index yields MLX5_VPORT_UPLINK.
 * NOTE(review): the second parameter line, the braces and the final return
 * for all other indices are missing from this copy — presumably the index
 * is returned unchanged; confirm against the full source.
 */
492 static inline u16 mlx5_eswitch_index_to_vport_num(struct mlx5_eswitch *esw,
495 if (index == mlx5_eswitch_ecpf_idx(esw) &&
496 mlx5_ecpf_vport_exists(esw->dev))
497 return MLX5_VPORT_ECPF;
499 if (index == mlx5_eswitch_uplink_idx(esw))
500 return MLX5_VPORT_UPLINK;
505 /* TODO: This mlx5e_tc function shouldn't be called by eswitch */
506 void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw);
/* The vport getters/iterators are only valid after esw->total_vports
 * and vport->vport are initialized in mlx5_eswitch_init.
 */
/*
 * Iterate over every vport of @esw, from index MLX5_VPORT_PF up to
 * total_vports - 1. @i is the index variable and @vport receives
 * &esw->vports[i]. The pointer is assigned before the bound is tested, so
 * after the loop @vport holds the one-past-the-end address and must not be
 * dereferenced.
 */
#define mlx5_esw_for_all_vports(esw, i, vport)		\
	for ((i) = MLX5_VPORT_PF;			\
	     (vport) = &(esw)->vports[(i)],		\
	     (i) < (esw)->total_vports; (i)++)
/*
 * Iterate over the VF vports of @esw in ascending order: @i runs from
 * MLX5_VPORT_FIRST_VF to @nvfs inclusive and @vport receives
 * &esw->vports[i]. The pointer is assigned before the bound is tested, so
 * it is only meaningful inside the loop body.
 */
#define mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs)	\
	for ((i) = MLX5_VPORT_FIRST_VF;				\
	     (vport) = &(esw)->vports[(i)],			\
	     (i) <= (nvfs); (i)++)
/*
 * Iterate over the VF vports of @esw in descending order: @i runs from
 * @nvfs down to MLX5_VPORT_FIRST_VF and @vport receives &esw->vports[i].
 * The pointer is assigned before the bound is tested, so it is only
 * meaningful inside the loop body.
 */
#define mlx5_esw_for_each_vf_vport_reverse(esw, i, vport, nvfs)	\
	for ((i) = (nvfs);						\
	     (vport) = &(esw)->vports[(i)],				\
	     (i) >= MLX5_VPORT_FIRST_VF; (i)--)
/* The rep getters/iterators are only valid after esw->total_vports
 * and vport->vport are initialized in mlx5_eswitch_init.
 */
/*
 * Iterate over every representor of @esw, from index MLX5_VPORT_PF up to
 * total_vports - 1. @i is the index variable and @rep receives
 * &esw->offloads.vport_reps[i]. The pointer is assigned before the bound
 * is tested, so it is only meaningful inside the loop body.
 */
#define mlx5_esw_for_all_reps(esw, i, rep)			\
	for ((i) = MLX5_VPORT_PF;				\
	     (rep) = &(esw)->offloads.vport_reps[(i)],		\
	     (i) < (esw)->total_vports; (i)++)
/*
 * Iterate over the VF representors of @esw in ascending order: @i runs
 * from MLX5_VPORT_FIRST_VF to @nvfs inclusive and @rep receives
 * &esw->offloads.vport_reps[i]. The pointer is assigned before the bound
 * is tested, so it is only meaningful inside the loop body.
 */
#define mlx5_esw_for_each_vf_rep(esw, i, rep, nvfs)		\
	for ((i) = MLX5_VPORT_FIRST_VF;				\
	     (rep) = &(esw)->offloads.vport_reps[(i)],		\
	     (i) <= (nvfs); (i)++)
/*
 * Iterate over the VF representors of @esw in descending order: @i runs
 * from @nvfs down to MLX5_VPORT_FIRST_VF and @rep receives
 * &esw->offloads.vport_reps[i]. The pointer is assigned before the bound
 * is tested, so it is only meaningful inside the loop body.
 */
#define mlx5_esw_for_each_vf_rep_reverse(esw, i, rep, nvfs)	\
	for ((i) = (nvfs);					\
	     (rep) = &(esw)->offloads.vport_reps[(i)],		\
	     (i) >= MLX5_VPORT_FIRST_VF; (i)--)
/*
 * Iterate over VF vport numbers in ascending order: @vport runs from
 * MLX5_VPORT_FIRST_VF to @nvfs inclusive. @esw is accepted for symmetry
 * with the other iterators but is not evaluated.
 */
#define mlx5_esw_for_each_vf_vport_num(esw, vport, nvfs)	\
	for ((vport) = MLX5_VPORT_FIRST_VF; (vport) <= (nvfs); (vport)++)
/*
 * Iterate over VF vport numbers in descending order: @vport runs from
 * @nvfs down to MLX5_VPORT_FIRST_VF. @esw is accepted for symmetry with
 * the other iterators but is not evaluated.
 */
#define mlx5_esw_for_each_vf_vport_num_reverse(esw, vport, nvfs)	\
	for ((vport) = (nvfs); (vport) >= MLX5_VPORT_FIRST_VF; (vport)--)
/*
 * Iterate over the host function representors of @esw in ascending order:
 * @i runs from esw->first_host_vport to @nvfs inclusive and @rep receives
 * &esw->offloads.vport_reps[i].
 * Includes host PF (vport 0) if it's not esw manager.
 */
#define mlx5_esw_for_each_host_func_rep(esw, i, rep, nvfs)	\
	for ((i) = (esw)->first_host_vport;			\
	     (rep) = &(esw)->offloads.vport_reps[(i)],		\
	     (i) <= (nvfs); (i)++)
/*
 * Iterate over the host function representors of @esw in descending
 * order: @i runs from @nvfs down to esw->first_host_vport and @rep
 * receives &esw->offloads.vport_reps[i].
 * @i must be a signed integer: when first_host_vport is 0 the loop only
 * terminates once @i drops below zero.
 */
#define mlx5_esw_for_each_host_func_rep_reverse(esw, i, rep, nvfs)	\
	for ((i) = (nvfs);						\
	     (rep) = &(esw)->offloads.vport_reps[(i)],			\
	     (i) >= (esw)->first_host_vport; (i)--)
/*
 * Iterate over host function vport numbers in ascending order: @vport
 * runs from esw->first_host_vport to @nvfs inclusive.
 */
#define mlx5_esw_for_each_host_func_vport(esw, vport, nvfs)	\
	for ((vport) = (esw)->first_host_vport;			\
	     (vport) <= (nvfs); (vport)++)
/*
 * Iterate over host function vport numbers in descending order: @vport
 * runs from @nvfs down to esw->first_host_vport.
 * @vport must be a signed integer: when first_host_vport is 0 an unsigned
 * loop variable would wrap around instead of ending the loop.
 */
#define mlx5_esw_for_each_host_func_vport_reverse(esw, vport, nvfs)	\
	for ((vport) = (nvfs);						\
	     (vport) >= (esw)->first_host_vport; (vport)--)
/*
 * Declarations: vport lookup by number (result must be checked by the
 * caller, per __must_check), a VF-vport predicate, the num-of-VFs update
 * hook (stubbed out when CONFIG_MLX5_ESWITCH is off) and the notifier
 * callback for "functions changed" events.
 */
569 struct mlx5_vport *__must_check
570 mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num);
572 bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num);
574 void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs);
575 int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data);
577 #else /* CONFIG_MLX5_ESWITCH */
578 /* eswitch API stubs */
/* Stub for builds without CONFIG_MLX5_ESWITCH: nothing to set up, always returns 0. */
static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
/* Stub for builds without CONFIG_MLX5_ESWITCH: no-op. */
static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {}
/* Stub for builds without CONFIG_MLX5_ESWITCH: ignores @mode and always returns 0. */
static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) { return 0; }
/* Stub for builds without CONFIG_MLX5_ESWITCH: no-op. */
static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw) {}
/*
 * Stub for builds without CONFIG_MLX5_ESWITCH: with no E-Switch there is
 * nothing to check, so the LAG prerequisite always holds.
 */
static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; }
/* Stub for builds without CONFIG_MLX5_ESWITCH: the device never handles function-change events. */
static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; }
/*
 * Stub for builds without CONFIG_MLX5_ESWITCH: the query is unsupported and
 * yields ERR_PTR(-EOPNOTSUPP), so callers must test the result with
 * IS_ERR() rather than against NULL.
 * NOTE(review): the braces around the body are missing from this copy.
 */
585 static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev)
587 return ERR_PTR(-EOPNOTSUPP);
/* Stub for builds without CONFIG_MLX5_ESWITCH: no-op, @num_vfs is ignored. */
static inline void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs) {}
/*
 * Reduced FDB chain/priority limits for builds without CONFIG_MLX5_ESWITCH,
 * so code sizing tables from these constants still compiles.
 * The slow path chain stays one past the highest regular chain.
 */
#define FDB_MAX_CHAIN 1
#define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1)
#define FDB_MAX_PRIO 1
596 #endif /* CONFIG_MLX5_ESWITCH */
598 #endif /* __MLX5_ESWITCH_H__ */