int iif, int type, u32 portid, u32 seq,
unsigned int flags);
static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
- struct in6_addr *daddr,
- struct in6_addr *saddr);
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr);
#ifdef CONFIG_IPV6_ROUTE_INFO
static struct fib6_info *rt6_add_route_info(struct net *net,
{
struct rt6_info *pcpu_rt, **p;
- p = this_cpu_ptr(res->f6i->rt6i_pcpu);
+ p = this_cpu_ptr(res->nh->rt6i_pcpu);
pcpu_rt = *p;
if (pcpu_rt)
}
dst_hold(&pcpu_rt->dst);
- p = this_cpu_ptr(res->f6i->rt6i_pcpu);
+ p = this_cpu_ptr(res->nh->rt6i_pcpu);
prev = cmpxchg(p, NULL, pcpu_rt);
BUG_ON(prev);
+ if (res->f6i->fib6_destroying) {
+ struct fib6_info *from;
+
+ from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL);
+ fib6_info_release(from);
+ }
+
return pcpu_rt;
}
return err;
}
-void rt6_flush_exceptions(struct fib6_info *rt)
+static void fib6_nh_flush_exceptions(struct fib6_nh *nh, struct fib6_info *from)
{
struct rt6_exception_bucket *bucket;
struct rt6_exception *rt6_ex;
spin_lock_bh(&rt6_exception_lock);
/* Prevent rt6_insert_exception() to recreate the bucket list */
- rt->exception_bucket_flushed = 1;
+ from->exception_bucket_flushed = 1;
- bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
+ bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
lockdep_is_held(&rt6_exception_lock));
if (!bucket)
goto out;
spin_unlock_bh(&rt6_exception_lock);
}
+void rt6_flush_exceptions(struct fib6_info *f6i)
+{
+ fib6_nh_flush_exceptions(&f6i->fib6_nh, f6i);
+}
+
/* Find cached rt in the hash table inside passed in rt
* Caller has to hold rcu_read_lock()
*/
static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
- struct in6_addr *daddr,
- struct in6_addr *saddr)
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr)
{
+ const struct in6_addr *src_key = NULL;
struct rt6_exception_bucket *bucket;
- struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex;
struct rt6_info *ret = NULL;
- bucket = rcu_dereference(res->f6i->rt6i_exception_bucket);
-
#ifdef CONFIG_IPV6_SUBTREES
/* fib6i_src.plen != 0 indicates f6i is in subtree
* and exception table is indexed by a hash of
* both fib6_dst and fib6_src.
- * Otherwise, the exception table is indexed by
- * a hash of only fib6_dst.
+ * However, the src addr used to create the hash
+ * might not be exactly the passed in saddr which
+ * is a /128 addr from the flow.
+ * So we need to use f6i->fib6_src to redo lookup
+ * if the passed in saddr does not find anything.
+ * (See the logic in ip6_rt_cache_alloc() on how
+ * rt->rt6i_src is updated.)
*/
if (res->f6i->fib6_src.plen)
src_key = saddr;
+find_ex:
#endif
+ bucket = rcu_dereference(res->f6i->rt6i_exception_bucket);
rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
ret = rt6_ex->rt6i;
+#ifdef CONFIG_IPV6_SUBTREES
+ /* Use fib6_src as src_key and redo lookup */
+ if (!ret && src_key && src_key != &res->f6i->fib6_src.addr) {
+ src_key = &res->f6i->fib6_src.addr;
+ goto find_ex;
+ }
+#endif
+
return ret;
}
/* Remove the passed in cached rt from the hash table that contains it */
-static int rt6_remove_exception_rt(struct rt6_info *rt)
+static int fib6_nh_remove_exception(const struct fib6_info *from, int plen,
+ const struct rt6_info *rt)
{
+ const struct in6_addr *src_key = NULL;
struct rt6_exception_bucket *bucket;
- struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex;
- struct fib6_info *from;
int err;
- from = rcu_dereference(rt->from);
- if (!from ||
- !(rt->rt6i_flags & RTF_CACHE))
- return -EINVAL;
-
if (!rcu_access_pointer(from->rt6i_exception_bucket))
return -ENOENT;
bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
lockdep_is_held(&rt6_exception_lock));
#ifdef CONFIG_IPV6_SUBTREES
- /* rt6i_src.plen != 0 indicates 'from' is in subtree
- * and exception table is indexed by a hash of
- * both rt6i_dst and rt6i_src.
+ /* plen != 0 indicates 'from' is in subtree and exception
+ * table is indexed by a hash of both rt6i_dst and rt6i_src.
* Otherwise, the exception table is indexed by
* a hash of only rt6i_dst.
*/
- if (from->fib6_src.plen)
+ if (plen)
src_key = &rt->rt6i_src.addr;
#endif
rt6_ex = __rt6_find_exception_spinlock(&bucket,
return err;
}
+static int rt6_remove_exception_rt(struct rt6_info *rt)
+{
+ struct fib6_info *from;
+
+ from = rcu_dereference(rt->from);
+ if (!from ||
+ !(rt->rt6i_flags & RTF_CACHE))
+ return -EINVAL;
+
+ return fib6_nh_remove_exception(from, from->fib6_src.plen, rt);
+}
+
/* Find rt6_ex which contains the passed in rt cache and
* refresh its stamp
*/
-static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
+static void fib6_nh_update_exception(const struct fib6_info *from, int plen,
+ const struct rt6_info *rt)
{
+ const struct in6_addr *src_key = NULL;
struct rt6_exception_bucket *bucket;
- struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex;
- struct fib6_info *from;
-
- rcu_read_lock();
- from = rcu_dereference(rt->from);
- if (!from || !(rt->rt6i_flags & RTF_CACHE))
- goto unlock;
bucket = rcu_dereference(from->rt6i_exception_bucket);
#ifdef CONFIG_IPV6_SUBTREES
- /* rt6i_src.plen != 0 indicates 'from' is in subtree
- * and exception table is indexed by a hash of
- * both rt6i_dst and rt6i_src.
+ /* plen != 0 indicates 'from' is in subtree and exception
+ * table is indexed by a hash of both rt6i_dst and rt6i_src.
* Otherwise, the exception table is indexed by
* a hash of only rt6i_dst.
*/
- if (from->fib6_src.plen)
+ if (plen)
src_key = &rt->rt6i_src.addr;
#endif
rt6_ex = __rt6_find_exception_rcu(&bucket,
src_key);
if (rt6_ex)
rt6_ex->stamp = jiffies;
+}
+
+static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
+{
+ struct fib6_info *from;
+ rcu_read_lock();
+
+ from = rcu_dereference(rt->from);
+ if (!from || !(rt->rt6i_flags & RTF_CACHE))
+ goto unlock;
+
+ fib6_nh_update_exception(from, from->fib6_src.plen, rt);
unlock:
rcu_read_unlock();
}
gc_args->more++;
}
-void rt6_age_exceptions(struct fib6_info *rt,
- struct fib6_gc_args *gc_args,
- unsigned long now)
+static void fib6_nh_age_exceptions(struct fib6_info *rt,
+ struct fib6_gc_args *gc_args,
+ unsigned long now)
{
struct rt6_exception_bucket *bucket;
struct rt6_exception *rt6_ex;
rcu_read_unlock_bh();
}
+void rt6_age_exceptions(struct fib6_info *rt,
+ struct fib6_gc_args *gc_args,
+ unsigned long now)
+{
+ fib6_nh_age_exceptions(rt, gc_args, now);
+}
+
/* must be called with rcu lock held */
int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif,
struct flowi6 *fl6, struct fib6_result *res, int strict)
const struct in6_addr *daddr,
const struct in6_addr *saddr)
{
- struct rt6_exception_bucket *bucket;
const struct fib6_nh *nh = res->nh;
struct fib6_info *f6i = res->f6i;
- const struct in6_addr *src_key;
- struct rt6_exception *rt6_ex;
struct inet6_dev *idev;
+ struct rt6_info *rt;
u32 mtu = 0;
if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
goto out;
}
- src_key = NULL;
-#ifdef CONFIG_IPV6_SUBTREES
- if (f6i->fib6_src.plen)
- src_key = saddr;
-#endif
-
- bucket = rcu_dereference(f6i->rt6i_exception_bucket);
- rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
- if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
- mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
-
- if (likely(!mtu)) {
+ rt = rt6_find_cached_rt(res, daddr, saddr);
+ if (unlikely(rt)) {
+ mtu = dst_metric_raw(&rt->dst, RTAX_MTU);
+ } else {
struct net_device *dev = nh->fib_nh_dev;
mtu = IPV6_MIN_MTU;
!netif_carrier_ok(dev))
fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
+ fib6_nh->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags);
+ if (!fib6_nh->rt6i_pcpu) {
+ err = -ENOMEM;
+ goto out;
+ }
+
err = fib_nh_common_init(&fib6_nh->nh_common, cfg->fc_encap,
cfg->fc_encap_type, cfg, gfp_flags, extack);
if (err)
void fib6_nh_release(struct fib6_nh *fib6_nh)
{
+ if (fib6_nh->rt6i_pcpu) {
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct rt6_info **ppcpu_rt;
+ struct rt6_info *pcpu_rt;
+
+ ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
+ pcpu_rt = *ppcpu_rt;
+ if (pcpu_rt) {
+ dst_dev_put(&pcpu_rt->dst);
+ dst_release(&pcpu_rt->dst);
+ *ppcpu_rt = NULL;
+ }
+ }
+
+ free_percpu(fib6_nh->rt6i_pcpu);
+ }
+
fib_nh_common_release(&fib6_nh->nh_common);
}
struct rt6_mtu_change_arg {
struct net_device *dev;
unsigned int mtu;
+ struct fib6_info *f6i;
};
-static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
+static int fib6_nh_mtu_change(struct fib6_info *f6i, void *_arg)
+{
+ struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *)_arg;
+ struct fib6_nh *nh = &f6i->fib6_nh;
+
+ /* For administrative MTU increase, there is no way to discover
+ * IPv6 PMTU increase, so PMTU increase should be updated here.
+ * Since RFC 1981 doesn't include administrative MTU increase
+ * update PMTU increase is a MUST. (i.e. jumbo frame)
+ */
+ if (nh->fib_nh_dev == arg->dev) {
+ struct inet6_dev *idev = __in6_dev_get(arg->dev);
+ u32 mtu = f6i->fib6_pmtu;
+
+ if (mtu >= arg->mtu ||
+ (mtu < arg->mtu && mtu == idev->cnf.mtu6))
+ fib6_metric_set(f6i, RTAX_MTU, arg->mtu);
+
+ spin_lock_bh(&rt6_exception_lock);
+ rt6_exceptions_update_pmtu(idev, f6i, arg->mtu);
+ spin_unlock_bh(&rt6_exception_lock);
+ }
+
+ return 0;
+}
+
+static int rt6_mtu_change_route(struct fib6_info *f6i, void *p_arg)
{
struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
struct inet6_dev *idev;
if (!idev)
return 0;
- /* For administrative MTU increase, there is no way to discover
- IPv6 PMTU increase, so PMTU increase should be updated here.
- Since RFC 1981 doesn't include administrative MTU increase
- update PMTU increase is a MUST. (i.e. jumbo frame)
- */
- if (rt->fib6_nh.fib_nh_dev == arg->dev &&
- !fib6_metric_locked(rt, RTAX_MTU)) {
- u32 mtu = rt->fib6_pmtu;
-
- if (mtu >= arg->mtu ||
- (mtu < arg->mtu && mtu == idev->cnf.mtu6))
- fib6_metric_set(rt, RTAX_MTU, arg->mtu);
+ if (fib6_metric_locked(f6i, RTAX_MTU))
+ return 0;
- spin_lock_bh(&rt6_exception_lock);
- rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
- spin_unlock_bh(&rt6_exception_lock);
- }
- return 0;
+ arg->f6i = f6i;
+ return fib6_nh_mtu_change(f6i, arg);
}
void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
}
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
+ [RTA_UNSPEC] = { .strict_start_type = RTA_DPORT + 1 },
[RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
[RTA_PREFSRC] = { .len = sizeof(struct in6_addr) },
[RTA_OIF] = { .type = NLA_U32 },
rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}
+void fib6_rt_update(struct net *net, struct fib6_info *rt,
+ struct nl_info *info)
+{
+ u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
+ struct sk_buff *skb;
+ int err = -ENOBUFS;
+
+ /* call_fib6_entry_notifiers will be removed when in-kernel notifier
+ * is implemented and supported for nexthop objects
+ */
+ call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, rt, NULL);
+
+ skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
+ if (!skb)
+ goto errout;
+
+ err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
+ RTM_NEWROUTE, info->portid, seq, NLM_F_REPLACE);
+ if (err < 0) {
+ /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
+ WARN_ON(err == -EMSGSIZE);
+ kfree_skb(skb);
+ goto errout;
+ }
+ rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
+ info->nlh, gfp_any());
+ return;
+errout:
+ if (err < 0)
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
+}
+
static int ip6_route_dev_notify(struct notifier_block *this,
unsigned long event, void *ptr)
{