}
}
+static bool lookup_address_in_vec(struct mptcp_addr_info *addrs, unsigned int nr,
+ struct mptcp_addr_info *addr)
+{
+ int i;
+
+ for (i = 0; i < nr; i++) {
+ if (addresses_equal(&addrs[i], addr, addr->port))
+ return true;
+ }
+
+ return false;
+}
+
+/* Fill all the remote addresses into the array addrs[],
+ * and return the array size.
+ */
+static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullmesh,
+ struct mptcp_addr_info *addrs)
+{
+ struct sock *sk = (struct sock *)msk, *ssk;
+ struct mptcp_subflow_context *subflow;
+ struct mptcp_addr_info remote = { 0 };
+ unsigned int subflows_max;
+ int i = 0;
+
+ subflows_max = mptcp_pm_get_subflows_max(msk);
+
+ /* Non-fullmesh endpoint, fill in the single entry
+ * corresponding to the primary MPC subflow remote address
+ */
+ if (!fullmesh) {
+ remote_address((struct sock_common *)sk, &remote);
+ msk->pm.subflows++;
+ addrs[i++] = remote;
+ } else {
+ mptcp_for_each_subflow(msk, subflow) {
+ ssk = mptcp_subflow_tcp_sock(subflow);
+ remote_address((struct sock_common *)ssk, &remote);
+ if (!lookup_address_in_vec(addrs, i, &remote) &&
+ msk->pm.subflows < subflows_max) {
+ msk->pm.subflows++;
+ addrs[i++] = remote;
+ }
+ }
+ }
+
+ return i;
+}
+
static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
{
struct sock *sk = (struct sock *)msk;
!READ_ONCE(msk->pm.remote_deny_join_id0)) {
local = select_local_address(pernet, msk);
if (local) {
- struct mptcp_addr_info remote = { 0 };
+ bool fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH);
+ struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX];
+ int i, nr;
msk->pm.local_addr_used++;
- msk->pm.subflows++;
check_work_pending(msk);
- remote_address((struct sock_common *)sk, &remote);
+ nr = fill_remote_addresses_vec(msk, fullmesh, addrs);
spin_unlock_bh(&msk->pm.lock);
- __mptcp_subflow_connect(sk, &local->addr, &remote,
- local->flags, local->ifindex);
+ for (i = 0; i < nr; i++)
+ __mptcp_subflow_connect(sk, &local->addr, &addrs[i]);
spin_lock_bh(&msk->pm.lock);
return;
}
mptcp_pm_create_subflow_or_signal_addr(msk);
}
+/* Fill all the local addresses into the array addrs[],
+ * and return the array size.
+ */
+static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
+ struct mptcp_addr_info *addrs)
+{
+ struct sock *sk = (struct sock *)msk;
+ struct mptcp_pm_addr_entry *entry;
+ struct mptcp_addr_info local;
+ struct pm_nl_pernet *pernet;
+ unsigned int subflows_max;
+ int i = 0;
+
+ pernet = net_generic(sock_net(sk), pm_nl_pernet_id);
+ subflows_max = mptcp_pm_get_subflows_max(msk);
+
+ rcu_read_lock();
+ __mptcp_flush_join_list(msk);
+ list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
+ if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH))
+ continue;
+
+ if (entry->addr.family != sk->sk_family) {
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ if ((entry->addr.family == AF_INET &&
+ !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) ||
+ (sk->sk_family == AF_INET &&
+ !ipv6_addr_v4mapped(&entry->addr.addr6)))
+#endif
+ continue;
+ }
+
+ if (msk->pm.subflows < subflows_max) {
+ msk->pm.subflows++;
+ addrs[i++] = entry->addr;
+ }
+ }
+ rcu_read_unlock();
+
+ /* If the array is empty, fill in the single
+ * 'IPADDRANY' local address
+ */
+ if (!i) {
+ memset(&local, 0, sizeof(local));
+ local.family = msk->pm.remote.family;
+
+ msk->pm.subflows++;
+ addrs[i++] = local;
+ }
+
+ return i;
+}
+
static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
{
+ struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX];
struct sock *sk = (struct sock *)msk;
unsigned int add_addr_accept_max;
struct mptcp_addr_info remote;
- struct mptcp_addr_info local;
unsigned int subflows_max;
+ int i, nr;
add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk);
subflows_max = mptcp_pm_get_subflows_max(msk);
if (lookup_subflow_by_daddr(&msk->conn_list, &msk->pm.remote))
goto add_addr_echo;
- msk->pm.add_addr_accepted++;
- msk->pm.subflows++;
- if (msk->pm.add_addr_accepted >= add_addr_accept_max ||
- msk->pm.subflows >= subflows_max)
- WRITE_ONCE(msk->pm.accept_addr, false);
-
/* connect to the specified remote address, using whatever
* local address the routing configuration will pick.
*/
remote = msk->pm.remote;
if (!remote.port)
remote.port = sk->sk_dport;
- memset(&local, 0, sizeof(local));
- local.family = remote.family;
+ nr = fill_local_addresses_vec(msk, addrs);
+
+ msk->pm.add_addr_accepted++;
+ if (msk->pm.add_addr_accepted >= add_addr_accept_max ||
+ msk->pm.subflows >= subflows_max)
+ WRITE_ONCE(msk->pm.accept_addr, false);
spin_unlock_bh(&msk->pm.lock);
- __mptcp_subflow_connect(sk, &local, &remote, 0, 0);
+ for (i = 0; i < nr; i++)
+ __mptcp_subflow_connect(sk, &addrs[i], &remote);
spin_lock_bh(&msk->pm.lock);
add_addr_echo:
if (!tcp_rtx_and_write_queues_empty(ssk)) {
subflow->stale = 1;
__mptcp_retransmit_pending_data(sk);
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_SUBFLOWSTALE);
}
unlock_sock_fast(ssk, slow);
return NULL;
}
+int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id,
+ u8 *flags, int *ifindex)
+{
+ struct mptcp_pm_addr_entry *entry;
+
+ *flags = 0;
+ *ifindex = 0;
+
+ if (id) {
+ rcu_read_lock();
+ entry = __lookup_addr_by_id(net_generic(net, pm_nl_pernet_id), id);
+ if (entry) {
+ *flags = entry->flags;
+ *ifindex = entry->ifindex;
+ }
+ rcu_read_unlock();
+ }
+
+ return 0;
+}
+
static bool remove_anno_list_by_saddr(struct mptcp_sock *msk,
struct mptcp_addr_info *addr)
{
return 0;
}
-struct addr_entry_release_work {
- struct rcu_work rwork;
- struct mptcp_pm_addr_entry *entry;
-};
-
-static void mptcp_pm_release_addr_entry(struct work_struct *work)
-{
- struct addr_entry_release_work *w;
- struct mptcp_pm_addr_entry *entry;
-
- w = container_of(to_rcu_work(work), struct addr_entry_release_work, rwork);
- entry = w->entry;
- if (entry) {
- if (entry->lsk)
- sock_release(entry->lsk);
- kfree(entry);
- }
- kfree(w);
-}
-
-static void mptcp_pm_free_addr_entry(struct mptcp_pm_addr_entry *entry)
+/* caller must ensure the RCU grace period is already elapsed */
+static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry)
{
- struct addr_entry_release_work *w;
-
- w = kmalloc(sizeof(*w), GFP_ATOMIC);
- if (w) {
- INIT_RCU_WORK(&w->rwork, mptcp_pm_release_addr_entry);
- w->entry = entry;
- queue_rcu_work(system_wq, &w->rwork);
- }
+ if (entry->lsk)
+ sock_release(entry->lsk);
+ kfree(entry);
}
static int mptcp_nl_remove_id_zero_address(struct net *net,
spin_unlock_bh(&pernet->lock);
mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), &entry->addr);
- mptcp_pm_free_addr_entry(entry);
+ synchronize_rcu();
+ __mptcp_pm_release_addr_entry(entry);
return ret;
}
}
}
+/* caller must ensure the RCU grace period is already elapsed */
static void __flush_addrs(struct list_head *list)
{
while (!list_empty(list)) {
cur = list_entry(list->next,
struct mptcp_pm_addr_entry, list);
list_del_rcu(&cur->list);
- mptcp_pm_free_addr_entry(cur);
+ __mptcp_pm_release_addr_entry(cur);
}
}
bitmap_zero(pernet->id_bitmap, MAX_ADDR_ID + 1);
spin_unlock_bh(&pernet->lock);
mptcp_nl_remove_addrs_list(sock_net(skb->sk), &free_list);
+ synchronize_rcu();
__flush_addrs(&free_list);
return 0;
}
struct pm_nl_pernet *pernet = net_generic(net, pm_nl_pernet_id);
/* net is removed from namespace list, can't race with
- * other modifiers
+ * other modifiers, also netns core already waited for a
+ * RCU grace period.
*/
__flush_addrs(&pernet->local_addr_list);
}