X-Git-Url: http://git.monstr.eu/?a=blobdiff_plain;f=include%2Fnet%2Fsock.h;h=a7273b28918846233d93665f0fcd4b0a18b90d23;hb=272ac32f566e3f925b20c231a2b30f6893aa258a;hp=9fa54762e07752313a181fec44df21794cdfc982;hpb=d945404f74f34b76cb02d73025b92ce8b4729d3f;p=linux-2.6-microblaze.git

diff --git a/include/net/sock.h b/include/net/sock.h
index 9fa54762e077..a7273b289188 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -609,7 +609,7 @@ void sock_net_set(struct sock *sk, struct net *net)
 
 int sk_set_peek_off(struct sock *sk, int val);
 
-static inline int sk_peek_offset(struct sock *sk, int flags)
+static inline int sk_peek_offset(const struct sock *sk, int flags)
 {
         if (unlikely(flags & MSG_PEEK)) {
                 return READ_ONCE(sk->sk_peek_off);
@@ -849,7 +849,7 @@ static inline void sk_add_bind_node(struct sock *sk,
                ({ tpos = (typeof(*tpos) *)((void *)pos - offset); 1;});       \
              pos = rcu_dereference(hlist_next_rcu(pos)))
 
-static inline struct user_namespace *sk_user_ns(struct sock *sk)
+static inline struct user_namespace *sk_user_ns(const struct sock *sk)
 {
         /* Careful only use this in a context where these parameters
          * can not change and must all be valid, such as recvmsg from
@@ -895,7 +895,7 @@ enum sock_flags {
 
 #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
 
-static inline void sock_copy_flags(struct sock *nsk, struct sock *osk)
+static inline void sock_copy_flags(struct sock *nsk, const struct sock *osk)
 {
         nsk->sk_flags = osk->sk_flags;
 }
@@ -1240,6 +1240,7 @@ struct proto {
         void                    (*enter_memory_pressure)(struct sock *sk);
         void                    (*leave_memory_pressure)(struct sock *sk);
         atomic_long_t           *memory_allocated;      /* Current allocated memory. */
+        int  __percpu           *per_cpu_fw_alloc;
         struct percpu_counter   *sockets_allocated;     /* Current number of sockets. */
 
         /*
@@ -1383,21 +1384,46 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
 }
 
 static inline long
-sk_memory_allocated(const struct sock *sk)
+proto_memory_allocated(const struct proto *prot)
 {
-        return atomic_long_read(sk->sk_prot->memory_allocated);
+        return max(0L, atomic_long_read(prot->memory_allocated));
 }
 
 static inline long
+sk_memory_allocated(const struct sock *sk)
+{
+        return proto_memory_allocated(sk->sk_prot);
+}
+
+/* 1 MB per cpu, in page units */
+#define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT))
+
+static inline void
 sk_memory_allocated_add(struct sock *sk, int amt)
 {
-        return atomic_long_add_return(amt, sk->sk_prot->memory_allocated);
+        int local_reserve;
+
+        preempt_disable();
+        local_reserve = __this_cpu_add_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
+        if (local_reserve >= SK_MEMORY_PCPU_RESERVE) {
+                __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
+                atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
+        }
+        preempt_enable();
 }
 
 static inline void
 sk_memory_allocated_sub(struct sock *sk, int amt)
 {
-        atomic_long_sub(amt, sk->sk_prot->memory_allocated);
+        int local_reserve;
+
+        preempt_disable();
+        local_reserve = __this_cpu_sub_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
+        if (local_reserve <= -SK_MEMORY_PCPU_RESERVE) {
+                __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
+                atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
+        }
+        preempt_enable();
 }
 
 #define SK_ALLOC_PERCPU_COUNTER_BATCH 16
@@ -1426,12 +1452,6 @@ proto_sockets_allocated_sum_positive(struct proto *prot)
         return percpu_counter_sum_positive(prot->sockets_allocated);
 }
 
-static inline long
-proto_memory_allocated(struct proto *prot)
-{
-        return atomic_long_read(prot->memory_allocated);
-}
-
 static inline bool
 proto_memory_pressure(struct proto *prot)
 {
@@ -1518,30 +1538,18 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind);
 void __sk_mem_reduce_allocated(struct sock *sk, int amount);
 void __sk_mem_reclaim(struct sock *sk, int amount);
 
-/* We used to have PAGE_SIZE here, but systems with 64KB pages
- * do not necessarily have 16x time more memory than 4KB ones.
- */
-#define SK_MEM_QUANTUM 4096
-#define SK_MEM_QUANTUM_SHIFT ilog2(SK_MEM_QUANTUM)
 #define SK_MEM_SEND     0
 #define SK_MEM_RECV     1
 
-/* sysctl_mem values are in pages, we convert them in SK_MEM_QUANTUM units */
+/* sysctl_mem values are in pages */
 static inline long sk_prot_mem_limits(const struct sock *sk, int index)
 {
-        long val = READ_ONCE(sk->sk_prot->sysctl_mem[index]);
-
-#if PAGE_SIZE > SK_MEM_QUANTUM
-        val <<= PAGE_SHIFT - SK_MEM_QUANTUM_SHIFT;
-#elif PAGE_SIZE < SK_MEM_QUANTUM
-        val >>= SK_MEM_QUANTUM_SHIFT - PAGE_SHIFT;
-#endif
-        return val;
+        return READ_ONCE(sk->sk_prot->sysctl_mem[index]);
 }
 
 static inline int sk_mem_pages(int amt)
 {
-        return (amt + SK_MEM_QUANTUM - 1) >> SK_MEM_QUANTUM_SHIFT;
+        return (amt + PAGE_SIZE - 1) >> PAGE_SHIFT;
 }
 
 static inline bool sk_has_account(struct sock *sk)
@@ -1552,19 +1560,23 @@ static inline bool sk_has_account(struct sock *sk)
 
 static inline bool sk_wmem_schedule(struct sock *sk, int size)
 {
+        int delta;
+
         if (!sk_has_account(sk))
                 return true;
-        return size <= sk->sk_forward_alloc ||
-                __sk_mem_schedule(sk, size, SK_MEM_SEND);
+        delta = size - sk->sk_forward_alloc;
+        return delta <= 0 || __sk_mem_schedule(sk, delta, SK_MEM_SEND);
 }
 
 static inline bool
 sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size)
 {
+        int delta;
+
         if (!sk_has_account(sk))
                 return true;
-        return size <= sk->sk_forward_alloc ||
-                __sk_mem_schedule(sk, size, SK_MEM_RECV) ||
+        delta = size - sk->sk_forward_alloc;
+        return delta <= 0 || __sk_mem_schedule(sk, delta, SK_MEM_RECV) ||
                 skb_pfmemalloc(skb);
 }
 
@@ -1590,7 +1602,7 @@ static inline void sk_mem_reclaim(struct sock *sk)
 
         reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk);
 
-        if (reclaimable >= SK_MEM_QUANTUM)
+        if (reclaimable >= (int)PAGE_SIZE)
                 __sk_mem_reclaim(sk, reclaimable);
 }
 
@@ -1600,19 +1612,6 @@ static inline void sk_mem_reclaim_final(struct sock *sk)
         sk_mem_reclaim(sk);
 }
 
-static inline void sk_mem_reclaim_partial(struct sock *sk)
-{
-        int reclaimable;
-
-        if (!sk_has_account(sk))
-                return;
-
-        reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk);
-
-        if (reclaimable > SK_MEM_QUANTUM)
-                __sk_mem_reclaim(sk, reclaimable - 1);
-}
-
 static inline void sk_mem_charge(struct sock *sk, int size)
 {
         if (!sk_has_account(sk))
@@ -1620,29 +1619,12 @@ static inline void sk_mem_charge(struct sock *sk, int size)
         sk->sk_forward_alloc -= size;
 }
 
-/* the following macros control memory reclaiming in sk_mem_uncharge()
- */
-#define SK_RECLAIM_THRESHOLD   (1 << 21)
-#define SK_RECLAIM_CHUNK       (1 << 20)
-
 static inline void sk_mem_uncharge(struct sock *sk, int size)
 {
-        int reclaimable;
-
         if (!sk_has_account(sk))
                 return;
         sk->sk_forward_alloc += size;
-        reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk);
-
-        /* Avoid a possible overflow.
-         * TCP send queues can make this happen, if sk_mem_reclaim()
-         * is not called and more than 2 GBytes are released at once.
-         *
-         * If we reach 2 MBytes, reclaim 1 MBytes right now, there is
-         * no need to hold that much forward allocation anyway.
-         */
-        if (unlikely(reclaimable >= SK_RECLAIM_THRESHOLD))
-                __sk_mem_reclaim(sk, SK_RECLAIM_CHUNK);
+        sk_mem_reclaim(sk);
 }
 
 /*
@@ -2232,9 +2214,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro
         if (err)
                 return err;
 
-        skb->len             += copy;
-        skb->data_len        += copy;
-        skb->truesize        += copy;
+        skb_len_add(skb, copy);
         sk_wmem_queued_add(sk, copy);
         sk_mem_charge(sk, copy);
         return 0;
@@ -2843,18 +2823,18 @@ static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto)
 {
         /* Does this proto have per netns sysctl_wmem ? */
         if (proto->sysctl_wmem_offset)
-                return *(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset);
+                return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset));
 
-        return *proto->sysctl_wmem;
+        return READ_ONCE(*proto->sysctl_wmem);
 }
 
 static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto)
 {
         /* Does this proto have per netns sysctl_rmem ? */
         if (proto->sysctl_rmem_offset)
-                return *(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset);
+                return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset));
 
-        return *proto->sysctl_rmem;
+        return READ_ONCE(*proto->sysctl_rmem);
 }
 
 /* Default TCP Small queue budget is ~1 ms of data (1sec >> 10)
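
The sk_memory_allocated_add()/sk_memory_allocated_sub() change above batches forward-alloc updates in a per-CPU reserve and only folds them into the shared memory_allocated counter once the local batch exceeds SK_MEMORY_PCPU_RESERVE in either direction. The following standalone, single-threaded C sketch models that batching; pcpu_reserve and memory_allocated here are illustrative stand-ins for the per-CPU slot and the shared atomic, not the kernel API.

/* Minimal user-space model of the per-CPU forward-alloc batching. */
#include <stdio.h>

#define PAGE_SHIFT              12
#define SK_MEMORY_PCPU_RESERVE  (1 << (20 - PAGE_SHIFT))   /* 1 MB, in pages */

static long memory_allocated;   /* models the shared atomic_long_t */
static int  pcpu_reserve;       /* models this CPU's per_cpu_fw_alloc slot */

static void memory_allocated_add(int amt)
{
        pcpu_reserve += amt;
        if (pcpu_reserve >= SK_MEMORY_PCPU_RESERVE) {
                /* flush the local batch into the shared counter */
                memory_allocated += pcpu_reserve;
                pcpu_reserve = 0;
        }
}

static void memory_allocated_sub(int amt)
{
        pcpu_reserve -= amt;
        if (pcpu_reserve <= -SK_MEMORY_PCPU_RESERVE) {
                /* flush the (negative) batch as well */
                memory_allocated += pcpu_reserve;
                pcpu_reserve = 0;
        }
}

int main(void)
{
        int i;

        /* 300 one-page charges: the shared counter is touched only once the
         * local batch reaches 256 pages (1 MB with 4 KB pages).
         */
        for (i = 0; i < 300; i++)
                memory_allocated_add(1);
        printf("shared=%ld local=%d\n", memory_allocated, pcpu_reserve);

        for (i = 0; i < 300; i++)
                memory_allocated_sub(1);
        printf("shared=%ld local=%d\n", memory_allocated, pcpu_reserve);
        return 0;
}

With 4 KB pages the batch flushes every 256 pages, so the cost of the shared atomic is paid roughly once per megabyte of accounted memory per CPU rather than on every charge.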
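
Accounting is now done in whole pages instead of the fixed 4 KB SK_MEM_QUANTUM, which also lets sk_prot_mem_limits() return sysctl_mem[] values without rescaling. A small self-contained check of the sk_mem_pages() rounding, with PAGE_SIZE/PAGE_SHIFT hard-coded to 4 KB values purely for the example:

#include <assert.h>
#include <stdio.h>

#define PAGE_SHIFT      12
#define PAGE_SIZE       (1UL << PAGE_SHIFT)

static int mem_pages(int amt)
{
        /* same rounding as sk_mem_pages(): round up to whole pages */
        return (amt + PAGE_SIZE - 1) >> PAGE_SHIFT;
}

int main(void)
{
        assert(mem_pages(1) == 1);      /* one byte still costs a full page */
        assert(mem_pages(4096) == 1);
        assert(mem_pages(4097) == 2);
        /* With page-sized units, sysctl_mem[] limits (given in pages) can be
         * compared directly against these counts.
         */
        printf("ok\n");
        return 0;
}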
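
sk_wmem_schedule() and sk_rmem_schedule() now pass only the shortfall beyond sk_forward_alloc to __sk_mem_schedule(), rather than the full request size. A simplified user-space model of that delta logic; forward_alloc and mem_schedule() are stand-ins and ignore the page rounding the kernel performs.

#include <stdbool.h>
#include <stdio.h>

static int forward_alloc = 3000;        /* bytes already reserved for this socket */

/* models __sk_mem_schedule(): reserve "delta" more bytes, always succeeds here */
static bool mem_schedule(int delta)
{
        printf("scheduling %d extra bytes\n", delta);
        forward_alloc += delta;
        return true;
}

static bool wmem_schedule(int size)
{
        int delta = size - forward_alloc;

        /* nothing to schedule if the existing reservation already covers size */
        return delta <= 0 || mem_schedule(delta);
}

int main(void)
{
        wmem_schedule(1000);    /* covered by forward_alloc, no scheduling call */
        wmem_schedule(5000);    /* only the 2000-byte shortfall is scheduled */
        return 0;
}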