Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net

[linux-2.6-microblaze.git] / include / net / sock.h
diff --git a/include/net/sock.h b/include/net/sock.h

index 72ca97c..a7273b2 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -609,7 +609,7 @@ void sock_net_set(struct sock *sk, struct net *net)
  
  int sk_set_peek_off(struct sock *sk, int val);
  
-static inline int sk_peek_offset(struct sock *sk, int flags)
+static inline int sk_peek_offset(const struct sock *sk, int flags)
  {
         if (unlikely(flags & MSG_PEEK)) {
                 return READ_ONCE(sk->sk_peek_off);
@@ -849,7 +849,7 @@ static inline void sk_add_bind_node(struct sock *sk,
                 ({ tpos = (typeof(*tpos) *)((void *)pos - offset); 1;});       \
              pos = rcu_dereference(hlist_next_rcu(pos)))
  
-static inline struct user_namespace *sk_user_ns(struct sock *sk)
+static inline struct user_namespace *sk_user_ns(const struct sock *sk)
  {
         /* Careful only use this in a context where these parameters
          * can not change and must all be valid, such as recvmsg from
@@ -895,7 +895,7 @@ enum sock_flags {
  
  #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
  
-static inline void sock_copy_flags(struct sock *nsk, struct sock *osk)
+static inline void sock_copy_flags(struct sock *nsk, const struct sock *osk)
  {
         nsk->sk_flags = osk->sk_flags;
  }
@@ -1240,6 +1240,7 @@ struct proto {
         void                    (*enter_memory_pressure)(struct sock *sk);
         void                    (*leave_memory_pressure)(struct sock *sk);
         atomic_long_t           *memory_allocated;      /* Current allocated memory. */
+       int  __percpu           *per_cpu_fw_alloc;
         struct percpu_counter   *sockets_allocated;     /* Current number of sockets. */
  
         /*
@@ -1383,21 +1384,46 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
  }
  
  static inline long
-sk_memory_allocated(const struct sock *sk)
+proto_memory_allocated(const struct proto *prot)
  {
-       return atomic_long_read(sk->sk_prot->memory_allocated);
+       return max(0L, atomic_long_read(prot->memory_allocated));
  }
  
  static inline long
+sk_memory_allocated(const struct sock *sk)
+{
+       return proto_memory_allocated(sk->sk_prot);
+}
+
+/* 1 MB per cpu, in page units */
+#define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT))
+
+static inline void
  sk_memory_allocated_add(struct sock *sk, int amt)
  {
-       return atomic_long_add_return(amt, sk->sk_prot->memory_allocated);
+       int local_reserve;
+
+       preempt_disable();
+       local_reserve = __this_cpu_add_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
+       if (local_reserve >= SK_MEMORY_PCPU_RESERVE) {
+               __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
+               atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
+       }
+       preempt_enable();
  }
  
  static inline void
  sk_memory_allocated_sub(struct sock *sk, int amt)
  {
-       atomic_long_sub(amt, sk->sk_prot->memory_allocated);
+       int local_reserve;
+
+       preempt_disable();
+       local_reserve = __this_cpu_sub_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
+       if (local_reserve <= -SK_MEMORY_PCPU_RESERVE) {
+               __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
+               atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
+       }
+       preempt_enable();
  }
  
  #define SK_ALLOC_PERCPU_COUNTER_BATCH 16
@@ -1426,12 +1452,6 @@ proto_sockets_allocated_sum_positive(struct proto *prot)
         return percpu_counter_sum_positive(prot->sockets_allocated);
  }
  
-static inline long
-proto_memory_allocated(struct proto *prot)
-{
-       return atomic_long_read(prot->memory_allocated);
-}
-
  static inline bool
  proto_memory_pressure(struct proto *prot)
  {
@@ -1518,30 +1538,18 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind);
  void __sk_mem_reduce_allocated(struct sock *sk, int amount);
  void __sk_mem_reclaim(struct sock *sk, int amount);
  
-/* We used to have PAGE_SIZE here, but systems with 64KB pages
- * do not necessarily have 16x time more memory than 4KB ones.
- */
-#define SK_MEM_QUANTUM 4096
-#define SK_MEM_QUANTUM_SHIFT ilog2(SK_MEM_QUANTUM)
  #define SK_MEM_SEND    0
  #define SK_MEM_RECV    1
  
-/* sysctl_mem values are in pages, we convert them in SK_MEM_QUANTUM units */
+/* sysctl_mem values are in pages */
  static inline long sk_prot_mem_limits(const struct sock *sk, int index)
  {
-       long val = sk->sk_prot->sysctl_mem[index];
-
-#if PAGE_SIZE > SK_MEM_QUANTUM
-       val <<= PAGE_SHIFT - SK_MEM_QUANTUM_SHIFT;
-#elif PAGE_SIZE < SK_MEM_QUANTUM
-       val >>= SK_MEM_QUANTUM_SHIFT - PAGE_SHIFT;
-#endif
-       return val;
+       return READ_ONCE(sk->sk_prot->sysctl_mem[index]);
  }
  
  static inline int sk_mem_pages(int amt)
  {
-       return (amt + SK_MEM_QUANTUM - 1) >> SK_MEM_QUANTUM_SHIFT;
+       return (amt + PAGE_SIZE - 1) >> PAGE_SHIFT;
  }
  
  static inline bool sk_has_account(struct sock *sk)
@@ -1552,19 +1560,23 @@ static inline bool sk_has_account(struct sock *sk)
  
  static inline bool sk_wmem_schedule(struct sock *sk, int size)
  {
+       int delta;
+
         if (!sk_has_account(sk))
                 return true;
-       return size <= sk->sk_forward_alloc ||
-               __sk_mem_schedule(sk, size, SK_MEM_SEND);
+       delta = size - sk->sk_forward_alloc;
+       return delta <= 0 || __sk_mem_schedule(sk, delta, SK_MEM_SEND);
  }
  
  static inline bool
  sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size)
  {
+       int delta;
+
         if (!sk_has_account(sk))
                 return true;
-       return size <= sk->sk_forward_alloc ||
-               __sk_mem_schedule(sk, size, SK_MEM_RECV) ||
+       delta = size - sk->sk_forward_alloc;
+       return delta <= 0 || __sk_mem_schedule(sk, delta, SK_MEM_RECV) ||
                 skb_pfmemalloc(skb);
  }
  
@@ -1590,7 +1602,7 @@ static inline void sk_mem_reclaim(struct sock *sk)
  
         reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk);
  
-       if (reclaimable >= SK_MEM_QUANTUM)
+       if (reclaimable >= (int)PAGE_SIZE)
                 __sk_mem_reclaim(sk, reclaimable);
  }
  
@@ -1600,19 +1612,6 @@ static inline void sk_mem_reclaim_final(struct sock *sk)
         sk_mem_reclaim(sk);
  }
  
-static inline void sk_mem_reclaim_partial(struct sock *sk)
-{
-       int reclaimable;
-
-       if (!sk_has_account(sk))
-               return;
-
-       reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk);
-
-       if (reclaimable > SK_MEM_QUANTUM)
-               __sk_mem_reclaim(sk, reclaimable - 1);
-}
-
  static inline void sk_mem_charge(struct sock *sk, int size)
  {
         if (!sk_has_account(sk))
@@ -1620,29 +1619,12 @@ static inline void sk_mem_charge(struct sock *sk, int size)
         sk->sk_forward_alloc -= size;
  }
  
-/* the following macros control memory reclaiming in sk_mem_uncharge()
- */
-#define SK_RECLAIM_THRESHOLD   (1 << 21)
-#define SK_RECLAIM_CHUNK       (1 << 20)
-
  static inline void sk_mem_uncharge(struct sock *sk, int size)
  {
-       int reclaimable;
-
         if (!sk_has_account(sk))
                 return;
         sk->sk_forward_alloc += size;
-       reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk);
-
-       /* Avoid a possible overflow.
-        * TCP send queues can make this happen, if sk_mem_reclaim()
-        * is not called and more than 2 GBytes are released at once.
-        *
-        * If we reach 2 MBytes, reclaim 1 MBytes right now, there is
-        * no need to hold that much forward allocation anyway.
-        */
-       if (unlikely(reclaimable >= SK_RECLAIM_THRESHOLD))
-               __sk_mem_reclaim(sk, SK_RECLAIM_CHUNK);
+       sk_mem_reclaim(sk);
  }
  
  /*
@@ -2232,9 +2214,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro
         if (err)
                 return err;
  
-       skb->len             += copy;
-       skb->data_len        += copy;
-       skb->truesize        += copy;
+       skb_len_add(skb, copy);
         sk_wmem_queued_add(sk, copy);
         sk_mem_charge(sk, copy);
         return 0;
@@ -2843,18 +2823,18 @@ static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto)
  {
         /* Does this proto have per netns sysctl_wmem ? */
         if (proto->sysctl_wmem_offset)
-               return *(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset);
+               return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset));
  
-       return *proto->sysctl_wmem;
+       return READ_ONCE(*proto->sysctl_wmem);
  }
  
  static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto)
  {
         /* Does this proto have per netns sysctl_rmem ? */
         if (proto->sysctl_rmem_offset)
-               return *(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset);
+               return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset));
  
-       return *proto->sysctl_rmem;
+       return READ_ONCE(*proto->sysctl_rmem);
  }
  
  /* Default TCP Small queue budget is ~1 ms of data (1sec >> 10)