}
 EXPORT_SYMBOL(gro_cells_init);
 
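+/* Carrier used to defer free_percpu() until after an RCU grace period:
+ * there is no kfree_rcu() equivalent for percpu memory, so a small
+ * wrapper is allocated to carry the rcu_head.
+ */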
+struct percpu_free_defer {
+       struct rcu_head rcu;
+       void __percpu   *ptr;
+};
+
+static void percpu_free_defer_callback(struct rcu_head *head)
+{
+       struct percpu_free_defer *defer;
+
+       defer = container_of(head, struct percpu_free_defer, rcu);
+       free_percpu(defer->ptr);
+       kfree(defer);
+}
+
 void gro_cells_destroy(struct gro_cells *gcells)
 {
+       struct percpu_free_defer *defer;
        int i;
 
        if (!gcells->cells)
                return;
        for_each_possible_cpu(i) {
                struct gro_cell *cell = per_cpu_ptr(gcells->cells, i);

                napi_disable(&cell->napi);
                __netif_napi_del(&cell->napi);
                __skb_queue_purge(&cell->napi_skbs);
        }
-       /* This barrier is needed because netpoll could access dev->napi_list
-        * under rcu protection.
+       /* We need to observe an rcu grace period before freeing ->cells,
+        * because netpoll could access dev->napi_list under rcu protection.
+        * Try hard to use call_rcu() instead of synchronize_rcu(),
+        * because we might be called from cleanup_net(), and we
+        * definitely do not want to block this critical task.
         */
-       synchronize_net();
-
-       free_percpu(gcells->cells);
+       defer = kmalloc(sizeof(*defer), GFP_KERNEL | __GFP_NOWARN);
+       if (likely(defer)) {
+               defer->ptr = gcells->cells;
+               call_rcu(&defer->rcu, percpu_free_defer_callback);
+       } else {
+       /* We do not hold RTNL at this point, so synchronize_net()
+        * would not be able to expedite this sync.
+                */
+               synchronize_rcu_expedited();
+               free_percpu(gcells->cells);
+       }
        gcells->cells = NULL;
 }
 EXPORT_SYMBOL(gro_cells_destroy);
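
For reference, the deferred-free idiom used above is the standard kernel pattern for
releasing RCU-protected memory without blocking on a grace period. Below is a minimal,
hypothetical sketch (struct foo, foo_free_rcu() and foo_release() are illustrative
names, not part of this patch) showing the same call_rcu() pattern for an object that
can embed its own rcu_head:

#include <linux/slab.h>
#include <linux/rcupdate.h>

/* Hypothetical object; illustrative only. */
struct foo {
        int data;
        struct rcu_head rcu;
};

static void foo_free_rcu(struct rcu_head *head)
{
        /* Runs only after all pre-existing RCU readers have finished. */
        kfree(container_of(head, struct foo, rcu));
}

static void foo_release(struct foo *f)
{
        /* Non-blocking, unlike synchronize_rcu(); kfree_rcu(f, rcu)
         * is the usual shorthand when the rcu_head is embedded.
         */
        call_rcu(&f->rcu, foo_free_rcu);
}

Percpu memory must be released with free_percpu() rather than kfree(), so kfree_rcu()
cannot be used here; the patch instead allocates the small percpu_free_defer carrier,
and falls back to the blocking synchronize_rcu_expedited() path only if that
allocation fails.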