Merge tag 'drm-fixes-2022-07-22' of git://anongit.freedesktop.org/drm/drm
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 22 Jul 2022 19:03:19 +0000 (12:03 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 22 Jul 2022 19:03:19 +0000 (12:03 -0700)
Pull drm fixes from Dave Airlie:
 "Fixes for this week.

  The main one is the i915 firmware fix for the Phoronix-reported issue.
   I've written some firmware guidelines as a result, which should land
   in -next soon. Otherwise a few amdgpu fixes, a scheduler fix, a ttm
   fix and two other minor ones.

  scheduler:
   - scheduling while atomic fix

  ttm:
   - locking fix

  edp:
   - variable typo fix

  i915:
   - add back support for v69 firmware on ADL-P

  amdgpu:
   - Drop redundant buffer cleanup that can lead to a segfault
   - Add a bo_list mutex to avoid possible list corruption in CS
   - dmub notification fix

  imx:
   - fix error path"

* tag 'drm-fixes-2022-07-22' of git://anongit.freedesktop.org/drm/drm:
  drm/amdgpu: Protect the amdgpu_bo_list list with a mutex v2
  drm/imx/dcss: Add missing of_node_put() in fail path
  drm/i915/guc: support v69 in parallel to v70
  drm/i915/guc: Support programming the EU priority in the GuC descriptor
  drm/panel-edp: Fix variable typo when saving hpd absent delay from DT
  drm/amdgpu: Remove one duplicated ef removal
  drm/ttm: fix locking in vmap/vunmap TTM GEM helpers
  drm/scheduler: Don't kill jobs in interrupt context
  drm/amd/display: Fix new dmub notification enabling in DM

118 files changed:
Documentation/admin-guide/kernel-parameters.txt
Documentation/networking/dsa/dsa.rst
Documentation/networking/ip-sysctl.rst
arch/Kconfig
arch/csky/include/asm/tlb.h
arch/loongarch/Kconfig
arch/loongarch/include/asm/tlb.h
arch/powerpc/Kconfig
arch/powerpc/include/asm/tlb.h
arch/s390/Kconfig
arch/s390/include/asm/tlb.h
arch/sparc/Kconfig
arch/sparc/include/asm/tlb_64.h
arch/x86/Kconfig
arch/x86/include/asm/tlb.h
drivers/gpio/gpiolib-cdev.c
drivers/infiniband/hw/irdma/cm.c
drivers/infiniband/hw/irdma/i40iw_hw.c
drivers/infiniband/hw/irdma/icrdma_hw.c
drivers/infiniband/hw/irdma/irdma.h
drivers/infiniband/hw/irdma/verbs.c
drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
drivers/net/amt.c
drivers/net/can/rcar/rcar_canfd.c
drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
drivers/net/dsa/microchip/ksz_common.c
drivers/net/dsa/sja1105/sja1105_main.c
drivers/net/dsa/vitesse-vsc73xx-spi.c
drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
drivers/net/ethernet/emulex/benet/be_cmds.c
drivers/net/ethernet/emulex/benet/be_cmds.h
drivers/net/ethernet/emulex/benet/be_ethtool.c
drivers/net/ethernet/intel/e1000e/hw.h
drivers/net/ethernet/intel/e1000e/ich8lan.c
drivers/net/ethernet/intel/e1000e/ich8lan.h
drivers/net/ethernet/intel/e1000e/netdev.c
drivers/net/ethernet/intel/i40e/i40e_main.c
drivers/net/ethernet/intel/iavf/iavf.h
drivers/net/ethernet/intel/iavf/iavf_ethtool.c
drivers/net/ethernet/intel/iavf/iavf_main.c
drivers/net/ethernet/intel/iavf/iavf_txrx.c
drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
drivers/net/ethernet/intel/igc/igc_main.c
drivers/net/ethernet/intel/igc/igc_regs.h
drivers/net/ethernet/intel/ixgbe/ixgbe.h
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
drivers/net/ethernet/marvell/prestera/prestera_flower.c
drivers/net/ethernet/mediatek/mtk_ppe_offload.c
drivers/net/ethernet/mediatek/mtk_wed.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
drivers/net/ethernet/microchip/lan966x/lan966x_mac.c
drivers/net/ethernet/netronome/nfp/flower/action.c
drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
drivers/net/ethernet/stmicro/stmmac/dwmac4.h
drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
drivers/net/ethernet/stmicro/stmmac/stmmac.h
drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
drivers/net/usb/r8152.c
include/asm-generic/tlb.h
include/linux/stmmac.h
include/net/amt.h
include/net/inet_hashtables.h
include/net/inet_sock.h
include/net/ip.h
include/net/protocol.h
include/net/route.h
include/net/tcp.h
include/net/udp.h
kernel/rcu/srcutree.c
kernel/watch_queue.c
net/core/filter.c
net/core/secure_seq.c
net/core/sock_reuseport.c
net/dsa/port.c
net/ipv4/af_inet.c
net/ipv4/ah4.c
net/ipv4/esp4.c
net/ipv4/fib_semantics.c
net/ipv4/icmp.c
net/ipv4/igmp.c
net/ipv4/inet_connection_sock.c
net/ipv4/ip_forward.c
net/ipv4/ip_input.c
net/ipv4/ip_sockglue.c
net/ipv4/netfilter/nf_reject_ipv4.c
net/ipv4/proc.c
net/ipv4/route.c
net/ipv4/syncookies.c
net/ipv4/sysctl_net_ipv4.c
net/ipv4/tcp.c
net/ipv4/tcp_fastopen.c
net/ipv4/tcp_input.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_metrics.c
net/ipv4/tcp_minisocks.c
net/ipv4/tcp_output.c
net/ipv4/tcp_recovery.c
net/ipv4/tcp_timer.c
net/ipv6/af_inet6.c
net/ipv6/ip6_input.c
net/ipv6/syncookies.c
net/ipv6/tcp_ipv6.c
net/ipv6/udp.c
net/netfilter/nf_synproxy_core.c
net/sched/cls_api.c
net/sctp/protocol.c
net/smc/smc_llc.c
net/tls/tls_device.c
net/xfrm/xfrm_policy.c
net/xfrm/xfrm_state.c
scripts/gdb/linux/symbols.py
security/integrity/ima/ima_policy.c

index f2d26cb..c0fdb04 100644 (file)
                        expediting.  Set to zero to disable automatic
                        expediting.
 
+       srcutree.srcu_max_nodelay [KNL]
+                       Specifies the number of no-delay instances
+                       per jiffy for which the SRCU grace period
+                       worker thread will be rescheduled with zero
+                       delay. Beyond this limit, the worker thread
+                       will be rescheduled with a sleep delay of
+                       one jiffy.
+
+       srcutree.srcu_max_nodelay_phase [KNL]
+                       Specifies the per-grace-period-phase number of
+                       non-sleeping polls of readers. Beyond this
+                       limit, the grace-period worker thread will be
+                       rescheduled with a sleep delay of one jiffy
+                       between each rescan of the readers within a
+                       grace-period phase.
+
+       srcutree.srcu_retry_check_delay [KNL]
+                       Specifies the number of microseconds of
+                       non-sleeping delay between consecutive
+                       non-sleeping polls of readers.
+
        srcutree.small_contention_lim [KNL]
                        Specifies the number of update-side contention
                        events per jiffy that will be tolerated before
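
Being standard module parameters, the three new srcutree knobs above can also
be given on the kernel command line; the values below are illustrative only,
not recommended defaults:

    srcutree.srcu_max_nodelay=100 srcutree.srcu_max_nodelay_phase=1 srcutree.srcu_retry_check_delay=5
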
index ed7fa76..d742ba6 100644 (file)
@@ -503,26 +503,108 @@ per-port PHY specific details: interface connection, MDIO bus location, etc.
 Driver development
 ==================
 
-DSA switch drivers need to implement a dsa_switch_ops structure which will
+DSA switch drivers need to implement a ``dsa_switch_ops`` structure which will
 contain the various members described below.
 
-``register_switch_driver()`` registers this dsa_switch_ops in its internal list
-of drivers to probe for. ``unregister_switch_driver()`` does the exact opposite.
+Probing, registration and device lifetime
+-----------------------------------------
 
-Unless requested differently by setting the priv_size member accordingly, DSA
-does not allocate any driver private context space.
+DSA switches are regular ``device`` structures on buses (be they platform, SPI,
+I2C, MDIO or otherwise). The DSA framework is not involved in their probing
+with the device core.
+
+Switch registration from the perspective of a driver means passing a valid
+``struct dsa_switch`` pointer to ``dsa_register_switch()``, usually from the
+switch driver's probing function. The following members must be valid in the
+provided structure:
+
+- ``ds->dev``: will be used to parse the switch's OF node or platform data.
+
+- ``ds->num_ports``: will be used to create the port list for this switch, and
+  to validate the port indices provided in the OF node.
+
+- ``ds->ops``: a pointer to the ``dsa_switch_ops`` structure holding the DSA
+  method implementations.
+
+- ``ds->priv``: backpointer to a driver-private data structure which can be
+  retrieved in all further DSA method callbacks.
+
+In addition, the following flags in the ``dsa_switch`` structure may optionally
+be configured to obtain driver-specific behavior from the DSA core. Their
+behavior when set is documented through comments in ``include/net/dsa.h``.
+
+- ``ds->vlan_filtering_is_global``
+
+- ``ds->needs_standalone_vlan_filtering``
+
+- ``ds->configure_vlan_while_not_filtering``
+
+- ``ds->untag_bridge_pvid``
+
+- ``ds->assisted_learning_on_cpu_port``
+
+- ``ds->mtu_enforcement_ingress``
+
+- ``ds->fdb_isolation``
+
+Internally, DSA keeps an array of switch trees (group of switches) global to
+the kernel, and attaches a ``dsa_switch`` structure to a tree on registration.
+The tree ID to which the switch is attached is determined by the first u32
+number of the ``dsa,member`` property of the switch's OF node (0 if missing).
+The switch ID within the tree is determined by the second u32 number of the
+same OF property (0 if missing). Registering multiple switches with the same
+switch ID and tree ID is illegal and will cause an error. Using platform data,
+only a single switch and a single switch tree are permitted.
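
For example, a hypothetical switch node registering as switch 1 of tree 0
would carry the following ``dsa,member`` fragment in its OF node:

    switch@1 {
            /* ... */
            dsa,member = <0 1>;     /* tree index 0, switch index 1 */
    };
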
+
+In case of a tree with multiple switches, probing takes place asymmetrically.
+The first N-1 callers of ``dsa_register_switch()`` only add their ports to the
+port list of the tree (``dst->ports``), each port having a backpointer to its
+associated switch (``dp->ds``). Then, these switches exit their
+``dsa_register_switch()`` call early, because ``dsa_tree_setup_routing_table()``
+has determined that the tree is not yet complete (not all ports referenced by
+DSA links are present in the tree's port list). The tree becomes complete when
+the last switch calls ``dsa_register_switch()``, and this triggers the effective
+continuation of initialization (including the call to ``ds->ops->setup()``) for
+all switches within that tree, all as part of the calling context of the last
+switch's probe function.
+
+The opposite of registration takes place when calling ``dsa_unregister_switch()``,
+which removes a switch's ports from the port list of the tree. The entire tree
+is torn down when the first switch unregisters.
+
+It is mandatory for DSA switch drivers to implement the ``shutdown()`` callback
+of their respective bus, and call ``dsa_switch_shutdown()`` from it (a minimal
+version of the full teardown performed by ``dsa_unregister_switch()``).
+The reason is that DSA keeps a reference on the master net device, and if the
+driver for the master device decides to unbind on shutdown, DSA's reference
+will block that operation from finalizing.
+
+Either ``dsa_switch_shutdown()`` or ``dsa_unregister_switch()`` must be called,
+but not both, and the device driver model permits the bus' ``remove()`` method
+to be called even if ``shutdown()`` was already called. Therefore, drivers are
+expected to implement a mutual exclusion method between ``remove()`` and
+``shutdown()`` by setting their drvdata to NULL after any of these has run, and
+checking whether the drvdata is NULL before proceeding to take any action.
+
+After ``dsa_switch_shutdown()`` or ``dsa_unregister_switch()`` was called, no
+further callbacks via the provided ``dsa_switch_ops`` may take place, and the
+driver may free the data structures associated with the ``dsa_switch``.
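
A minimal sketch of the registration and shutdown/remove exclusion described
above, for a platform device driver (the ``foo_*`` names are hypothetical and
not taken from any in-tree driver):

    static int foo_probe(struct platform_device *pdev)
    {
            struct dsa_switch *ds;

            ds = devm_kzalloc(&pdev->dev, sizeof(*ds), GFP_KERNEL);
            if (!ds)
                    return -ENOMEM;

            ds->dev = &pdev->dev;
            ds->num_ports = 5;              /* hypothetical port count */
            ds->ops = &foo_switch_ops;      /* hypothetical dsa_switch_ops */
            ds->priv = foo_alloc_priv(pdev);        /* driver-private state */

            platform_set_drvdata(pdev, ds);
            return dsa_register_switch(ds);
    }

    static int foo_remove(struct platform_device *pdev)
    {
            struct dsa_switch *ds = platform_get_drvdata(pdev);

            if (!ds)        /* shutdown() already ran and cleared drvdata */
                    return 0;

            dsa_unregister_switch(ds);
            platform_set_drvdata(pdev, NULL);
            return 0;
    }

    static void foo_shutdown(struct platform_device *pdev)
    {
            struct dsa_switch *ds = platform_get_drvdata(pdev);

            if (!ds)        /* remove() already ran and cleared drvdata */
                    return;

            dsa_switch_shutdown(ds);
            platform_set_drvdata(pdev, NULL);
    }

The drvdata NULL check is what implements the mutual exclusion between
``remove()`` and ``shutdown()`` described above.
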
 
 Switch configuration
 --------------------
 
-- ``tag_protocol``: this is to indicate what kind of tagging protocol is supported,
-  should be a valid value from the ``dsa_tag_protocol`` enum
+- ``get_tag_protocol``: this is to indicate what kind of tagging protocol is
+  supported; the return value should be a valid value from the
+  ``dsa_tag_protocol`` enum.
+  The returned information does not have to be static; the driver is passed the
+  CPU port number, as well as the tagging protocol of a possibly stacked
+  upstream switch, in case there are hardware limitations in terms of supported
+  tag formats.
 
-- ``probe``: probe routine which will be invoked by the DSA platform device upon
-  registration to test for the presence/absence of a switch device. For MDIO
-  devices, it is recommended to issue a read towards internal registers using
-  the switch pseudo-PHY and return whether this is a supported device. For other
-  buses, return a non-NULL string
+- ``change_tag_protocol``: when the default tagging protocol has compatibility
+  problems with the master or other issues, the driver may support changing it
+  at runtime, either through a device tree property or through sysfs. In that
+  case, further calls to ``get_tag_protocol`` should report the protocol in
+  current use.
 
 - ``setup``: setup function for the switch, this function is responsible for setting
   up the ``dsa_switch_ops`` private structure with all it needs: register maps,
@@ -535,7 +617,17 @@ Switch configuration
   fully configured and ready to serve any kind of request. It is recommended
   to issue a software reset of the switch during this setup function in order to
   avoid relying on what a previous software agent such as a bootloader/firmware
-  may have previously configured.
+  may have previously configured. The method responsible for undoing any
+  applicable allocations or operations done here is ``teardown``.
+
+- ``port_setup`` and ``port_teardown``: methods for initialization and
+  destruction of per-port data structures. It is mandatory for some operations
+  such as registering and unregistering devlink port regions to be done from
+  these methods, otherwise they are optional. A port will be torn down only if
+  it has been previously set up. It is possible for a port to be set up during
+  probing only to be torn down immediately afterwards, for example in case its
+  PHY cannot be found. In this case, probing of the DSA switch continues
+  without that particular port.
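
As a rough sketch, the ops structure of a hypothetical driver (``foo_*`` names
assumed) ties the methods in this section together:

    static enum dsa_tag_protocol
    foo_get_tag_protocol(struct dsa_switch *ds, int port,
                         enum dsa_tag_protocol mprot)
    {
            /* May consider the CPU port number and the upstream switch's
             * protocol (mprot); a constant answer is equally valid.
             */
            return DSA_TAG_PROTO_NONE;      /* placeholder value */
    }

    static const struct dsa_switch_ops foo_switch_ops = {
            .get_tag_protocol       = foo_get_tag_protocol,
            .setup                  = foo_setup,
            .teardown               = foo_teardown,
            .port_setup             = foo_port_setup,
            .port_teardown          = foo_port_teardown,
    };
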
 
 PHY devices and link management
 -------------------------------
@@ -635,26 +727,198 @@ Power management
   ``BR_STATE_DISABLED`` and propagating changes to the hardware if this port is
   disabled while being a bridge member
 
+Address databases
+-----------------
+
+Switching hardware is expected to have a table for FDB entries; however, not all
+of them are active at the same time. An address database is the subset (partition)
+of FDB entries that is active (can be matched by address learning on RX, or FDB
+lookup on TX) depending on the state of the port. An address database may
+occasionally be called "FID" (Filtering ID) in this document, although the
+underlying implementation may choose whatever is available to the hardware.
+
+For example, all ports that belong to a VLAN-unaware bridge (which is
+*currently* VLAN-unaware) are expected to learn source addresses in the
+database associated by the driver with that bridge (and not with other
+VLAN-unaware bridges). During forwarding and FDB lookup, a packet received on a
+VLAN-unaware bridge port should be able to find a VLAN-unaware FDB entry having
+the same MAC DA as the packet, which is present on another port member of the
+same bridge. At the same time, the FDB lookup process must be able to not find
+an FDB entry having the same MAC DA as the packet, if that entry points towards
+a port which is a member of a different VLAN-unaware bridge (and is therefore
+associated with a different address database).
+
+Similarly, each VLAN of each offloaded VLAN-aware bridge should have an
+associated address database, which is shared by all ports which are members of
+that VLAN, but not shared by ports belonging to different bridges that are
+members of the same VID.
+
+In this context, a VLAN-unaware database means that all packets are expected to
+match on it irrespective of VLAN ID (only MAC address lookup), whereas a
+VLAN-aware database means that packets are supposed to match based on the VLAN
+ID from the classified 802.1Q header (or the pvid if untagged).
+
+At the bridge layer, VLAN-unaware FDB entries have the special VID value of 0,
+whereas VLAN-aware FDB entries have non-zero VID values. Note that a
+VLAN-unaware bridge may have VLAN-aware (non-zero VID) FDB entries, and a
+VLAN-aware bridge may have VLAN-unaware FDB entries. As in hardware, the
+software bridge keeps separate address databases, and offloads to hardware the
+FDB entries belonging to these databases, through switchdev, asynchronously
+relative to the moment when the databases become active or inactive.
+
+When a user port operates in standalone mode, its driver should configure it to
+use a separate database called a port private database. This is different from
+the databases described above, and should impede operation as a standalone
+port (packet in, packet out to the CPU port) as little as possible. For example,
+on ingress, it should not attempt to learn the MAC SA of ingress traffic, since
+learning is a bridging layer service and this is a standalone port, therefore
+it would consume useless space. With no address learning, the port private
+database should be empty in a naive implementation, and in this case, all
+received packets should be trivially flooded to the CPU port.
+
+DSA (cascade) and CPU ports are also called "shared" ports because they service
+multiple address databases, and the database that a packet should be associated
+to is usually embedded in the DSA tag. This means that the CPU port may
+simultaneously transport packets coming from a standalone port (which were
+classified by hardware in one address database), and from a bridge port (which
+were classified to a different address database).
+
+Switch drivers which satisfy certain criteria are able to optimize the naive
+configuration by removing the CPU port from the flooding domain of the switch,
+and programming the hardware with FDB entries pointing towards the CPU port
+only for those MAC addresses that software is known to be interested in.
+Packets which do not match a known FDB entry will not be delivered to the CPU,
+which will save CPU cycles required for creating an skb just to drop it.
+
+DSA is able to perform host address filtering for the following kinds of
+addresses:
+
+- Primary unicast MAC addresses of ports (``dev->dev_addr``). These are
+  associated with the port private database of the respective user port,
+  and the driver is notified to install them through ``port_fdb_add`` towards
+  the CPU port.
+
+- Secondary unicast and multicast MAC addresses of ports (addresses added
+  through ``dev_uc_add()`` and ``dev_mc_add()``). These are also associated
+  with the port private database of the respective user port.
+
+- Local/permanent bridge FDB entries (``BR_FDB_LOCAL``). These are the MAC
+  addresses of the bridge ports, for which packets must be terminated locally
+  and not forwarded. They are associated with the address database for that
+  bridge.
+
+- Static bridge FDB entries installed towards foreign (non-DSA) interfaces
+  present in the same bridge as some DSA switch ports. These are also
+  associated with the address database for that bridge.
+
+- Dynamically learned FDB entries on foreign interfaces present in the same
+  bridge as some DSA switch ports, only if ``ds->assisted_learning_on_cpu_port``
+  is set to true by the driver. These are associated with the address database
+  for that bridge.
+
+For various operations detailed below, DSA provides a ``dsa_db`` structure
+which can be of the following types:
+
+- ``DSA_DB_PORT``: the FDB (or MDB) entry to be installed or deleted belongs to
+  the port private database of user port ``db->dp``.
+- ``DSA_DB_BRIDGE``: the entry belongs to one of the address databases of bridge
+  ``db->bridge``. Separation between the VLAN-unaware database and the per-VID
+  databases of this bridge is expected to be done by the driver.
+- ``DSA_DB_LAG``: the entry belongs to the address database of LAG ``db->lag``.
+  Note: ``DSA_DB_LAG`` is currently unused and may be removed in the future.
+
+The drivers which act upon the ``dsa_db`` argument in ``port_fdb_add``,
+``port_mdb_add``, etc. should declare ``ds->fdb_isolation`` as true.
+
+DSA associates each offloaded bridge and each offloaded LAG with a one-based ID
+(``struct dsa_bridge :: num``, ``struct dsa_lag :: id``) for the purposes of
+refcounting addresses on shared ports. Drivers may piggyback on DSA's numbering
+scheme (the ID is readable through ``db->bridge.num`` and ``db->lag.id``) or may
+implement their own.
+
+Only the drivers which declare support for FDB isolation are notified of FDB
+entries on the CPU port belonging to ``DSA_DB_PORT`` databases.
+For compatibility/legacy reasons, ``DSA_DB_BRIDGE`` addresses are notified to
+drivers even if they do not support FDB isolation. However, ``db->bridge.num``
+and ``db->lag.id`` are always set to 0 in that case (to denote the lack of
+isolation, for refcounting purposes).
+
+Note that it is not mandatory for a switch driver to implement physically
+separate address databases for each standalone user port. Since FDB entries in
+the port private databases will always point to the CPU port, there is no risk
+for incorrect forwarding decisions. In this case, all standalone ports may
+share the same database, but the reference counting of host-filtered addresses
+(not deleting the FDB entry for a port's MAC address if it's still in use by
+another port) becomes the responsibility of the driver, because DSA is unaware
+that the port databases are in fact shared. This can be achieved by calling
+``dsa_fdb_present_in_other_db()`` and ``dsa_mdb_present_in_other_db()``.
+The downside is that the RX filtering lists of each user port are in fact
+shared, which means that user port A may accept a packet with a MAC DA it
+shouldn't have, only because that MAC address was in the RX filtering list of
+user port B. These packets will still be dropped in software, however.
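
As an illustration of how the ``dsa_db`` argument is meant to be consumed, a
hypothetical FDB install hook (the ``foo_*`` helpers are assumptions) could
dispatch on the database type:

    static int foo_port_fdb_add(struct dsa_switch *ds, int port,
                                const unsigned char *addr, u16 vid,
                                struct dsa_db db)
    {
            struct foo_priv *priv = ds->priv;

            switch (db.type) {
            case DSA_DB_PORT:
                    /* Standalone traffic: port private database of db.dp */
                    return foo_fdb_write(priv, foo_port_fid(priv, db.dp->index),
                                         addr, vid, port);
            case DSA_DB_BRIDGE:
                    /* One set of databases per bridge; separating the
                     * VLAN-unaware database from the per-VID ones is the
                     * driver's job.
                     */
                    return foo_fdb_write(priv,
                                         foo_bridge_fid(priv, db.bridge.num, vid),
                                         addr, vid, port);
            default:
                    return -EOPNOTSUPP;
            }
    }

A driver structured like this should also set ``ds->fdb_isolation`` to true so
that ``db->bridge.num`` carries a meaningful (non-zero) value.
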
+
 Bridge layer
 ------------
 
+Offloading the bridge forwarding plane is optional and handled by the methods
+below. They may be absent, they may return -EOPNOTSUPP, or ``ds->max_num_bridges``
+may be non-zero and exceeded; in any of these cases, joining a bridge is still
+possible, but packet forwarding will take place in software, and the ports
+under a software bridge must remain configured in the same way as for
+standalone operation, i.e. with all bridging service functions (address
+learning etc.) disabled, sending all received packets to the CPU port only.
+
+Concretely, a port starts offloading the forwarding plane of a bridge once it
+returns success to the ``port_bridge_join`` method, and stops doing so after
+``port_bridge_leave`` has been called. Offloading the bridge means autonomously
+learning FDB entries in accordance with the software bridge port's state, and
+autonomously forwarding (or flooding) received packets without CPU intervention.
+This is optional even when offloading a bridge port. Tagging protocol drivers
+are expected to call ``dsa_default_offload_fwd_mark(skb)`` for packets which
+have already been autonomously forwarded in the forwarding domain of the
+ingress switch port. DSA, through ``dsa_port_devlink_setup()``, considers all
+switch ports belonging to the same tree ID to be part of the same bridge
+forwarding domain (capable of autonomous forwarding to each other).
+
+Offloading the TX forwarding process of a bridge is a distinct concept from
+simply offloading its forwarding plane, and refers to the ability of certain
+driver and tag protocol combinations to transmit a single skb coming from the
+bridge device's transmit function to potentially multiple egress ports (and
+thereby avoid its cloning in software).
+
+Packets for which the bridge requests this behavior are called data plane
+packets and have ``skb->offload_fwd_mark`` set to true in the tag protocol
+driver's ``xmit`` function. Data plane packets are subject to FDB lookup,
+hardware learning on the CPU port, and do not override the port STP state.
+Additionally, replication of data plane packets (multicast, flooding) is
+handled in hardware and the bridge driver will transmit a single skb for each
+packet that may or may not need replication.
+
+When the TX forwarding offload is enabled, the tag protocol driver is
+responsible for injecting packets into the data plane of the hardware towards the
+correct bridging domain (FID) that the port is a part of. The port may be
+VLAN-unaware, and in this case the FID must be equal to the FID used by the
+driver for its VLAN-unaware address database associated with that bridge.
+Alternatively, the bridge may be VLAN-aware, and in that case, it is guaranteed
+that the packet is also VLAN-tagged with the VLAN ID that the bridge processed
+this packet in. It is the responsibility of the hardware to untag the VID on
+the egress-untagged ports, or keep the tag on the egress-tagged ones.
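
On the tag protocol side, a hypothetical ``xmit`` hook (the ``foo_*`` tag
builders are assumptions) would branch on the data plane marker:

    static struct sk_buff *foo_tag_xmit(struct sk_buff *skb,
                                        struct net_device *dev)
    {
            struct dsa_port *dp = dsa_slave_to_port(dev);

            if (skb->offload_fwd_mark) {
                    /* Data plane packet from the bridge: inject it into the
                     * bridge's FID and let the hardware forward/replicate.
                     */
                    foo_build_forward_tag(skb, foo_bridge_fid_of(dp));
            } else {
                    /* Control packet: steer it to exactly this port,
                     * bypassing FDB lookup and port STP state.
                     */
                    foo_build_from_cpu_tag(skb, dp->index);
            }

            return skb;
    }
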
+
 - ``port_bridge_join``: bridge layer function invoked when a given switch port is
   added to a bridge, this function should do what's necessary at the switch
   level to permit the joining port to be added to the relevant logical
   domain for it to ingress/egress traffic with other members of the bridge.
+  By setting the ``tx_fwd_offload`` argument to true, the TX forwarding process
+  of this bridge is also offloaded, as shown in the sketch after this list.
 
 - ``port_bridge_leave``: bridge layer function invoked when a given switch port is
   removed from a bridge, this function should do what's necessary at the
   switch level to deny the leaving port from ingress/egress traffic from the
-  remaining bridge members. When the port leaves the bridge, it should be aged
-  out at the switch hardware for the switch to (re) learn MAC addresses behind
-  this port.
+  remaining bridge members.
 
 - ``port_stp_state_set``: bridge layer function invoked when a given switch port STP
   state is computed by the bridge layer and should be propagated to switch
-  hardware to forward/block/learn traffic. The switch driver is responsible for
-  computing a STP state change based on current and asked parameters and perform
-  the relevant ageing based on the intersection results
+  hardware to forward/block/learn traffic.
 
 - ``port_bridge_flags``: bridge layer function invoked when a port must
   configure its settings for e.g. flooding of unknown traffic or source address
@@ -667,21 +931,11 @@ Bridge layer
   CPU port, and flooding towards the CPU port should also be enabled, due to a
   lack of an explicit address filtering mechanism in the DSA core.
 
-- ``port_bridge_tx_fwd_offload``: bridge layer function invoked after
-  ``port_bridge_join`` when a driver sets ``ds->num_fwd_offloading_bridges`` to
-  a non-zero value. Returning success in this function activates the TX
-  forwarding offload bridge feature for this port, which enables the tagging
-  protocol driver to inject data plane packets towards the bridging domain that
-  the port is a part of. Data plane packets are subject to FDB lookup, hardware
-  learning on the CPU port, and do not override the port STP state.
-  Additionally, replication of data plane packets (multicast, flooding) is
-  handled in hardware and the bridge driver will transmit a single skb for each
-  packet that needs replication. The method is provided as a configuration
-  point for drivers that need to configure the hardware for enabling this
-  feature.
-
-- ``port_bridge_tx_fwd_unoffload``: bridge layer function invoked when a driver
-  leaves a bridge port which had the TX forwarding offload feature enabled.
+- ``port_fast_age``: bridge layer function invoked when flushing the
+  dynamically learned FDB entries on the port is necessary. This is called when
+  transitioning from an STP state where learning should take place to an STP
+  state where it shouldn't, or when leaving a bridge, or when address learning
+  is turned off via ``port_bridge_flags``.
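
A minimal sketch of a join implementation that opts into the TX forwarding
offload (the ``foo_*`` helper is an assumption; the signature is the one
``dsa_switch_ops`` uses at the time of writing):

    static int foo_port_bridge_join(struct dsa_switch *ds, int port,
                                    struct dsa_bridge bridge,
                                    bool *tx_fwd_offload,
                                    struct netlink_ext_ack *extack)
    {
            struct foo_priv *priv = ds->priv;
            int err;

            /* Map the port into the bridge's forwarding domain so that it
             * can ingress/egress traffic with the other members.
             */
            err = foo_set_forwarding_domain(priv, port, bridge.num);
            if (err) {
                    NL_SET_ERR_MSG_MOD(extack, "cannot join forwarding domain");
                    return err;
            }

            /* Also offload the TX forwarding process of this bridge. */
            *tx_fwd_offload = true;
            return 0;
    }
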
 
 Bridge VLAN filtering
 ---------------------
@@ -697,55 +951,44 @@ Bridge VLAN filtering
   allowed.
 
 - ``port_vlan_add``: bridge layer function invoked when a VLAN is configured
-  (tagged or untagged) for the given switch port. If the operation is not
-  supported by the hardware, this function should return ``-EOPNOTSUPP`` to
-  inform the bridge code to fallback to a software implementation.
+  (tagged or untagged) for the given switch port. The CPU port becomes a member
+  of a VLAN only if a foreign bridge port is also a member of it (and
+  forwarding needs to take place in software), or the VLAN is installed to the
+  VLAN group of the bridge device itself, for termination purposes
+  (``bridge vlan add dev br0 vid 100 self``). VLANs on shared ports are
+  reference counted and removed when there is no user left. Drivers do not need
+  to manually install a VLAN on the CPU port.
 
 - ``port_vlan_del``: bridge layer function invoked when a VLAN is removed from the
   given switch port
 
-- ``port_vlan_dump``: bridge layer function invoked with a switchdev callback
-  function that the driver has to call for each VLAN the given port is a member
-  of. A switchdev object is used to carry the VID and bridge flags.
-
 - ``port_fdb_add``: bridge layer function invoked when the bridge wants to install a
   Forwarding Database entry, the switch hardware should be programmed with the
   specified address in the specified VLAN Id in the forwarding database
-  associated with this VLAN ID. If the operation is not supported, this
-  function should return ``-EOPNOTSUPP`` to inform the bridge code to fallback to
-  a software implementation.
-
-.. note:: VLAN ID 0 corresponds to the port private database, which, in the context
-        of DSA, would be its port-based VLAN, used by the associated bridge device.
+  associated with this VLAN ID.
 
 - ``port_fdb_del``: bridge layer function invoked when the bridge wants to remove a
   Forwarding Database entry, the switch hardware should be programmed to delete
   the specified MAC address from the specified VLAN ID if it was mapped into
   this port forwarding database
 
-- ``port_fdb_dump``: bridge layer function invoked with a switchdev callback
-  function that the driver has to call for each MAC address known to be behind
-  the given port. A switchdev object is used to carry the VID and FDB info.
+- ``port_fdb_dump``: bridge bypass function invoked by ``ndo_fdb_dump`` on the
+  physical DSA port interfaces. Since DSA does not attempt to keep its hardware
+  FDB entries in sync with the software bridge, this method is implemented as
+  a means to view the entries visible on user ports in the hardware database.
+  The entries reported by this function have the ``self`` flag in the output of
+  the ``bridge fdb show`` command.
 
 - ``port_mdb_add``: bridge layer function invoked when the bridge wants to install
-  a multicast database entry. If the operation is not supported, this function
-  should return ``-EOPNOTSUPP`` to inform the bridge code to fallback to a
-  software implementation. The switch hardware should be programmed with the
+  a multicast database entry. The switch hardware should be programmed with the
   specified address in the specified VLAN ID in the forwarding database
   associated with this VLAN ID.
 
-.. note:: VLAN ID 0 corresponds to the port private database, which, in the context
-        of DSA, would be its port-based VLAN, used by the associated bridge device.
-
 - ``port_mdb_del``: bridge layer function invoked when the bridge wants to remove a
   multicast database entry, the switch hardware should be programmed to delete
   the specified MAC address from the specified VLAN ID if it was mapped into
   this port forwarding database.
 
-- ``port_mdb_dump``: bridge layer function invoked with a switchdev callback
-  function that the driver has to call for each MAC address known to be behind
-  the given port. A switchdev object is used to carry the VID and MDB info.
-
 Link aggregation
 ----------------
 
index b3a534e..66c7223 100644 (file)
@@ -1052,11 +1052,7 @@ udp_rmem_min - INTEGER
        Default: 4K
 
 udp_wmem_min - INTEGER
-       Minimal size of send buffer used by UDP sockets in moderation.
-       Each UDP socket is able to use the size for sending data, even if
-       total pages of UDP sockets exceed udp_mem pressure. The unit is byte.
-
-       Default: 4K
+       UDP does not have tx memory accounting and this tunable has no effect.
 
 RAW variables
 =============
index fcf9a41..71b9272 100644 (file)
@@ -438,6 +438,13 @@ config MMU_GATHER_PAGE_SIZE
 
 config MMU_GATHER_NO_RANGE
        bool
+       select MMU_GATHER_MERGE_VMAS
+
+config MMU_GATHER_NO_FLUSH_CACHE
+       bool
+
+config MMU_GATHER_MERGE_VMAS
+       bool
 
 config MMU_GATHER_NO_GATHER
        bool
index 3498e65..702861c 100644 (file)
@@ -4,21 +4,6 @@
 #define __ASM_CSKY_TLB_H
 
 #include <asm/cacheflush.h>
-
-#define tlb_start_vma(tlb, vma) \
-       do { \
-               if (!(tlb)->fullmm) \
-                       flush_cache_range(vma, (vma)->vm_start, (vma)->vm_end); \
-       }  while (0)
-
-#define tlb_end_vma(tlb, vma) \
-       do { \
-               if (!(tlb)->fullmm) \
-                       flush_tlb_range(vma, (vma)->vm_start, (vma)->vm_end); \
-       }  while (0)
-
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
-
 #include <asm-generic/tlb.h>
 
 #endif /* __ASM_CSKY_TLB_H */
index 53a912b..b57daee 100644 (file)
@@ -108,6 +108,7 @@ config LOONGARCH
        select TRACE_IRQFLAGS_SUPPORT
        select USE_PERCPU_NUMA_NODE_ID
        select ZONE_DMA32
+       select MMU_GATHER_MERGE_VMAS if MMU
 
 config 32BIT
        bool
index 4f629ae..dd24f58 100644 (file)
@@ -137,16 +137,6 @@ static inline void invtlb_all(u32 op, u32 info, u64 addr)
                );
 }
 
-/*
- * LoongArch doesn't need any special per-pte or per-vma handling, except
- * we need to flush cache for area to be unmapped.
- */
-#define tlb_start_vma(tlb, vma)                                        \
-       do {                                                    \
-               if (!(tlb)->fullmm)                             \
-                       flush_cache_range(vma, vma->vm_start, vma->vm_end); \
-       }  while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
 #define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
 
 static void tlb_flush(struct mmu_gather *tlb);
index 7aa12e8..c235648 100644 (file)
@@ -256,6 +256,7 @@ config PPC
        select IRQ_FORCED_THREADING
        select MMU_GATHER_PAGE_SIZE
        select MMU_GATHER_RCU_TABLE_FREE
+       select MMU_GATHER_MERGE_VMAS
        select MODULES_USE_ELF_RELA
        select NEED_DMA_MAP_STATE               if PPC64 || NOT_COHERENT_CACHE
        select NEED_PER_CPU_EMBED_FIRST_CHUNK   if PPC64
index 09a9ae5..b3de610 100644 (file)
@@ -19,8 +19,6 @@
 
 #include <linux/pagemap.h>
 
-#define tlb_start_vma(tlb, vma)        do { } while (0)
-#define tlb_end_vma(tlb, vma)  do { } while (0)
 #define __tlb_remove_tlb_entry __tlb_remove_tlb_entry
 
 #define tlb_flush tlb_flush
index 8cd9e56..5a1a8df 100644 (file)
@@ -204,6 +204,7 @@ config S390
        select IOMMU_SUPPORT            if PCI
        select MMU_GATHER_NO_GATHER
        select MMU_GATHER_RCU_TABLE_FREE
+       select MMU_GATHER_MERGE_VMAS
        select MODULES_USE_ELF_RELA
        select NEED_DMA_MAP_STATE       if PCI
        select NEED_SG_DMA_LENGTH       if PCI
index fe6407f..3a5c8fb 100644 (file)
@@ -27,9 +27,6 @@ static inline void tlb_flush(struct mmu_gather *tlb);
 static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
                                          struct page *page, int page_size);
 
-#define tlb_start_vma(tlb, vma)                        do { } while (0)
-#define tlb_end_vma(tlb, vma)                  do { } while (0)
-
 #define tlb_flush tlb_flush
 #define pte_free_tlb pte_free_tlb
 #define pmd_free_tlb pmd_free_tlb
index ba449c4..4f7d1df 100644 (file)
@@ -67,6 +67,8 @@ config SPARC64
        select HAVE_KRETPROBES
        select HAVE_KPROBES
        select MMU_GATHER_RCU_TABLE_FREE if SMP
+       select MMU_GATHER_MERGE_VMAS
+       select MMU_GATHER_NO_FLUSH_CACHE
        select HAVE_ARCH_TRANSPARENT_HUGEPAGE
        select HAVE_DYNAMIC_FTRACE
        select HAVE_FTRACE_MCOUNT_RECORD
index 779a5a0..3037187 100644 (file)
@@ -22,8 +22,6 @@ void smp_flush_tlb_mm(struct mm_struct *mm);
 void __flush_tlb_pending(unsigned long, unsigned long, unsigned long *);
 void flush_tlb_pending(void);
 
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma)  do { } while (0)
 #define tlb_flush(tlb) flush_tlb_pending()
 
 /*
index e58798f..7fff10e 100644 (file)
@@ -245,6 +245,7 @@ config X86
        select HAVE_PERF_REGS
        select HAVE_PERF_USER_STACK_DUMP
        select MMU_GATHER_RCU_TABLE_FREE        if PARAVIRT
+       select MMU_GATHER_MERGE_VMAS
        select HAVE_POSIX_CPU_TIMERS_TASK_WORK
        select HAVE_REGS_AND_STACK_ACCESS_API
        select HAVE_RELIABLE_STACKTRACE         if UNWINDER_ORC || STACK_VALIDATION
index 1bfe979..580636c 100644 (file)
@@ -2,9 +2,6 @@
 #ifndef _ASM_X86_TLB_H
 #define _ASM_X86_TLB_H
 
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-
 #define tlb_flush tlb_flush
 static inline void tlb_flush(struct mmu_gather *tlb);
 
index 0c9a63b..b26e643 100644 (file)
@@ -421,6 +421,10 @@ out_free_lh:
  * @work: the worker that implements software debouncing
  * @sw_debounced: flag indicating if the software debouncer is active
  * @level: the current debounced physical level of the line
+ * @hdesc: the Hardware Timestamp Engine (HTE) descriptor
+ * @raw_level: the line level at the time of event
+ * @total_discard_seq: the running counter of the discarded events
+ * @last_seqno: the last sequence number before debounce period expires
  */
 struct line {
        struct gpio_desc *desc;
index 638bf4a..646fa86 100644 (file)
@@ -4231,10 +4231,6 @@ void irdma_cm_teardown_connections(struct irdma_device *iwdev, u32 *ipaddr,
        struct irdma_cm_node *cm_node;
        struct list_head teardown_list;
        struct ib_qp_attr attr;
-       struct irdma_sc_vsi *vsi = &iwdev->vsi;
-       struct irdma_sc_qp *sc_qp;
-       struct irdma_qp *qp;
-       int i;
 
        INIT_LIST_HEAD(&teardown_list);
 
@@ -4251,52 +4247,6 @@ void irdma_cm_teardown_connections(struct irdma_device *iwdev, u32 *ipaddr,
                        irdma_cm_disconn(cm_node->iwqp);
                irdma_rem_ref_cm_node(cm_node);
        }
-       if (!iwdev->roce_mode)
-               return;
-
-       INIT_LIST_HEAD(&teardown_list);
-       for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) {
-               mutex_lock(&vsi->qos[i].qos_mutex);
-               list_for_each_safe (list_node, list_core_temp,
-                                   &vsi->qos[i].qplist) {
-                       u32 qp_ip[4];
-
-                       sc_qp = container_of(list_node, struct irdma_sc_qp,
-                                            list);
-                       if (sc_qp->qp_uk.qp_type != IRDMA_QP_TYPE_ROCE_RC)
-                               continue;
-
-                       qp = sc_qp->qp_uk.back_qp;
-                       if (!disconnect_all) {
-                               if (nfo->ipv4)
-                                       qp_ip[0] = qp->udp_info.local_ipaddr[3];
-                               else
-                                       memcpy(qp_ip,
-                                              &qp->udp_info.local_ipaddr[0],
-                                              sizeof(qp_ip));
-                       }
-
-                       if (disconnect_all ||
-                           (nfo->vlan_id == (qp->udp_info.vlan_tag & VLAN_VID_MASK) &&
-                            !memcmp(qp_ip, ipaddr, nfo->ipv4 ? 4 : 16))) {
-                               spin_lock(&iwdev->rf->qptable_lock);
-                               if (iwdev->rf->qp_table[sc_qp->qp_uk.qp_id]) {
-                                       irdma_qp_add_ref(&qp->ibqp);
-                                       list_add(&qp->teardown_entry,
-                                                &teardown_list);
-                               }
-                               spin_unlock(&iwdev->rf->qptable_lock);
-                       }
-               }
-               mutex_unlock(&vsi->qos[i].qos_mutex);
-       }
-
-       list_for_each_safe (list_node, list_core_temp, &teardown_list) {
-               qp = container_of(list_node, struct irdma_qp, teardown_entry);
-               attr.qp_state = IB_QPS_ERR;
-               irdma_modify_qp_roce(&qp->ibqp, &attr, IB_QP_STATE, NULL);
-               irdma_qp_rem_ref(&qp->ibqp);
-       }
 }
 
 /**
index e46fc11..50299f5 100644 (file)
@@ -201,6 +201,7 @@ void i40iw_init_hw(struct irdma_sc_dev *dev)
        dev->hw_attrs.uk_attrs.max_hw_read_sges = I40IW_MAX_SGE_RD;
        dev->hw_attrs.max_hw_device_pages = I40IW_MAX_PUSH_PAGE_COUNT;
        dev->hw_attrs.uk_attrs.max_hw_inline = I40IW_MAX_INLINE_DATA_SIZE;
+       dev->hw_attrs.page_size_cap = SZ_4K | SZ_2M;
        dev->hw_attrs.max_hw_ird = I40IW_MAX_IRD_SIZE;
        dev->hw_attrs.max_hw_ord = I40IW_MAX_ORD_SIZE;
        dev->hw_attrs.max_hw_wqes = I40IW_MAX_WQ_ENTRIES;
index cf53b17..5986fd9 100644 (file)
@@ -139,6 +139,7 @@ void icrdma_init_hw(struct irdma_sc_dev *dev)
        dev->cqp_db = dev->hw_regs[IRDMA_CQPDB];
        dev->cq_ack_db = dev->hw_regs[IRDMA_CQACK];
        dev->irq_ops = &icrdma_irq_ops;
+       dev->hw_attrs.page_size_cap = SZ_4K | SZ_2M | SZ_1G;
        dev->hw_attrs.max_hw_ird = ICRDMA_MAX_IRD_SIZE;
        dev->hw_attrs.max_hw_ord = ICRDMA_MAX_ORD_SIZE;
        dev->hw_attrs.max_stat_inst = ICRDMA_MAX_STATS_COUNT;
index 46c1233..4789e85 100644 (file)
@@ -127,6 +127,7 @@ struct irdma_hw_attrs {
        u64 max_hw_outbound_msg_size;
        u64 max_hw_inbound_msg_size;
        u64 max_mr_size;
+       u64 page_size_cap;
        u32 min_hw_qp_id;
        u32 min_hw_aeq_size;
        u32 max_hw_aeq_size;
index c4412ec..96135a2 100644 (file)
@@ -32,7 +32,7 @@ static int irdma_query_device(struct ib_device *ibdev,
        props->vendor_part_id = pcidev->device;
 
        props->hw_ver = rf->pcidev->revision;
-       props->page_size_cap = SZ_4K | SZ_2M | SZ_1G;
+       props->page_size_cap = hw_attrs->page_size_cap;
        props->max_mr_size = hw_attrs->max_mr_size;
        props->max_qp = rf->max_qp - rf->used_qps;
        props->max_qp_wr = hw_attrs->max_qp_wr;
@@ -2781,7 +2781,7 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
 
        if (req.reg_type == IRDMA_MEMREG_TYPE_MEM) {
                iwmr->page_size = ib_umem_find_best_pgsz(region,
-                                                        SZ_4K | SZ_2M | SZ_1G,
+                                                        iwdev->rf->sc_dev.hw_attrs.page_size_cap,
                                                         virt);
                if (unlikely(!iwmr->page_size)) {
                        kfree(iwmr);
index 889e403..93da236 100644 (file)
@@ -850,9 +850,10 @@ static int gpmi_nfc_compute_timings(struct gpmi_nand_data *this,
        unsigned int tRP_ps;
        bool use_half_period;
        int sample_delay_ps, sample_delay_factor;
-       u16 busy_timeout_cycles;
+       unsigned int busy_timeout_cycles;
        u8 wrn_dly_sel;
        unsigned long clk_rate, min_rate;
+       u64 busy_timeout_ps;
 
        if (sdr->tRC_min >= 30000) {
                /* ONFI non-EDO modes [0-3] */
@@ -885,7 +886,8 @@ static int gpmi_nfc_compute_timings(struct gpmi_nand_data *this,
        addr_setup_cycles = TO_CYCLES(sdr->tALS_min, period_ps);
        data_setup_cycles = TO_CYCLES(sdr->tDS_min, period_ps);
        data_hold_cycles = TO_CYCLES(sdr->tDH_min, period_ps);
-       busy_timeout_cycles = TO_CYCLES(sdr->tWB_max + sdr->tR_max, period_ps);
+       busy_timeout_ps = max(sdr->tBERS_max, sdr->tPROG_max);
+       busy_timeout_cycles = TO_CYCLES(busy_timeout_ps, period_ps);
 
        hw->timing0 = BF_GPMI_TIMING0_ADDRESS_SETUP(addr_setup_cycles) |
                      BF_GPMI_TIMING0_DATA_HOLD(data_hold_cycles) |
index be2719a..e019526 100644 (file)
@@ -563,7 +563,7 @@ static struct sk_buff *amt_build_igmp_gq(struct amt_dev *amt)
        ihv3->nsrcs     = 0;
        ihv3->resv      = 0;
        ihv3->suppress  = false;
-       ihv3->qrv       = amt->net->ipv4.sysctl_igmp_qrv;
+       ihv3->qrv       = READ_ONCE(amt->net->ipv4.sysctl_igmp_qrv);
        ihv3->csum      = 0;
        csum            = &ihv3->csum;
        csum_start      = (void *)ihv3;
@@ -577,14 +577,14 @@ static struct sk_buff *amt_build_igmp_gq(struct amt_dev *amt)
        return skb;
 }
 
-static void __amt_update_gw_status(struct amt_dev *amt, enum amt_status status,
-                                  bool validate)
+static void amt_update_gw_status(struct amt_dev *amt, enum amt_status status,
+                                bool validate)
 {
        if (validate && amt->status >= status)
                return;
        netdev_dbg(amt->dev, "Update GW status %s -> %s",
                   status_str[amt->status], status_str[status]);
-       amt->status = status;
+       WRITE_ONCE(amt->status, status);
 }
 
 static void __amt_update_relay_status(struct amt_tunnel_list *tunnel,
@@ -600,14 +600,6 @@ static void __amt_update_relay_status(struct amt_tunnel_list *tunnel,
        tunnel->status = status;
 }
 
-static void amt_update_gw_status(struct amt_dev *amt, enum amt_status status,
-                                bool validate)
-{
-       spin_lock_bh(&amt->lock);
-       __amt_update_gw_status(amt, status, validate);
-       spin_unlock_bh(&amt->lock);
-}
-
 static void amt_update_relay_status(struct amt_tunnel_list *tunnel,
                                    enum amt_status status, bool validate)
 {
@@ -700,9 +692,7 @@ static void amt_send_discovery(struct amt_dev *amt)
        if (unlikely(net_xmit_eval(err)))
                amt->dev->stats.tx_errors++;
 
-       spin_lock_bh(&amt->lock);
-       __amt_update_gw_status(amt, AMT_STATUS_SENT_DISCOVERY, true);
-       spin_unlock_bh(&amt->lock);
+       amt_update_gw_status(amt, AMT_STATUS_SENT_DISCOVERY, true);
 out:
        rcu_read_unlock();
 }
@@ -900,6 +890,28 @@ static void amt_send_mld_gq(struct amt_dev *amt, struct amt_tunnel_list *tunnel)
 }
 #endif
 
+static int amt_queue_event(struct amt_dev *amt, enum amt_event event,
+                          struct sk_buff *skb)
+{
+       int index;
+
+       spin_lock_bh(&amt->lock);
+       if (amt->nr_events >= AMT_MAX_EVENTS) {
+               spin_unlock_bh(&amt->lock);
+               return 1;
+       }
+
+       index = (amt->event_idx + amt->nr_events) % AMT_MAX_EVENTS;
+       amt->events[index].event = event;
+       amt->events[index].skb = skb;
+       amt->nr_events++;
+       amt->event_idx %= AMT_MAX_EVENTS;
+       queue_work(amt_wq, &amt->event_wq);
+       spin_unlock_bh(&amt->lock);
+
+       return 0;
+}
+
 static void amt_secret_work(struct work_struct *work)
 {
        struct amt_dev *amt = container_of(to_delayed_work(work),
@@ -913,58 +925,72 @@ static void amt_secret_work(struct work_struct *work)
                         msecs_to_jiffies(AMT_SECRET_TIMEOUT));
 }
 
-static void amt_discovery_work(struct work_struct *work)
+static void amt_event_send_discovery(struct amt_dev *amt)
 {
-       struct amt_dev *amt = container_of(to_delayed_work(work),
-                                          struct amt_dev,
-                                          discovery_wq);
-
-       spin_lock_bh(&amt->lock);
        if (amt->status > AMT_STATUS_SENT_DISCOVERY)
                goto out;
        get_random_bytes(&amt->nonce, sizeof(__be32));
-       spin_unlock_bh(&amt->lock);
 
        amt_send_discovery(amt);
-       spin_lock_bh(&amt->lock);
 out:
        mod_delayed_work(amt_wq, &amt->discovery_wq,
                         msecs_to_jiffies(AMT_DISCOVERY_TIMEOUT));
-       spin_unlock_bh(&amt->lock);
 }
 
-static void amt_req_work(struct work_struct *work)
+static void amt_discovery_work(struct work_struct *work)
 {
        struct amt_dev *amt = container_of(to_delayed_work(work),
                                           struct amt_dev,
-                                          req_wq);
+                                          discovery_wq);
+
+       if (amt_queue_event(amt, AMT_EVENT_SEND_DISCOVERY, NULL))
+               mod_delayed_work(amt_wq, &amt->discovery_wq,
+                                msecs_to_jiffies(AMT_DISCOVERY_TIMEOUT));
+}
+
+static void amt_event_send_request(struct amt_dev *amt)
+{
        u32 exp;
 
-       spin_lock_bh(&amt->lock);
        if (amt->status < AMT_STATUS_RECEIVED_ADVERTISEMENT)
                goto out;
 
        if (amt->req_cnt > AMT_MAX_REQ_COUNT) {
                netdev_dbg(amt->dev, "Gateway is not ready");
                amt->qi = AMT_INIT_REQ_TIMEOUT;
-               amt->ready4 = false;
-               amt->ready6 = false;
+               WRITE_ONCE(amt->ready4, false);
+               WRITE_ONCE(amt->ready6, false);
                amt->remote_ip = 0;
-               __amt_update_gw_status(amt, AMT_STATUS_INIT, false);
+               amt_update_gw_status(amt, AMT_STATUS_INIT, false);
                amt->req_cnt = 0;
+               amt->nonce = 0;
                goto out;
        }
-       spin_unlock_bh(&amt->lock);
+
+       if (!amt->req_cnt) {
+               WRITE_ONCE(amt->ready4, false);
+               WRITE_ONCE(amt->ready6, false);
+               get_random_bytes(&amt->nonce, sizeof(__be32));
+       }
 
        amt_send_request(amt, false);
        amt_send_request(amt, true);
-       spin_lock_bh(&amt->lock);
-       __amt_update_gw_status(amt, AMT_STATUS_SENT_REQUEST, true);
+       amt_update_gw_status(amt, AMT_STATUS_SENT_REQUEST, true);
        amt->req_cnt++;
 out:
        exp = min_t(u32, (1 * (1 << amt->req_cnt)), AMT_MAX_REQ_TIMEOUT);
        mod_delayed_work(amt_wq, &amt->req_wq, msecs_to_jiffies(exp * 1000));
-       spin_unlock_bh(&amt->lock);
+}
+
+static void amt_req_work(struct work_struct *work)
+{
+       struct amt_dev *amt = container_of(to_delayed_work(work),
+                                          struct amt_dev,
+                                          req_wq);
+
+       if (amt_queue_event(amt, AMT_EVENT_SEND_REQUEST, NULL))
+               mod_delayed_work(amt_wq, &amt->req_wq,
+                                msecs_to_jiffies(100));
 }
 
 static bool amt_send_membership_update(struct amt_dev *amt,
@@ -1220,7 +1246,8 @@ static netdev_tx_t amt_dev_xmit(struct sk_buff *skb, struct net_device *dev)
                /* Gateway only passes IGMP/MLD packets */
                if (!report)
                        goto free;
-               if ((!v6 && !amt->ready4) || (v6 && !amt->ready6))
+               if ((!v6 && !READ_ONCE(amt->ready4)) ||
+                   (v6 && !READ_ONCE(amt->ready6)))
                        goto free;
                if (amt_send_membership_update(amt, skb,  v6))
                        goto free;
@@ -2236,6 +2263,10 @@ static bool amt_advertisement_handler(struct amt_dev *amt, struct sk_buff *skb)
            ipv4_is_zeronet(amta->ip4))
                return true;
 
+       if (amt->status != AMT_STATUS_SENT_DISCOVERY ||
+           amt->nonce != amta->nonce)
+               return true;
+
        amt->remote_ip = amta->ip4;
        netdev_dbg(amt->dev, "advertised remote ip = %pI4\n", &amt->remote_ip);
        mod_delayed_work(amt_wq, &amt->req_wq, 0);
@@ -2251,6 +2282,9 @@ static bool amt_multicast_data_handler(struct amt_dev *amt, struct sk_buff *skb)
        struct ethhdr *eth;
        struct iphdr *iph;
 
+       if (READ_ONCE(amt->status) != AMT_STATUS_SENT_UPDATE)
+               return true;
+
        hdr_size = sizeof(*amtmd) + sizeof(struct udphdr);
        if (!pskb_may_pull(skb, hdr_size))
                return true;
@@ -2325,6 +2359,9 @@ static bool amt_membership_query_handler(struct amt_dev *amt,
        if (amtmq->reserved || amtmq->version)
                return true;
 
+       if (amtmq->nonce != amt->nonce)
+               return true;
+
        hdr_size -= sizeof(*eth);
        if (iptunnel_pull_header(skb, hdr_size, htons(ETH_P_TEB), false))
                return true;
@@ -2339,6 +2376,9 @@ static bool amt_membership_query_handler(struct amt_dev *amt,
 
        iph = ip_hdr(skb);
        if (iph->version == 4) {
+               if (READ_ONCE(amt->ready4))
+                       return true;
+
                if (!pskb_may_pull(skb, sizeof(*iph) + AMT_IPHDR_OPTS +
                                   sizeof(*ihv3)))
                        return true;
@@ -2349,12 +2389,10 @@ static bool amt_membership_query_handler(struct amt_dev *amt,
                ihv3 = skb_pull(skb, sizeof(*iph) + AMT_IPHDR_OPTS);
                skb_reset_transport_header(skb);
                skb_push(skb, sizeof(*iph) + AMT_IPHDR_OPTS);
-               spin_lock_bh(&amt->lock);
-               amt->ready4 = true;
+               WRITE_ONCE(amt->ready4, true);
                amt->mac = amtmq->response_mac;
                amt->req_cnt = 0;
                amt->qi = ihv3->qqic;
-               spin_unlock_bh(&amt->lock);
                skb->protocol = htons(ETH_P_IP);
                eth->h_proto = htons(ETH_P_IP);
                ip_eth_mc_map(iph->daddr, eth->h_dest);
@@ -2363,6 +2401,9 @@ static bool amt_membership_query_handler(struct amt_dev *amt,
                struct mld2_query *mld2q;
                struct ipv6hdr *ip6h;
 
+               if (READ_ONCE(amt->ready6))
+                       return true;
+
                if (!pskb_may_pull(skb, sizeof(*ip6h) + AMT_IP6HDR_OPTS +
                                   sizeof(*mld2q)))
                        return true;
@@ -2374,12 +2415,10 @@ static bool amt_membership_query_handler(struct amt_dev *amt,
                mld2q = skb_pull(skb, sizeof(*ip6h) + AMT_IP6HDR_OPTS);
                skb_reset_transport_header(skb);
                skb_push(skb, sizeof(*ip6h) + AMT_IP6HDR_OPTS);
-               spin_lock_bh(&amt->lock);
-               amt->ready6 = true;
+               WRITE_ONCE(amt->ready6, true);
                amt->mac = amtmq->response_mac;
                amt->req_cnt = 0;
                amt->qi = mld2q->mld2q_qqic;
-               spin_unlock_bh(&amt->lock);
                skb->protocol = htons(ETH_P_IPV6);
                eth->h_proto = htons(ETH_P_IPV6);
                ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest);
@@ -2392,12 +2431,14 @@ static bool amt_membership_query_handler(struct amt_dev *amt,
        skb->pkt_type = PACKET_MULTICAST;
        skb->ip_summed = CHECKSUM_NONE;
        len = skb->len;
+       local_bh_disable();
        if (__netif_rx(skb) == NET_RX_SUCCESS) {
                amt_update_gw_status(amt, AMT_STATUS_RECEIVED_QUERY, true);
                dev_sw_netstats_rx_add(amt->dev, len);
        } else {
                amt->dev->stats.rx_dropped++;
        }
+       local_bh_enable();
 
        return false;
 }
@@ -2638,7 +2679,9 @@ static bool amt_request_handler(struct amt_dev *amt, struct sk_buff *skb)
                if (tunnel->ip4 == iph->saddr)
                        goto send;
 
+       spin_lock_bh(&amt->lock);
        if (amt->nr_tunnels >= amt->max_tunnels) {
+               spin_unlock_bh(&amt->lock);
                icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
                return true;
        }
@@ -2646,8 +2689,10 @@ static bool amt_request_handler(struct amt_dev *amt, struct sk_buff *skb)
        tunnel = kzalloc(sizeof(*tunnel) +
                         (sizeof(struct hlist_head) * amt->hash_buckets),
                         GFP_ATOMIC);
-       if (!tunnel)
+       if (!tunnel) {
+               spin_unlock_bh(&amt->lock);
                return true;
+       }
 
        tunnel->source_port = udph->source;
        tunnel->ip4 = iph->saddr;
@@ -2660,10 +2705,9 @@ static bool amt_request_handler(struct amt_dev *amt, struct sk_buff *skb)
 
        INIT_DELAYED_WORK(&tunnel->gc_wq, amt_tunnel_expire);
 
-       spin_lock_bh(&amt->lock);
        list_add_tail_rcu(&tunnel->list, &amt->tunnel_list);
        tunnel->key = amt->key;
-       amt_update_relay_status(tunnel, AMT_STATUS_RECEIVED_REQUEST, true);
+       __amt_update_relay_status(tunnel, AMT_STATUS_RECEIVED_REQUEST, true);
        amt->nr_tunnels++;
        mod_delayed_work(amt_wq, &tunnel->gc_wq,
                         msecs_to_jiffies(amt_gmi(amt)));
@@ -2688,6 +2732,38 @@ send:
        return false;
 }
 
+static void amt_gw_rcv(struct amt_dev *amt, struct sk_buff *skb)
+{
+       int type = amt_parse_type(skb);
+       int err = 1;
+
+       if (type == -1)
+               goto drop;
+
+       if (amt->mode == AMT_MODE_GATEWAY) {
+               switch (type) {
+               case AMT_MSG_ADVERTISEMENT:
+                       err = amt_advertisement_handler(amt, skb);
+                       break;
+               case AMT_MSG_MEMBERSHIP_QUERY:
+                       err = amt_membership_query_handler(amt, skb);
+                       if (!err)
+                               return;
+                       break;
+               default:
+                       netdev_dbg(amt->dev, "Invalid type of Gateway\n");
+                       break;
+               }
+       }
+drop:
+       if (err) {
+               amt->dev->stats.rx_dropped++;
+               kfree_skb(skb);
+       } else {
+               consume_skb(skb);
+       }
+}
+
 static int amt_rcv(struct sock *sk, struct sk_buff *skb)
 {
        struct amt_dev *amt;
@@ -2719,8 +2795,12 @@ static int amt_rcv(struct sock *sk, struct sk_buff *skb)
                                err = true;
                                goto drop;
                        }
-                       err = amt_advertisement_handler(amt, skb);
-                       break;
+                       if (amt_queue_event(amt, AMT_EVENT_RECEIVE, skb)) {
+                               netdev_dbg(amt->dev, "AMT Event queue full\n");
+                               err = true;
+                               goto drop;
+                       }
+                       goto out;
                case AMT_MSG_MULTICAST_DATA:
                        if (iph->saddr != amt->remote_ip) {
                                netdev_dbg(amt->dev, "Invalid Relay IP\n");
@@ -2738,11 +2818,12 @@ static int amt_rcv(struct sock *sk, struct sk_buff *skb)
                                err = true;
                                goto drop;
                        }
-                       err = amt_membership_query_handler(amt, skb);
-                       if (err)
+                       if (amt_queue_event(amt, AMT_EVENT_RECEIVE, skb)) {
+                               netdev_dbg(amt->dev, "AMT Event queue full\n");
+                               err = true;
                                goto drop;
-                       else
-                               goto out;
+                       }
+                       goto out;
                default:
                        err = true;
                        netdev_dbg(amt->dev, "Invalid type of Gateway\n");
@@ -2780,6 +2861,46 @@ out:
        return 0;
 }
 
+static void amt_event_work(struct work_struct *work)
+{
+       struct amt_dev *amt = container_of(work, struct amt_dev, event_wq);
+       struct sk_buff *skb;
+       u8 event;
+       int i;
+
+       for (i = 0; i < AMT_MAX_EVENTS; i++) {
+               spin_lock_bh(&amt->lock);
+               if (amt->nr_events == 0) {
+                       spin_unlock_bh(&amt->lock);
+                       return;
+               }
+               event = amt->events[amt->event_idx].event;
+               skb = amt->events[amt->event_idx].skb;
+               amt->events[amt->event_idx].event = AMT_EVENT_NONE;
+               amt->events[amt->event_idx].skb = NULL;
+               amt->nr_events--;
+               amt->event_idx++;
+               amt->event_idx %= AMT_MAX_EVENTS;
+               spin_unlock_bh(&amt->lock);
+
+               switch (event) {
+               case AMT_EVENT_RECEIVE:
+                       amt_gw_rcv(amt, skb);
+                       break;
+               case AMT_EVENT_SEND_DISCOVERY:
+                       amt_event_send_discovery(amt);
+                       break;
+               case AMT_EVENT_SEND_REQUEST:
+                       amt_event_send_request(amt);
+                       break;
+               default:
+                       if (skb)
+                               kfree_skb(skb);
+                       break;
+               }
+       }
+}
+
 static int amt_err_lookup(struct sock *sk, struct sk_buff *skb)
 {
        struct amt_dev *amt;
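
amt_event_work() above drains a small, lock-protected ring: each pass takes amt->lock just long enough to pop one slot, then handles the event with the lock dropped, and the loop is bounded by AMT_MAX_EVENTS so one work invocation cannot livelock against producers. A runnable userspace sketch of the same shape, with all names invented:

```c
#include <pthread.h>
#include <stdio.h>

#define MAX_EVENTS 16

struct ring {
        pthread_mutex_t lock;
        int events[MAX_EVENTS];
        int idx;        /* next entry to consume */
        int nr;         /* entries queued */
};

static int ring_push(struct ring *r, int ev)
{
        int ret = -1;

        pthread_mutex_lock(&r->lock);
        if (r->nr < MAX_EVENTS) {
                r->events[(r->idx + r->nr) % MAX_EVENTS] = ev;
                r->nr++;
                ret = 0;
        }
        pthread_mutex_unlock(&r->lock);
        return ret;     /* like amt_queue_event(): fails when full */
}

static void ring_drain(struct ring *r)
{
        for (int i = 0; i < MAX_EVENTS; i++) {
                int ev;

                pthread_mutex_lock(&r->lock);
                if (!r->nr) {
                        pthread_mutex_unlock(&r->lock);
                        return;
                }
                ev = r->events[r->idx];
                r->nr--;
                r->idx = (r->idx + 1) % MAX_EVENTS;
                pthread_mutex_unlock(&r->lock);

                printf("handle event %d\n", ev); /* lock not held here */
        }
}

int main(void)
{
        struct ring r = { .lock = PTHREAD_MUTEX_INITIALIZER };

        ring_push(&r, 1);
        ring_push(&r, 2);
        ring_drain(&r);
        return 0;
}
```
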
@@ -2804,7 +2925,7 @@ static int amt_err_lookup(struct sock *sk, struct sk_buff *skb)
                break;
        case AMT_MSG_REQUEST:
        case AMT_MSG_MEMBERSHIP_UPDATE:
-               if (amt->status >= AMT_STATUS_RECEIVED_ADVERTISEMENT)
+               if (READ_ONCE(amt->status) >= AMT_STATUS_RECEIVED_ADVERTISEMENT)
                        mod_delayed_work(amt_wq, &amt->req_wq, 0);
                break;
        default:
@@ -2867,6 +2988,8 @@ static int amt_dev_open(struct net_device *dev)
 
        amt->ready4 = false;
        amt->ready6 = false;
+       amt->event_idx = 0;
+       amt->nr_events = 0;
 
        err = amt_socket_create(amt);
        if (err)
@@ -2874,6 +2997,7 @@ static int amt_dev_open(struct net_device *dev)
 
        amt->req_cnt = 0;
        amt->remote_ip = 0;
+       amt->nonce = 0;
        get_random_bytes(&amt->key, sizeof(siphash_key_t));
 
        amt->status = AMT_STATUS_INIT;
@@ -2892,6 +3016,8 @@ static int amt_dev_stop(struct net_device *dev)
        struct amt_dev *amt = netdev_priv(dev);
        struct amt_tunnel_list *tunnel, *tmp;
        struct socket *sock;
+       struct sk_buff *skb;
+       int i;
 
        cancel_delayed_work_sync(&amt->req_wq);
        cancel_delayed_work_sync(&amt->discovery_wq);
@@ -2904,6 +3030,15 @@ static int amt_dev_stop(struct net_device *dev)
        if (sock)
                udp_tunnel_sock_release(sock);
 
+       cancel_work_sync(&amt->event_wq);
+       for (i = 0; i < AMT_MAX_EVENTS; i++) {
+               skb = amt->events[i].skb;
+               if (skb)
+                       kfree_skb(skb);
+               amt->events[i].event = AMT_EVENT_NONE;
+               amt->events[i].skb = NULL;
+       }
+
        amt->ready4 = false;
        amt->ready6 = false;
        amt->req_cnt = 0;
@@ -3095,7 +3230,7 @@ static int amt_newlink(struct net *net, struct net_device *dev,
                goto err;
        }
        if (amt->mode == AMT_MODE_RELAY) {
-               amt->qrv = amt->net->ipv4.sysctl_igmp_qrv;
+               amt->qrv = READ_ONCE(amt->net->ipv4.sysctl_igmp_qrv);
                amt->qri = 10;
                dev->needed_headroom = amt->stream_dev->needed_headroom +
                                       AMT_RELAY_HLEN;
@@ -3146,8 +3281,8 @@ static int amt_newlink(struct net *net, struct net_device *dev,
        INIT_DELAYED_WORK(&amt->discovery_wq, amt_discovery_work);
        INIT_DELAYED_WORK(&amt->req_wq, amt_req_work);
        INIT_DELAYED_WORK(&amt->secret_wq, amt_secret_work);
+       INIT_WORK(&amt->event_wq, amt_event_work);
        INIT_LIST_HEAD(&amt->tunnel_list);
-
        return 0;
 err:
        dev_put(amt->stream_dev);
@@ -3280,7 +3415,7 @@ static int __init amt_init(void)

        if (err < 0)
                goto unregister_notifier;
 
-       amt_wq = alloc_workqueue("amt", WQ_UNBOUND, 1);
+       amt_wq = alloc_workqueue("amt", WQ_UNBOUND, 0);
        if (!amt_wq) {
                err = -ENOMEM;
                goto rtnl_unregister;
index ba42cef..cb0321e 100644
@@ -1843,6 +1843,7 @@ static int rcar_canfd_probe(struct platform_device *pdev)
                of_child = of_get_child_by_name(pdev->dev.of_node, name);
                if (of_child && of_device_is_available(of_child))
                        channels_mask |= BIT(i);
+               of_node_put(of_child);
        }
 
        if (chip_id != RENESAS_RZG2L) {
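
The rcar_canfd hunk above plugs a device-node refcount leak: of_get_child_by_name() returns the child with its refcount raised, so every loop iteration must drop it whether or not the node turned out to be usable (the ksz_common hunk further down fixes the same pattern on an early-return path). A runnable toy version of the get/put discipline, with invented names:

```c
#include <stdio.h>
#include <stdlib.h>

struct node { int refcnt; char name[16]; };

static struct node *get_child(const char *name)  /* like of_get_child_by_name() */
{
        struct node *n = calloc(1, sizeof(*n));

        if (!n)
                return NULL;
        snprintf(n->name, sizeof(n->name), "%s", name);
        n->refcnt = 1;          /* caller owns one reference */
        return n;
}

static void node_put(struct node *n)             /* like of_node_put() */
{
        if (n && --n->refcnt == 0)
                free(n);
}

int main(void)
{
        const char *names[] = { "channel0", "channel1" };
        unsigned int mask = 0;

        for (unsigned int i = 0; i < 2; i++) {
                struct node *child = get_child(names[i]);

                if (child)
                        mask |= 1u << i;
                node_put(child); /* the fix: drop the ref on every path */
        }
        printf("mask=%#x\n", mask);
        return 0;
}
```
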
index 9b47b07..bc65185 100644
@@ -1690,8 +1690,8 @@ static int mcp251xfd_register_chip_detect(struct mcp251xfd_priv *priv)
        u32 osc;
        int err;
 
-       /* The OSC_LPMEN is only supported on MCP2518FD, so use it to
-        * autodetect the model.
+       /* The OSC_LPMEN is only supported on MCP2518FD and MCP251863,
+        * so use it to autodetect the model.
         */
        err = regmap_update_bits(priv->map_reg, MCP251XFD_REG_OSC,
                                 MCP251XFD_REG_OSC_LPMEN,
@@ -1703,10 +1703,18 @@ static int mcp251xfd_register_chip_detect(struct mcp251xfd_priv *priv)
        if (err)
                return err;
 
-       if (osc & MCP251XFD_REG_OSC_LPMEN)
-               devtype_data = &mcp251xfd_devtype_data_mcp2518fd;
-       else
+       if (osc & MCP251XFD_REG_OSC_LPMEN) {
+               /* We cannot distinguish between MCP2518FD and
+                * MCP251863. If firmware specifies MCP251863, keep
+                * it, otherwise set to MCP2518FD.
+                */
+               if (mcp251xfd_is_251863(priv))
+                       devtype_data = &mcp251xfd_devtype_data_mcp251863;
+               else
+                       devtype_data = &mcp251xfd_devtype_data_mcp2518fd;
+       } else {
                devtype_data = &mcp251xfd_devtype_data_mcp2517fd;
+       }
 
        if (!mcp251xfd_is_251XFD(priv) &&
            priv->devtype_data.model != devtype_data->model) {
index 9ca8c8d..92a500e 100644
@@ -1038,18 +1038,21 @@ int ksz_switch_register(struct ksz_device *dev,
                ports = of_get_child_by_name(dev->dev->of_node, "ethernet-ports");
                if (!ports)
                        ports = of_get_child_by_name(dev->dev->of_node, "ports");
-               if (ports)
+               if (ports) {
                        for_each_available_child_of_node(ports, port) {
                                if (of_property_read_u32(port, "reg",
                                                         &port_num))
                                        continue;
                                if (!(dev->port_mask & BIT(port_num))) {
                                        of_node_put(port);
+                                       of_node_put(ports);
                                        return -EINVAL;
                                }
                                of_get_phy_mode(port,
                                                &dev->ports[port_num].interface);
                        }
+                       of_node_put(ports);
+               }
                dev->synclko_125 = of_property_read_bool(dev->dev->of_node,
                                                         "microchip,synclko-125");
                dev->synclko_disable = of_property_read_bool(dev->dev->of_node,
index 72b6fc1..698c7d1 100644
@@ -3382,12 +3382,28 @@ static const struct of_device_id sja1105_dt_ids[] = {
 };
 MODULE_DEVICE_TABLE(of, sja1105_dt_ids);
 
+static const struct spi_device_id sja1105_spi_ids[] = {
+       { "sja1105e" },
+       { "sja1105t" },
+       { "sja1105p" },
+       { "sja1105q" },
+       { "sja1105r" },
+       { "sja1105s" },
+       { "sja1110a" },
+       { "sja1110b" },
+       { "sja1110c" },
+       { "sja1110d" },
+       { },
+};
+MODULE_DEVICE_TABLE(spi, sja1105_spi_ids);
+
 static struct spi_driver sja1105_driver = {
        .driver = {
                .name  = "sja1105",
                .owner = THIS_MODULE,
                .of_match_table = of_match_ptr(sja1105_dt_ids),
        },
+       .id_table = sja1105_spi_ids,
        .probe  = sja1105_probe,
        .remove = sja1105_remove,
        .shutdown = sja1105_shutdown,
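
The sja1105 hunk above (and the vsc73xx one below) adds an spi_device_id table alongside the existing OF match table. The SPI core also matches plain "sja1105e"-style modaliases against id_table, and MODULE_DEVICE_TABLE(spi, ...) emits the "spi:..." aliases used for module autoloading, so binding keeps working when the OF path is not taken. A hedged kernel-style sketch of the pairing (invented driver name; builds only inside a kernel tree):

```c
#include <linux/module.h>
#include <linux/spi/spi.h>

static const struct of_device_id demo_of_match[] = {
        { .compatible = "vendor,demo-switch" },
        { }
};
MODULE_DEVICE_TABLE(of, demo_of_match);

/* Matched against spi->modalias when OF matching does not apply,
 * and emitted as "spi:demo-switch" aliases for autoloading. */
static const struct spi_device_id demo_spi_ids[] = {
        { "demo-switch" },
        { }
};
MODULE_DEVICE_TABLE(spi, demo_spi_ids);

static int demo_probe(struct spi_device *spi)
{
        return 0;
}

static struct spi_driver demo_driver = {
        .driver = {
                .name = "demo-switch",
                .of_match_table = demo_of_match,
        },
        .id_table = demo_spi_ids,
        .probe = demo_probe,
};
module_spi_driver(demo_driver);
MODULE_LICENSE("GPL");
```
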
index 3110895..97a92e6 100644
@@ -205,10 +205,20 @@ static const struct of_device_id vsc73xx_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, vsc73xx_of_match);
 
+static const struct spi_device_id vsc73xx_spi_ids[] = {
+       { "vsc7385" },
+       { "vsc7388" },
+       { "vsc7395" },
+       { "vsc7398" },
+       { },
+};
+MODULE_DEVICE_TABLE(spi, vsc73xx_spi_ids);
+
 static struct spi_driver vsc73xx_spi_driver = {
        .probe = vsc73xx_spi_probe,
        .remove = vsc73xx_spi_remove,
        .shutdown = vsc73xx_spi_shutdown,
+       .id_table = vsc73xx_spi_ids,
        .driver = {
                .name = "vsc73xx-spi",
                .of_match_table = vsc73xx_of_match,
index 7c760aa..ddfe920 100644
@@ -1236,8 +1236,8 @@ static struct sock *chtls_recv_sock(struct sock *lsk,
        csk->sndbuf = newsk->sk_sndbuf;
        csk->smac_idx = ((struct port_info *)netdev_priv(ndev))->smt_idx;
        RCV_WSCALE(tp) = select_rcv_wscale(tcp_full_space(newsk),
-                                          sock_net(newsk)->
-                                               ipv4.sysctl_tcp_window_scaling,
+                                          READ_ONCE(sock_net(newsk)->
+                                                    ipv4.sysctl_tcp_window_scaling),
                                           tp->window_clamp);
        neigh_release(n);
        inet_inherit_port(&tcp_hashinfo, lsk, newsk);
@@ -1384,7 +1384,7 @@ static void chtls_pass_accept_request(struct sock *sk,
 #endif
        }
        if (req->tcpopt.wsf <= 14 &&
-           sock_net(sk)->ipv4.sysctl_tcp_window_scaling) {
+           READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) {
                inet_rsk(oreq)->wscale_ok = 1;
                inet_rsk(oreq)->snd_wscale = req->tcpopt.wsf;
        }
index 528eb0f..b4f5e57 100644
@@ -2287,7 +2287,7 @@ err:
 
 /* Uses sync mcc */
 int be_cmd_read_port_transceiver_data(struct be_adapter *adapter,
-                                     u8 page_num, u8 *data)
+                                     u8 page_num, u32 off, u32 len, u8 *data)
 {
        struct be_dma_mem cmd;
        struct be_mcc_wrb *wrb;
@@ -2321,10 +2321,10 @@ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter,
        req->port = cpu_to_le32(adapter->hba_port_num);
        req->page_num = cpu_to_le32(page_num);
        status = be_mcc_notify_wait(adapter);
-       if (!status) {
+       if (!status && len > 0) {
                struct be_cmd_resp_port_type *resp = cmd.va;
 
-               memcpy(data, resp->page_data, PAGE_DATA_LEN);
+               memcpy(data, resp->page_data + off, len);
        }
 err:
        mutex_unlock(&adapter->mcc_lock);
@@ -2415,7 +2415,7 @@ int be_cmd_query_cable_type(struct be_adapter *adapter)
        int status;
 
        status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0,
-                                                  page_data);
+                                                  0, PAGE_DATA_LEN, page_data);
        if (!status) {
                switch (adapter->phy.interface_type) {
                case PHY_TYPE_QSFP:
@@ -2440,7 +2440,7 @@ int be_cmd_query_sfp_info(struct be_adapter *adapter)
        int status;
 
        status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0,
-                                                  page_data);
+                                                  0, PAGE_DATA_LEN, page_data);
        if (!status) {
                strlcpy(adapter->phy.vendor_name, page_data +
                        SFP_VENDOR_NAME_OFFSET, SFP_VENDOR_NAME_LEN - 1);
index db1f3b9..e2085c6 100644
@@ -2427,7 +2427,7 @@ int be_cmd_set_beacon_state(struct be_adapter *adapter, u8 port_num, u8 beacon,
 int be_cmd_get_beacon_state(struct be_adapter *adapter, u8 port_num,
                            u32 *state);
 int be_cmd_read_port_transceiver_data(struct be_adapter *adapter,
-                                     u8 page_num, u8 *data);
+                                     u8 page_num, u32 off, u32 len, u8 *data);
 int be_cmd_query_cable_type(struct be_adapter *adapter);
 int be_cmd_query_sfp_info(struct be_adapter *adapter);
 int lancer_cmd_read_object(struct be_adapter *adapter, struct be_dma_mem *cmd,
index dfa7843..bd0df18 100644
@@ -1344,7 +1344,7 @@ static int be_get_module_info(struct net_device *netdev,
                return -EOPNOTSUPP;
 
        status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0,
-                                                  page_data);
+                                                  0, PAGE_DATA_LEN, page_data);
        if (!status) {
                if (!page_data[SFP_PLUS_SFF_8472_COMP]) {
                        modinfo->type = ETH_MODULE_SFF_8079;
@@ -1362,25 +1362,32 @@ static int be_get_module_eeprom(struct net_device *netdev,
 {
        struct be_adapter *adapter = netdev_priv(netdev);
        int status;
+       u32 begin, end;
 
        if (!check_privilege(adapter, MAX_PRIVILEGES))
                return -EOPNOTSUPP;
 
-       status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0,
-                                                  data);
-       if (status)
-               goto err;
+       begin = eeprom->offset;
+       end = eeprom->offset + eeprom->len;
+
+       if (begin < PAGE_DATA_LEN) {
+               status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0, begin,
+                                                          min_t(u32, end, PAGE_DATA_LEN) - begin,
+                                                          data);
+               if (status)
+                       goto err;
+
+               data += PAGE_DATA_LEN - begin;
+               begin = PAGE_DATA_LEN;
+       }
 
-       if (eeprom->offset + eeprom->len > PAGE_DATA_LEN) {
-               status = be_cmd_read_port_transceiver_data(adapter,
-                                                          TR_PAGE_A2,
-                                                          data +
-                                                          PAGE_DATA_LEN);
+       if (end > PAGE_DATA_LEN) {
+               status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A2,
+                                                          begin - PAGE_DATA_LEN,
+                                                          end - begin, data);
                if (status)
                        goto err;
        }
-       if (eeprom->offset)
-               memcpy(data, data + eeprom->offset, eeprom->len);
 err:
        return be_cmd_status(status);
 }
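
The be_ethtool rework above stops reading whole transceiver pages and shuffling bytes afterwards: page A0 holds bytes [0, PAGE_DATA_LEN) and page A2 the bytes above that, so the requested (offset, len) window is clipped against the page boundary and split into at most two reads. A runnable sketch of the same arithmetic, with read_page() standing in for be_cmd_read_port_transceiver_data():

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define PAGE_DATA_LEN 256

static int read_page(int page, uint32_t off, uint32_t len, uint8_t *dst)
{
        memset(dst, page, len);         /* fake data: the page number */
        printf("page %#x: off=%u len=%u\n", page, off, len);
        return 0;
}

static int read_module_eeprom(uint32_t off, uint32_t len, uint8_t *data)
{
        uint32_t begin = off, end = off + len;
        int err;

        if (begin < PAGE_DATA_LEN) {
                uint32_t chunk = (end < PAGE_DATA_LEN ? end : PAGE_DATA_LEN)
                                 - begin;

                err = read_page(0xA0, begin, chunk, data);
                if (err)
                        return err;
                data += chunk;
                begin = PAGE_DATA_LEN;
        }
        if (end > PAGE_DATA_LEN)        /* remainder lives on page A2 */
                return read_page(0xA2, begin - PAGE_DATA_LEN,
                                 end - begin, data);
        return 0;
}

int main(void)
{
        uint8_t buf[64];

        /* A request straddling the boundary: bytes 240..255 come from
         * page A0, bytes 256..271 from page A2 at offset 0. */
        return read_module_eeprom(240, 32, buf);
}
```
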
index 13382df..bcf680e 100644
@@ -630,7 +630,6 @@ struct e1000_phy_info {
        bool disable_polarity_correction;
        bool is_mdix;
        bool polarity_correction;
-       bool reset_disable;
        bool speed_downgraded;
        bool autoneg_wait_to_complete;
 };
index e6c8e6d..9466f65 100644
@@ -2050,10 +2050,6 @@ static s32 e1000_check_reset_block_ich8lan(struct e1000_hw *hw)
        bool blocked = false;
        int i = 0;
 
-       /* Check the PHY (LCD) reset flag */
-       if (hw->phy.reset_disable)
-               return true;
-
        while ((blocked = !(er32(FWSM) & E1000_ICH_FWSM_RSPCIPHY)) &&
               (i++ < 30))
                usleep_range(10000, 11000);
index 638a3dd..2504b11 100644
 #define I217_CGFREG_ENABLE_MTA_RESET   0x0002
 #define I217_MEMPWR                    PHY_REG(772, 26)
 #define I217_MEMPWR_DISABLE_SMB_RELEASE        0x0010
-#define I217_MEMPWR_MOEM               0x1000
 
 /* Receive Address Initial CRC Calculation */
 #define E1000_PCH_RAICC(_n)    (0x05F50 + ((_n) * 4))
index fa06f68..f172994 100644
@@ -6494,6 +6494,10 @@ static void e1000e_s0ix_exit_flow(struct e1000_adapter *adapter)
 
        if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID &&
            hw->mac.type >= e1000_pch_adp) {
+               /* Keep the GPT clock enabled for CSME */
+               mac_data = er32(FEXTNVM);
+               mac_data |= BIT(3);
+               ew32(FEXTNVM, mac_data);
                /* Request ME unconfigure the device from S0ix */
                mac_data = er32(H2ME);
                mac_data &= ~E1000_H2ME_START_DPG;
@@ -6987,21 +6991,8 @@ static __maybe_unused int e1000e_pm_suspend(struct device *dev)
        struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev));
        struct e1000_adapter *adapter = netdev_priv(netdev);
        struct pci_dev *pdev = to_pci_dev(dev);
-       struct e1000_hw *hw = &adapter->hw;
-       u16 phy_data;
        int rc;
 
-       if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID &&
-           hw->mac.type >= e1000_pch_adp) {
-               /* Mask OEM Bits / Gig Disable / Restart AN (772_26[12] = 1) */
-               e1e_rphy(hw, I217_MEMPWR, &phy_data);
-               phy_data |= I217_MEMPWR_MOEM;
-               e1e_wphy(hw, I217_MEMPWR, phy_data);
-
-               /* Disable LCD reset */
-               hw->phy.reset_disable = true;
-       }
-
        e1000e_flush_lpic(pdev);
 
        e1000e_pm_freeze(dev);
@@ -7023,8 +7014,6 @@ static __maybe_unused int e1000e_pm_resume(struct device *dev)
        struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev));
        struct e1000_adapter *adapter = netdev_priv(netdev);
        struct pci_dev *pdev = to_pci_dev(dev);
-       struct e1000_hw *hw = &adapter->hw;
-       u16 phy_data;
        int rc;
 
        /* Introduce S0ix implementation */
@@ -7035,17 +7024,6 @@ static __maybe_unused int e1000e_pm_resume(struct device *dev)
        if (rc)
                return rc;
 
-       if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID &&
-           hw->mac.type >= e1000_pch_adp) {
-               /* Unmask OEM Bits / Gig Disable / Restart AN 772_26[12] = 0 */
-               e1e_rphy(hw, I217_MEMPWR, &phy_data);
-               phy_data &= ~I217_MEMPWR_MOEM;
-               e1e_wphy(hw, I217_MEMPWR, phy_data);
-
-               /* Enable LCD reset */
-               hw->phy.reset_disable = false;
-       }
-
        return e1000e_pm_thaw(dev);
 }
 
index aa786fd..7f1a0d9 100644
@@ -10650,7 +10650,7 @@ static int i40e_reset(struct i40e_pf *pf)
  **/
 static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
 {
-       int old_recovery_mode_bit = test_bit(__I40E_RECOVERY_MODE, pf->state);
+       const bool is_recovery_mode_reported = i40e_check_recovery_mode(pf);
        struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
        struct i40e_hw *hw = &pf->hw;
        i40e_status ret;
@@ -10658,13 +10658,11 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
        int v;
 
        if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) &&
-           i40e_check_recovery_mode(pf)) {
+           is_recovery_mode_reported)
                i40e_set_ethtool_ops(pf->vsi[pf->lan_vsi]->netdev);
-       }
 
        if (test_bit(__I40E_DOWN, pf->state) &&
-           !test_bit(__I40E_RECOVERY_MODE, pf->state) &&
-           !old_recovery_mode_bit)
+           !test_bit(__I40E_RECOVERY_MODE, pf->state))
                goto clear_recovery;
        dev_dbg(&pf->pdev->dev, "Rebuilding internal switch\n");
 
@@ -10691,13 +10689,12 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
         * accordingly with regard to resources initialization
         * and deinitialization
         */
-       if (test_bit(__I40E_RECOVERY_MODE, pf->state) ||
-           old_recovery_mode_bit) {
+       if (test_bit(__I40E_RECOVERY_MODE, pf->state)) {
                if (i40e_get_capabilities(pf,
                                          i40e_aqc_opc_list_func_capabilities))
                        goto end_unlock;
 
-               if (test_bit(__I40E_RECOVERY_MODE, pf->state)) {
+               if (is_recovery_mode_reported) {
                        /* we're staying in recovery mode so we'll reinitialize
                         * misc vector here
                         */
index 49aed3e..0ea0361 100644
@@ -64,7 +64,6 @@ struct iavf_vsi {
        u16 id;
        DECLARE_BITMAP(state, __IAVF_VSI_STATE_SIZE__);
        int base_vector;
-       u16 work_limit;
        u16 qs_handle;
        void *priv;     /* client driver data reference. */
 };
@@ -159,8 +158,12 @@ struct iavf_vlan {
 struct iavf_vlan_filter {
        struct list_head list;
        struct iavf_vlan vlan;
-       bool remove;            /* filter needs to be removed */
-       bool add;               /* filter needs to be added */
+       struct {
+               u8 is_new_vlan:1;       /* filter is new, wait for PF answer */
+               u8 remove:1;            /* filter needs to be removed */
+               u8 add:1;               /* filter needs to be added */
+               u8 padding:5;
+       };
 };
 
 #define IAVF_MAX_TRAFFIC_CLASS 4
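
Replacing the two bools in struct iavf_vlan_filter with single-bit fields in an anonymous struct, as the hunk above does, packs the filter state into one byte, so the new is_new_vlan flag is added without growing the structure. A runnable comparison of the two layouts (standalone structs here, purely for illustration):

```c
#include <stdio.h>

struct filter_bools {                   /* old layout: one byte per flag */
        unsigned char remove;
        unsigned char add;
        unsigned char is_new;
};

struct filter_bits {                    /* new layout: flags share a byte */
        struct {
                unsigned char is_new:1;
                unsigned char remove:1;
                unsigned char add:1;
                unsigned char padding:5;
        };
};

int main(void)
{
        printf("bools: %zu bytes, bitfields: %zu byte(s)\n",
               sizeof(struct filter_bools), sizeof(struct filter_bits));
        return 0;
}
```
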
@@ -461,6 +464,10 @@ static inline const char *iavf_state_str(enum iavf_state_t state)
                return "__IAVF_INIT_VERSION_CHECK";
        case __IAVF_INIT_GET_RESOURCES:
                return "__IAVF_INIT_GET_RESOURCES";
+       case __IAVF_INIT_EXTENDED_CAPS:
+               return "__IAVF_INIT_EXTENDED_CAPS";
+       case __IAVF_INIT_CONFIG_ADAPTER:
+               return "__IAVF_INIT_CONFIG_ADAPTER";
        case __IAVF_INIT_SW:
                return "__IAVF_INIT_SW";
        case __IAVF_INIT_FAILED:
@@ -520,6 +527,7 @@ int iavf_get_vf_config(struct iavf_adapter *adapter);
 int iavf_get_vf_vlan_v2_caps(struct iavf_adapter *adapter);
 int iavf_send_vf_offload_vlan_v2_msg(struct iavf_adapter *adapter);
 void iavf_set_queue_vlan_tag_loc(struct iavf_adapter *adapter);
+u16 iavf_get_num_vlans_added(struct iavf_adapter *adapter);
 void iavf_irq_enable(struct iavf_adapter *adapter, bool flush);
 void iavf_configure_queues(struct iavf_adapter *adapter);
 void iavf_deconfigure_queues(struct iavf_adapter *adapter);
index 3bb5671..e535d4c 100644
@@ -692,12 +692,8 @@ static int __iavf_get_coalesce(struct net_device *netdev,
                               struct ethtool_coalesce *ec, int queue)
 {
        struct iavf_adapter *adapter = netdev_priv(netdev);
-       struct iavf_vsi *vsi = &adapter->vsi;
        struct iavf_ring *rx_ring, *tx_ring;
 
-       ec->tx_max_coalesced_frames = vsi->work_limit;
-       ec->rx_max_coalesced_frames = vsi->work_limit;
-
        /* Rx and Tx usecs per queue value. If user doesn't specify the
         * queue, return queue 0's value to represent.
         */
@@ -825,12 +821,8 @@ static int __iavf_set_coalesce(struct net_device *netdev,
                               struct ethtool_coalesce *ec, int queue)
 {
        struct iavf_adapter *adapter = netdev_priv(netdev);
-       struct iavf_vsi *vsi = &adapter->vsi;
        int i;
 
-       if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq)
-               vsi->work_limit = ec->tx_max_coalesced_frames_irq;
-
        if (ec->rx_coalesce_usecs == 0) {
                if (ec->use_adaptive_rx_coalesce)
                        netif_info(adapter, drv, netdev, "rx-usecs=0, need to disable adaptive-rx for a complete disable\n");
@@ -1969,8 +1961,6 @@ static int iavf_set_rxfh(struct net_device *netdev, const u32 *indir,
 
 static const struct ethtool_ops iavf_ethtool_ops = {
        .supported_coalesce_params = ETHTOOL_COALESCE_USECS |
-                                    ETHTOOL_COALESCE_MAX_FRAMES |
-                                    ETHTOOL_COALESCE_MAX_FRAMES_IRQ |
                                     ETHTOOL_COALESCE_USE_ADAPTIVE,
        .get_drvinfo            = iavf_get_drvinfo,
        .get_link               = ethtool_op_get_link,
index f3ecb3b..2e2c153 100644
@@ -843,7 +843,7 @@ static void iavf_restore_filters(struct iavf_adapter *adapter)
  * iavf_get_num_vlans_added - get number of VLANs added
  * @adapter: board private structure
  */
-static u16 iavf_get_num_vlans_added(struct iavf_adapter *adapter)
+u16 iavf_get_num_vlans_added(struct iavf_adapter *adapter)
 {
        return bitmap_weight(adapter->vsi.active_cvlans, VLAN_N_VID) +
                bitmap_weight(adapter->vsi.active_svlans, VLAN_N_VID);
@@ -906,11 +906,6 @@ static int iavf_vlan_rx_add_vid(struct net_device *netdev,
        if (!iavf_add_vlan(adapter, IAVF_VLAN(vid, be16_to_cpu(proto))))
                return -ENOMEM;
 
-       if (proto == cpu_to_be16(ETH_P_8021Q))
-               set_bit(vid, adapter->vsi.active_cvlans);
-       else
-               set_bit(vid, adapter->vsi.active_svlans);
-
        return 0;
 }
 
@@ -2245,7 +2240,6 @@ int iavf_parse_vf_resource_msg(struct iavf_adapter *adapter)
 
        adapter->vsi.back = adapter;
        adapter->vsi.base_vector = 1;
-       adapter->vsi.work_limit = IAVF_DEFAULT_IRQ_WORK;
        vsi->netdev = adapter->netdev;
        vsi->qs_handle = adapter->vsi_res->qset_handle;
        if (adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF) {
@@ -2956,6 +2950,9 @@ continue_reset:
        adapter->aq_required |= IAVF_FLAG_AQ_ADD_CLOUD_FILTER;
        iavf_misc_irq_enable(adapter);
 
+       bitmap_clear(adapter->vsi.active_cvlans, 0, VLAN_N_VID);
+       bitmap_clear(adapter->vsi.active_svlans, 0, VLAN_N_VID);
+
        mod_delayed_work(iavf_wq, &adapter->watchdog_task, 2);
 
        /* We were running when the reset started, so we need to restore some
index 978f651..06d1879 100644
@@ -194,7 +194,7 @@ static bool iavf_clean_tx_irq(struct iavf_vsi *vsi,
        struct iavf_tx_buffer *tx_buf;
        struct iavf_tx_desc *tx_desc;
        unsigned int total_bytes = 0, total_packets = 0;
-       unsigned int budget = vsi->work_limit;
+       unsigned int budget = IAVF_DEFAULT_IRQ_WORK;
 
        tx_buf = &tx_ring->tx_bi[i];
        tx_desc = IAVF_TX_DESC(tx_ring, i);
@@ -1285,11 +1285,10 @@ static struct iavf_rx_buffer *iavf_get_rx_buffer(struct iavf_ring *rx_ring,
 {
        struct iavf_rx_buffer *rx_buffer;
 
-       if (!size)
-               return NULL;
-
        rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean];
        prefetchw(rx_buffer->page);
+       if (!size)
+               return rx_buffer;
 
        /* we are reusing so sync this buffer for CPU use */
        dma_sync_single_range_for_cpu(rx_ring->dev,
index 782450d..1603e99 100644
@@ -626,6 +626,33 @@ static void iavf_mac_add_reject(struct iavf_adapter *adapter)
        spin_unlock_bh(&adapter->mac_vlan_list_lock);
 }
 
+/**
+ * iavf_vlan_add_reject
+ * @adapter: adapter structure
+ *
+ * Remove VLAN filters from list based on PF response.
+ **/
+static void iavf_vlan_add_reject(struct iavf_adapter *adapter)
+{
+       struct iavf_vlan_filter *f, *ftmp;
+
+       spin_lock_bh(&adapter->mac_vlan_list_lock);
+       list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) {
+               if (f->is_new_vlan) {
+                       if (f->vlan.tpid == ETH_P_8021Q)
+                               clear_bit(f->vlan.vid,
+                                         adapter->vsi.active_cvlans);
+                       else
+                               clear_bit(f->vlan.vid,
+                                         adapter->vsi.active_svlans);
+
+                       list_del(&f->list);
+                       kfree(f);
+               }
+       }
+       spin_unlock_bh(&adapter->mac_vlan_list_lock);
+}
+
 /**
  * iavf_add_vlans
  * @adapter: adapter structure
@@ -683,6 +710,7 @@ void iavf_add_vlans(struct iavf_adapter *adapter)
                                vvfl->vlan_id[i] = f->vlan.vid;
                                i++;
                                f->add = false;
+                               f->is_new_vlan = true;
                                if (i == count)
                                        break;
                        }
@@ -695,10 +723,18 @@ void iavf_add_vlans(struct iavf_adapter *adapter)
                iavf_send_pf_msg(adapter, VIRTCHNL_OP_ADD_VLAN, (u8 *)vvfl, len);
                kfree(vvfl);
        } else {
+               u16 max_vlans = adapter->vlan_v2_caps.filtering.max_filters;
+               u16 current_vlans = iavf_get_num_vlans_added(adapter);
                struct virtchnl_vlan_filter_list_v2 *vvfl_v2;
 
                adapter->current_op = VIRTCHNL_OP_ADD_VLAN_V2;
 
+               if ((count + current_vlans) > max_vlans &&
+                   current_vlans < max_vlans) {
+                       count = max_vlans - iavf_get_num_vlans_added(adapter);
+                       more = true;
+               }
+
                len = sizeof(*vvfl_v2) + ((count - 1) *
                                          sizeof(struct virtchnl_vlan_filter));
                if (len > IAVF_MAX_AQ_BUF_SIZE) {
@@ -725,6 +761,9 @@ void iavf_add_vlans(struct iavf_adapter *adapter)
                                        &adapter->vlan_v2_caps.filtering.filtering_support;
                                struct virtchnl_vlan *vlan;
 
+                               if (i == count)
+                                       break;
+
                                /* give priority over outer if it's enabled */
                                if (filtering_support->outer)
                                        vlan = &vvfl_v2->filters[i].outer;
@@ -736,8 +775,7 @@ void iavf_add_vlans(struct iavf_adapter *adapter)
 
                                i++;
                                f->add = false;
-                               if (i == count)
-                                       break;
+                               f->is_new_vlan = true;
                        }
                }
 
@@ -2080,6 +2118,11 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
                         */
                        iavf_netdev_features_vlan_strip_set(netdev, true);
                        break;
+               case VIRTCHNL_OP_ADD_VLAN_V2:
+                       iavf_vlan_add_reject(adapter);
+                       dev_warn(&adapter->pdev->dev, "Failed to add VLAN filter, error %s\n",
+                                iavf_stat_str(&adapter->hw, v_retval));
+                       break;
                default:
                        dev_err(&adapter->pdev->dev, "PF returned error %d (%s) to our request %d\n",
                                v_retval, iavf_stat_str(&adapter->hw, v_retval),
@@ -2332,6 +2375,24 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
                spin_unlock_bh(&adapter->adv_rss_lock);
                }
                break;
+       case VIRTCHNL_OP_ADD_VLAN_V2: {
+               struct iavf_vlan_filter *f;
+
+               spin_lock_bh(&adapter->mac_vlan_list_lock);
+               list_for_each_entry(f, &adapter->vlan_filter_list, list) {
+                       if (f->is_new_vlan) {
+                               f->is_new_vlan = false;
+                               if (f->vlan.tpid == ETH_P_8021Q)
+                                       set_bit(f->vlan.vid,
+                                               adapter->vsi.active_cvlans);
+                               else
+                                       set_bit(f->vlan.vid,
+                                               adapter->vsi.active_svlans);
+                       }
+               }
+               spin_unlock_bh(&adapter->mac_vlan_list_lock);
+               }
+               break;
        case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING:
                /* PF enabled vlan strip on this VF.
                 * Update netdev->features if needed to be in sync with ethtool.
index ae17af4..a5ebee7 100644
@@ -6171,6 +6171,9 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg)
        u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr);
        u32 value = 0;
 
+       if (IGC_REMOVED(hw_addr))
+               return ~value;
+
        value = readl(&hw_addr[reg]);
 
        /* reads should not return all F's */
index e197a33..026c3b6 100644
@@ -306,7 +306,8 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg);
 #define wr32(reg, val) \
 do { \
        u8 __iomem *hw_addr = READ_ONCE((hw)->hw_addr); \
-       writel((val), &hw_addr[(reg)]); \
+       if (!IGC_REMOVED(hw_addr)) \
+               writel((val), &hw_addr[(reg)]); \
 } while (0)
 
 #define rd32(reg) (igc_rd32(hw, reg))
@@ -318,4 +319,6 @@ do { \
 
 #define array_rd32(reg, offset) (igc_rd32(hw, (reg) + ((offset) << 2)))
 
+#define IGC_REMOVED(h) unlikely(!(h))
+
 #endif
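
igc_rd32() and wr32() now check the cached BAR pointer before touching it: on surprise removal hw->hw_addr is cleared, so reads return all Fs (what a dead PCI device returns on the bus anyway) and writes are silently dropped, instead of dereferencing a NULL iomem pointer. A userspace sketch of the guard, with invented names:

```c
#include <stdint.h>
#include <stdio.h>
#include <stddef.h>

static volatile uint32_t *hw_addr;      /* NULL after device removal */

static uint32_t rd32(uint32_t reg)
{
        if (!hw_addr)                   /* the IGC_REMOVED() check */
                return ~0u;
        return hw_addr[reg / 4];
}

static void wr32(uint32_t reg, uint32_t val)
{
        if (!hw_addr)                   /* write to a gone device: drop it */
                return;
        hw_addr[reg / 4] = val;
}

int main(void)
{
        static uint32_t fake_bar[16];

        hw_addr = fake_bar;
        wr32(0x8, 0x1234);
        printf("mapped:  %#x\n", rd32(0x8));

        hw_addr = NULL;                 /* simulate surprise removal */
        wr32(0x8, 0x5678);
        printf("removed: %#x\n", rd32(0x8));
        return 0;
}
```
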
index 921a4d9..8813b4d 100644
@@ -779,6 +779,7 @@ struct ixgbe_adapter {
 #ifdef CONFIG_IXGBE_IPSEC
        struct ixgbe_ipsec *ipsec;
 #endif /* CONFIG_IXGBE_IPSEC */
+       spinlock_t vfs_lock;
 };
 
 static inline int ixgbe_determine_xdp_q_idx(int cpu)
index 77c2e70..55f91c9 100644
@@ -6403,6 +6403,9 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter,
        /* n-tuple support exists, always init our spinlock */
        spin_lock_init(&adapter->fdir_perfect_lock);
 
+       /* init spinlock to avoid concurrency of VF resources */
+       spin_lock_init(&adapter->vfs_lock);
+
 #ifdef CONFIG_IXGBE_DCB
        ixgbe_init_dcb(adapter);
 #endif
index d4e63f0..a1e69c7 100644
@@ -205,10 +205,13 @@ void ixgbe_enable_sriov(struct ixgbe_adapter *adapter, unsigned int max_vfs)
 int ixgbe_disable_sriov(struct ixgbe_adapter *adapter)
 {
        unsigned int num_vfs = adapter->num_vfs, vf;
+       unsigned long flags;
        int rss;
 
+       spin_lock_irqsave(&adapter->vfs_lock, flags);
        /* set num VFs to 0 to prevent access to vfinfo */
        adapter->num_vfs = 0;
+       spin_unlock_irqrestore(&adapter->vfs_lock, flags);
 
        /* put the reference to all of the vf devices */
        for (vf = 0; vf < num_vfs; ++vf) {
@@ -1355,8 +1358,10 @@ static void ixgbe_rcv_ack_from_vf(struct ixgbe_adapter *adapter, u32 vf)
 void ixgbe_msg_task(struct ixgbe_adapter *adapter)
 {
        struct ixgbe_hw *hw = &adapter->hw;
+       unsigned long flags;
        u32 vf;
 
+       spin_lock_irqsave(&adapter->vfs_lock, flags);
        for (vf = 0; vf < adapter->num_vfs; vf++) {
                /* process any reset requests */
                if (!ixgbe_check_for_rst(hw, vf))
@@ -1370,6 +1375,7 @@ void ixgbe_msg_task(struct ixgbe_adapter *adapter)
                if (!ixgbe_check_for_ack(hw, vf))
                        ixgbe_rcv_ack_from_vf(adapter, vf);
        }
+       spin_unlock_irqrestore(&adapter->vfs_lock, flags);
 }
 
 static inline void ixgbe_ping_vf(struct ixgbe_adapter *adapter, int vf)
index d43e503..4d93ad6 100644
@@ -167,12 +167,12 @@ static int prestera_flower_parse_meta(struct prestera_acl_rule *rule,
        }
        port = netdev_priv(ingress_dev);
 
-       mask = htons(0x1FFF);
-       key = htons(port->hw_id);
+       mask = htons(0x1FFF << 3);
+       key = htons(port->hw_id << 3);
        rule_match_set(r_match->key, SYS_PORT, key);
        rule_match_set(r_match->mask, SYS_PORT, mask);
 
-       mask = htons(0x1FF);
+       mask = htons(0x3FF);
        key = htons(port->dev_id);
        rule_match_set(r_match->key, SYS_DEV, key);
        rule_match_set(r_match->mask, SYS_DEV, mask);
index 90e7dfd..5d457bc 100644
@@ -93,6 +93,9 @@ mtk_flow_get_wdma_info(struct net_device *dev, const u8 *addr, struct mtk_wdma_i
        };
        struct net_device_path path = {};
 
+       if (!ctx.dev)
+               return -ENODEV;
+
        memcpy(ctx.daddr, addr, sizeof(ctx.daddr));
 
        if (!IS_ENABLED(CONFIG_NET_MEDIATEK_SOC_WED))
index 8f0cd31..29be2fc 100644
@@ -651,7 +651,7 @@ mtk_wed_tx_ring_setup(struct mtk_wed_device *dev, int idx, void __iomem *regs)
         * WDMA RX.
         */
 
-       BUG_ON(idx > ARRAY_SIZE(dev->tx_ring));
+       BUG_ON(idx >= ARRAY_SIZE(dev->tx_ring));
 
        if (mtk_wed_ring_alloc(dev, ring, MTK_WED_TX_RING_SIZE))
                return -ENOMEM;
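
The mtk_wed hunk above fixes an off-by-one: an N-element array has valid indices 0..N-1, so rejecting only idx > ARRAY_SIZE() still lets idx == ARRAY_SIZE() through, one slot past the end. A tiny runnable check:

```c
#include <assert.h>
#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

int main(void)
{
        int tx_ring[4];
        size_t idx = ARRAY_SIZE(tx_ring);       /* 4: one past the end */

        assert(!(idx > ARRAY_SIZE(tx_ring)));   /* old check lets it pass */
        assert(idx >= ARRAY_SIZE(tx_ring));     /* new check rejects it */
        printf("idx %zu is out of bounds for [0, %zu)\n",
               idx, ARRAY_SIZE(tx_ring));
        return 0;
}
```
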
index 0d8a006..ce33dbd 100644
@@ -5384,7 +5384,7 @@ static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
 {
        const struct fib_nh *nh = fib_info_nh(fi, 0);
 
-       return nh->fib_nh_scope == RT_SCOPE_LINK ||
+       return nh->fib_nh_gw_family ||
               mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, nh, NULL);
 }
 
@@ -10324,7 +10324,7 @@ static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp,
        unsigned long *fields = config->fields;
        u32 hash_fields;
 
-       switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
+       switch (READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_policy)) {
        case 0:
                mlxsw_sp_mp4_hash_outer_addr(config);
                break;
@@ -10342,7 +10342,7 @@ static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp,
                mlxsw_sp_mp_hash_inner_l3(config);
                break;
        case 3:
-               hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
+               hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields);
                /* Outer */
                MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_NOT_TCP_NOT_UDP);
                MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_TCP_UDP);
@@ -10523,13 +10523,14 @@ static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
 {
        struct net *net = mlxsw_sp_net(mlxsw_sp);
-       bool usp = net->ipv4.sysctl_ip_fwd_update_priority;
        char rgcr_pl[MLXSW_REG_RGCR_LEN];
        u64 max_rifs;
+       bool usp;
 
        if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
                return -EIO;
        max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
+       usp = READ_ONCE(net->ipv4.sysctl_ip_fwd_update_priority);
 
        mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
        mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
index 005e56e..5893770 100644
@@ -75,6 +75,9 @@ static int __lan966x_mac_learn(struct lan966x *lan966x, int pgid,
                               unsigned int vid,
                               enum macaccess_entry_type type)
 {
+       int ret;
+
+       spin_lock(&lan966x->mac_lock);
        lan966x_mac_select(lan966x, mac, vid);
 
        /* Issue a write command */
@@ -86,7 +89,10 @@ static int __lan966x_mac_learn(struct lan966x *lan966x, int pgid,
               ANA_MACACCESS_MAC_TABLE_CMD_SET(MACACCESS_CMD_LEARN),
               lan966x, ANA_MACACCESS);
 
-       return lan966x_mac_wait_for_completion(lan966x);
+       ret = lan966x_mac_wait_for_completion(lan966x);
+       spin_unlock(&lan966x->mac_lock);
+
+       return ret;
 }
 
 /* The mask of the front ports is encoded inside the mac parameter via a call
@@ -113,11 +119,13 @@ int lan966x_mac_learn(struct lan966x *lan966x, int port,
        return __lan966x_mac_learn(lan966x, port, false, mac, vid, type);
 }
 
-int lan966x_mac_forget(struct lan966x *lan966x,
-                      const unsigned char mac[ETH_ALEN],
-                      unsigned int vid,
-                      enum macaccess_entry_type type)
+static int lan966x_mac_forget_locked(struct lan966x *lan966x,
+                                    const unsigned char mac[ETH_ALEN],
+                                    unsigned int vid,
+                                    enum macaccess_entry_type type)
 {
+       lockdep_assert_held(&lan966x->mac_lock);
+
        lan966x_mac_select(lan966x, mac, vid);
 
        /* Issue a forget command */
@@ -128,6 +136,20 @@ int lan966x_mac_forget(struct lan966x *lan966x,
        return lan966x_mac_wait_for_completion(lan966x);
 }
 
+int lan966x_mac_forget(struct lan966x *lan966x,
+                      const unsigned char mac[ETH_ALEN],
+                      unsigned int vid,
+                      enum macaccess_entry_type type)
+{
+       int ret;
+
+       spin_lock(&lan966x->mac_lock);
+       ret = lan966x_mac_forget_locked(lan966x, mac, vid, type);
+       spin_unlock(&lan966x->mac_lock);
+
+       return ret;
+}
+
 int lan966x_mac_cpu_learn(struct lan966x *lan966x, const char *addr, u16 vid)
 {
        return lan966x_mac_learn(lan966x, PGID_CPU, addr, vid, ENTRYTYPE_LOCKED);
@@ -161,7 +183,7 @@ static struct lan966x_mac_entry *lan966x_mac_alloc_entry(const unsigned char *ma
 {
        struct lan966x_mac_entry *mac_entry;
 
-       mac_entry = kzalloc(sizeof(*mac_entry), GFP_KERNEL);
+       mac_entry = kzalloc(sizeof(*mac_entry), GFP_ATOMIC);
        if (!mac_entry)
                return NULL;
 
@@ -179,7 +201,6 @@ static struct lan966x_mac_entry *lan966x_mac_find_entry(struct lan966x *lan966x,
        struct lan966x_mac_entry *res = NULL;
        struct lan966x_mac_entry *mac_entry;
 
-       spin_lock(&lan966x->mac_lock);
        list_for_each_entry(mac_entry, &lan966x->mac_entries, list) {
                if (mac_entry->vid == vid &&
                    ether_addr_equal(mac, mac_entry->mac) &&
@@ -188,7 +209,6 @@ static struct lan966x_mac_entry *lan966x_mac_find_entry(struct lan966x *lan966x,
                        break;
                }
        }
-       spin_unlock(&lan966x->mac_lock);
 
        return res;
 }
@@ -231,8 +251,11 @@ int lan966x_mac_add_entry(struct lan966x *lan966x, struct lan966x_port *port,
 {
        struct lan966x_mac_entry *mac_entry;
 
-       if (lan966x_mac_lookup(lan966x, addr, vid, ENTRYTYPE_NORMAL))
+       spin_lock(&lan966x->mac_lock);
+       if (lan966x_mac_lookup(lan966x, addr, vid, ENTRYTYPE_NORMAL)) {
+               spin_unlock(&lan966x->mac_lock);
                return 0;
+       }
 
        /* In case the entry already exists, don't add it again to SW,
         * just update HW, but we need to look in the actual HW because
@@ -241,21 +264,25 @@ int lan966x_mac_add_entry(struct lan966x *lan966x, struct lan966x_port *port,
         * add the entry but without the extern_learn flag.
         */
        mac_entry = lan966x_mac_find_entry(lan966x, addr, vid, port->chip_port);
-       if (mac_entry)
-               return lan966x_mac_learn(lan966x, port->chip_port,
-                                        addr, vid, ENTRYTYPE_LOCKED);
+       if (mac_entry) {
+               spin_unlock(&lan966x->mac_lock);
+               goto mac_learn;
+       }
 
        mac_entry = lan966x_mac_alloc_entry(addr, vid, port->chip_port);
-       if (!mac_entry)
+       if (!mac_entry) {
+               spin_unlock(&lan966x->mac_lock);
                return -ENOMEM;
+       }
 
-       spin_lock(&lan966x->mac_lock);
        list_add_tail(&mac_entry->list, &lan966x->mac_entries);
        spin_unlock(&lan966x->mac_lock);
 
-       lan966x_mac_learn(lan966x, port->chip_port, addr, vid, ENTRYTYPE_LOCKED);
        lan966x_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED, addr, vid, port->dev);
 
+mac_learn:
+       lan966x_mac_learn(lan966x, port->chip_port, addr, vid, ENTRYTYPE_LOCKED);
+
        return 0;
 }
 
@@ -269,8 +296,9 @@ int lan966x_mac_del_entry(struct lan966x *lan966x, const unsigned char *addr,
                                 list) {
                if (mac_entry->vid == vid &&
                    ether_addr_equal(addr, mac_entry->mac)) {
-                       lan966x_mac_forget(lan966x, mac_entry->mac, mac_entry->vid,
-                                          ENTRYTYPE_LOCKED);
+                       lan966x_mac_forget_locked(lan966x, mac_entry->mac,
+                                                 mac_entry->vid,
+                                                 ENTRYTYPE_LOCKED);
 
                        list_del(&mac_entry->list);
                        kfree(mac_entry);
@@ -288,8 +316,8 @@ void lan966x_mac_purge_entries(struct lan966x *lan966x)
        spin_lock(&lan966x->mac_lock);
        list_for_each_entry_safe(mac_entry, tmp, &lan966x->mac_entries,
                                 list) {
-               lan966x_mac_forget(lan966x, mac_entry->mac, mac_entry->vid,
-                                  ENTRYTYPE_LOCKED);
+               lan966x_mac_forget_locked(lan966x, mac_entry->mac,
+                                         mac_entry->vid, ENTRYTYPE_LOCKED);
 
                list_del(&mac_entry->list);
                kfree(mac_entry);
@@ -325,10 +353,13 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row,
 {
        struct lan966x_mac_entry *mac_entry, *tmp;
        unsigned char mac[ETH_ALEN] __aligned(2);
+       struct list_head mac_deleted_entries;
        u32 dest_idx;
        u32 column;
        u16 vid;
 
+       INIT_LIST_HEAD(&mac_deleted_entries);
+
        spin_lock(&lan966x->mac_lock);
        list_for_each_entry_safe(mac_entry, tmp, &lan966x->mac_entries, list) {
                bool found = false;
@@ -362,20 +393,26 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row,
                }
 
                if (!found) {
-                       /* Notify the bridge that the entry doesn't exist
-                        * anymore in the HW and remove the entry from the SW
-                        * list
-                        */
-                       lan966x_mac_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE,
-                                             mac_entry->mac, mac_entry->vid,
-                                             lan966x->ports[mac_entry->port_index]->dev);
-
                        list_del(&mac_entry->list);
-                       kfree(mac_entry);
+                       /* Move the entry from SW list to a tmp list such that
+                        * it would be deleted later
+                        */
+                       list_add_tail(&mac_entry->list, &mac_deleted_entries);
                }
        }
        spin_unlock(&lan966x->mac_lock);
 
+       list_for_each_entry_safe(mac_entry, tmp, &mac_deleted_entries, list) {
+               /* Notify the bridge that the entry doesn't exist
+                * anymore in the HW
+                */
+               lan966x_mac_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE,
+                                     mac_entry->mac, mac_entry->vid,
+                                     lan966x->ports[mac_entry->port_index]->dev);
+               list_del(&mac_entry->list);
+               kfree(mac_entry);
+       }
+
        /* Now go to the list of columns and see if any entry was not in the SW
         * list, then that means that the entry is new so it needs to notify the
         * bridge.
@@ -396,13 +433,20 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row,
                if (WARN_ON(dest_idx >= lan966x->num_phys_ports))
                        continue;
 
+               spin_lock(&lan966x->mac_lock);
+               mac_entry = lan966x_mac_find_entry(lan966x, mac, vid, dest_idx);
+               if (mac_entry) {
+                       spin_unlock(&lan966x->mac_lock);
+                       continue;
+               }
+
                mac_entry = lan966x_mac_alloc_entry(mac, vid, dest_idx);
-               if (!mac_entry)
+               if (!mac_entry) {
+                       spin_unlock(&lan966x->mac_lock);
                        return;
+               }
 
                mac_entry->row = row;
-
-               spin_lock(&lan966x->mac_lock);
                list_add_tail(&mac_entry->list, &lan966x->mac_entries);
                spin_unlock(&lan966x->mac_lock);
 
@@ -424,6 +468,7 @@ irqreturn_t lan966x_mac_irq_handler(struct lan966x *lan966x)
               lan966x, ANA_MACTINDX);
 
        while (1) {
+               spin_lock(&lan966x->mac_lock);
                lan_rmw(ANA_MACACCESS_MAC_TABLE_CMD_SET(MACACCESS_CMD_SYNC_GET_NEXT),
                        ANA_MACACCESS_MAC_TABLE_CMD,
                        lan966x, ANA_MACACCESS);
@@ -447,12 +492,15 @@ irqreturn_t lan966x_mac_irq_handler(struct lan966x *lan966x)
                        stop = false;
 
                if (column == LAN966X_MAC_COLUMNS - 1 &&
-                   index == 0 && stop)
+                   index == 0 && stop) {
+                       spin_unlock(&lan966x->mac_lock);
                        break;
+               }
 
                entry[column].mach = lan_rd(lan966x, ANA_MACHDATA);
                entry[column].macl = lan_rd(lan966x, ANA_MACLDATA);
                entry[column].maca = lan_rd(lan966x, ANA_MACACCESS);
+               spin_unlock(&lan966x->mac_lock);
 
                /* Once all the columns are read process them */
                if (column == LAN966X_MAC_COLUMNS - 1) {
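
The core of the lan966x rework is the pattern in lan966x_mac_irq_process() above: stale entries are unlinked onto a private list while mac_lock is held, and the bridge notifiers, which may block or call back into the driver, run only after the lock is dropped. A runnable userspace sketch of that unlink-then-notify shape, with invented names:

```c
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct entry {
        int id;
        int stale;
        struct entry *next;
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct entry *entries;           /* shared list, guarded by lock */

static void notify_deleted(struct entry *e)
{
        printf("notify: entry %d gone\n", e->id); /* may sleep/re-lock */
}

static void reap_stale(void)
{
        struct entry *deleted = NULL, **pp, *e;

        pthread_mutex_lock(&lock);
        for (pp = &entries; (e = *pp); ) {
                if (e->stale) {
                        *pp = e->next;          /* unlink from shared list */
                        e->next = deleted;      /* park on private list */
                        deleted = e;
                } else {
                        pp = &e->next;
                }
        }
        pthread_mutex_unlock(&lock);

        while ((e = deleted)) {                 /* lock no longer held */
                deleted = e->next;
                notify_deleted(e);
                free(e);
        }
}

int main(void)
{
        for (int i = 0; i < 3; i++) {
                struct entry *e = calloc(1, sizeof(*e));

                e->id = i;
                e->stale = (i != 1);
                e->next = entries;
                entries = e;
        }
        reap_stale();
        return 0;
}
```
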
index 0147de4..ffb6f6d 100644
@@ -474,7 +474,7 @@ nfp_fl_set_tun(struct nfp_app *app, struct nfp_fl_set_tun *set_tun,
                        set_tun->ttl = ip4_dst_hoplimit(&rt->dst);
                        ip_rt_put(rt);
                } else {
-                       set_tun->ttl = net->ipv4.sysctl_ip_default_ttl;
+                       set_tun->ttl = READ_ONCE(net->ipv4.sysctl_ip_default_ttl);
                }
        }
 
index 38fe77d..3fe720c 100644
@@ -298,6 +298,11 @@ static void get_arttime(struct mii_bus *mii, int intel_adhoc_addr,
        *art_time = ns;
 }
 
+static int stmmac_cross_ts_isr(struct stmmac_priv *priv)
+{
+       return (readl(priv->ioaddr + GMAC_INT_STATUS) & GMAC_INT_TSIE);
+}
+
 static int intel_crosststamp(ktime_t *device,
                             struct system_counterval_t *system,
                             void *ctx)
@@ -313,8 +318,6 @@ static int intel_crosststamp(ktime_t *device,
        u32 num_snapshot;
        u32 gpio_value;
        u32 acr_value;
-       int ret;
-       u32 v;
        int i;
 
        if (!boot_cpu_has(X86_FEATURE_ART))
@@ -328,6 +331,8 @@ static int intel_crosststamp(ktime_t *device,
        if (priv->plat->ext_snapshot_en)
                return -EBUSY;
 
+       priv->plat->int_snapshot_en = 1;
+
        mutex_lock(&priv->aux_ts_lock);
        /* Enable Internal snapshot trigger */
        acr_value = readl(ptpaddr + PTP_ACR);
@@ -347,6 +352,7 @@ static int intel_crosststamp(ktime_t *device,
                break;
        default:
                mutex_unlock(&priv->aux_ts_lock);
+               priv->plat->int_snapshot_en = 0;
                return -EINVAL;
        }
        writel(acr_value, ptpaddr + PTP_ACR);
@@ -368,13 +374,12 @@ static int intel_crosststamp(ktime_t *device,
        gpio_value |= GMAC_GPO1;
        writel(gpio_value, ioaddr + GMAC_GPIO_STATUS);
 
-       /* Poll for time sync operation done */
-       ret = readl_poll_timeout(priv->ioaddr + GMAC_INT_STATUS, v,
-                                (v & GMAC_INT_TSIE), 100, 10000);
-
-       if (ret == -ETIMEDOUT) {
-               pr_err("%s: Wait for time sync operation timeout\n", __func__);
-               return ret;
+       /* Time sync done Indication - Interrupt method */
+       if (!wait_event_interruptible_timeout(priv->tstamp_busy_wait,
+                                             stmmac_cross_ts_isr(priv),
+                                             HZ / 100)) {
+               priv->plat->int_snapshot_en = 0;
+               return -ETIMEDOUT;
        }
 
        num_snapshot = (readl(ioaddr + GMAC_TIMESTAMP_STATUS) &
@@ -392,6 +397,7 @@ static int intel_crosststamp(ktime_t *device,
        }
 
        system->cycles *= intel_priv->crossts_adj;
+       priv->plat->int_snapshot_en = 0;
 
        return 0;
 }
@@ -576,6 +582,7 @@ static int intel_mgbe_common_data(struct pci_dev *pdev,
 
        plat->has_crossts = true;
        plat->crosststamp = intel_crosststamp;
+       plat->int_snapshot_en = 0;
 
        /* Setup MSI vector offset specific to Intel mGbE controller */
        plat->msi_mac_vec = 29;
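
These stmmac hunks replace busy-polling GMAC_INT_STATUS with a wait queue: intel_crosststamp() sets int_snapshot_en and sleeps in wait_event_interruptible_timeout(), and timestamp_interrupt() (further down) short-circuits into wake_up() while the snapshot is armed. A kernel-style sketch of the handshake: the wait/wake APIs are real, but the surrounding names are invented and this is not buildable standalone:

```c
#include <linux/wait.h>
#include <linux/jiffies.h>
#include <linux/errno.h>

static DECLARE_WAIT_QUEUE_HEAD(tstamp_busy_wait);
static bool snapshot_armed;

static bool snapshot_done(void)
{
        return !READ_ONCE(snapshot_armed);      /* cleared by the IRQ */
}

/* process context: arm the snapshot, then sleep up to 10 ms */
static int wait_for_snapshot(void)
{
        WRITE_ONCE(snapshot_armed, true);
        /* ... trigger the hardware snapshot here ... */

        /* As in the driver, only a 0 return (timeout) is a failure. */
        if (!wait_event_interruptible_timeout(tstamp_busy_wait,
                                              snapshot_done(), HZ / 100))
                return -ETIMEDOUT;
        return 0;
}

/* interrupt context: acknowledge and wake the sleeper */
static void snapshot_irq(void)
{
        WRITE_ONCE(snapshot_armed, false);
        wake_up(&tstamp_busy_wait);
}
```
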
index 6ff88df..ca8ab29 100644
@@ -576,32 +576,7 @@ static int mediatek_dwmac_init(struct platform_device *pdev, void *priv)
                }
        }
 
-       ret = clk_bulk_prepare_enable(variant->num_clks, plat->clks);
-       if (ret) {
-               dev_err(plat->dev, "failed to enable clks, err = %d\n", ret);
-               return ret;
-       }
-
-       ret = clk_prepare_enable(plat->rmii_internal_clk);
-       if (ret) {
-               dev_err(plat->dev, "failed to enable rmii internal clk, err = %d\n", ret);
-               goto err_clk;
-       }
-
        return 0;
-
-err_clk:
-       clk_bulk_disable_unprepare(variant->num_clks, plat->clks);
-       return ret;
-}
-
-static void mediatek_dwmac_exit(struct platform_device *pdev, void *priv)
-{
-       struct mediatek_dwmac_plat_data *plat = priv;
-       const struct mediatek_dwmac_variant *variant = plat->variant;
-
-       clk_disable_unprepare(plat->rmii_internal_clk);
-       clk_bulk_disable_unprepare(variant->num_clks, plat->clks);
 }
 
 static int mediatek_dwmac_clks_config(void *priv, bool enabled)
@@ -643,7 +618,6 @@ static int mediatek_dwmac_common_data(struct platform_device *pdev,
        plat->addr64 = priv_plat->variant->dma_bit_mask;
        plat->bsp_priv = priv_plat;
        plat->init = mediatek_dwmac_init;
-       plat->exit = mediatek_dwmac_exit;
        plat->clks_config = mediatek_dwmac_clks_config;
        if (priv_plat->variant->dwmac_fix_mac_speed)
                plat->fix_mac_speed = priv_plat->variant->dwmac_fix_mac_speed;
@@ -712,13 +686,32 @@ static int mediatek_dwmac_probe(struct platform_device *pdev)
        mediatek_dwmac_common_data(pdev, plat_dat, priv_plat);
        mediatek_dwmac_init(pdev, priv_plat);
 
+       ret = mediatek_dwmac_clks_config(priv_plat, true);
+       if (ret)
+               return ret;
+
        ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
        if (ret) {
                stmmac_remove_config_dt(pdev, plat_dat);
-               return ret;
+               goto err_drv_probe;
        }
 
        return 0;
+
+err_drv_probe:
+       mediatek_dwmac_clks_config(priv_plat, false);
+       return ret;
+}
+
+static int mediatek_dwmac_remove(struct platform_device *pdev)
+{
+       struct mediatek_dwmac_plat_data *priv_plat = get_stmmac_bsp_priv(&pdev->dev);
+       int ret;
+
+       ret = stmmac_pltfr_remove(pdev);
+       mediatek_dwmac_clks_config(priv_plat, false);
+
+       return ret;
 }
 
 static const struct of_device_id mediatek_dwmac_match[] = {
@@ -733,7 +726,7 @@ MODULE_DEVICE_TABLE(of, mediatek_dwmac_match);
 
 static struct platform_driver mediatek_dwmac_driver = {
        .probe  = mediatek_dwmac_probe,
-       .remove = stmmac_pltfr_remove,
+       .remove = mediatek_dwmac_remove,
        .driver = {
                .name           = "dwmac-mediatek",
                .pm             = &stmmac_pltfr_pm_ops,
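
Taken together, this file's hunks move clock handling out of the init/exit
callbacks (which the stmmac platform suspend/resume helpers re-run) and into
probe/remove, where each enable has exactly one matching disable. The generic
shape of that fix, with hypothetical foo_* stand-ins for the real driver entry
points:

    #include <linux/platform_device.h>

    int foo_clks_config(struct platform_device *pdev, bool enable);
    int foo_core_probe(struct platform_device *pdev);
    int foo_core_remove(struct platform_device *pdev);

    static int foo_probe(struct platform_device *pdev)
    {
            int ret;

            ret = foo_clks_config(pdev, true);      /* enable once, before core probe */
            if (ret)
                    return ret;

            ret = foo_core_probe(pdev);
            if (ret)
                    goto err_clks;                  /* unwind exactly what we enabled */

            return 0;

    err_clks:
            foo_clks_config(pdev, false);
            return ret;
    }

    static int foo_remove(struct platform_device *pdev)
    {
            int ret = foo_core_remove(pdev);

            foo_clks_config(pdev, false);           /* mirror image of probe */
            return ret;
    }
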
index 462ca7e..71dad40 100644 (file)
 #define        GMAC_PCS_IRQ_DEFAULT    (GMAC_INT_RGSMIIS | GMAC_INT_PCS_LINK | \
                                 GMAC_INT_PCS_ANE)
 
-#define        GMAC_INT_DEFAULT_ENABLE (GMAC_INT_PMT_EN | GMAC_INT_LPI_EN)
+#define        GMAC_INT_DEFAULT_ENABLE (GMAC_INT_PMT_EN | GMAC_INT_LPI_EN | \
+                                GMAC_INT_TSIE)
 
 enum dwmac4_irq_status {
        time_stamp_irq = 0x00001000,
index fd41db6..d8f1fbc 100644 (file)
@@ -23,6 +23,7 @@
 static void dwmac4_core_init(struct mac_device_info *hw,
                             struct net_device *dev)
 {
+       struct stmmac_priv *priv = netdev_priv(dev);
        void __iomem *ioaddr = hw->pcsr;
        u32 value = readl(ioaddr + GMAC_CONFIG);
 
@@ -58,6 +59,9 @@ static void dwmac4_core_init(struct mac_device_info *hw,
                value |= GMAC_INT_FPE_EN;
 
        writel(value, ioaddr + GMAC_INT_EN);
+
+       if (GMAC_INT_DEFAULT_ENABLE & GMAC_INT_TSIE)
+               init_waitqueue_head(&priv->tstamp_busy_wait);
 }
 
 static void dwmac4_rx_queue_enable(struct mac_device_info *hw,
@@ -219,6 +223,9 @@ static void dwmac4_map_mtl_dma(struct mac_device_info *hw, u32 queue, u32 chan)
        if (queue == 0 || queue == 4) {
                value &= ~MTL_RXQ_DMA_Q04MDMACH_MASK;
                value |= MTL_RXQ_DMA_Q04MDMACH(chan);
+       } else if (queue > 4) {
+               value &= ~MTL_RXQ_DMA_QXMDMACH_MASK(queue - 4);
+               value |= MTL_RXQ_DMA_QXMDMACH(chan, queue - 4);
        } else {
                value &= ~MTL_RXQ_DMA_QXMDMACH_MASK(queue);
                value |= MTL_RXQ_DMA_QXMDMACH(chan, queue);
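
The new branch rebases the field index for queues above 4: assuming the dwmac4
layout in which queues 0-3 map through one MTL_RXQ_DMA register and queues 4-7
through a second, the per-queue field macros expect an index relative to that
second register, so

    queue 5 -> QXMDMACH field (5 - 4) = 1
    queue 6 -> QXMDMACH field (6 - 4) = 2
    queue 7 -> QXMDMACH field (7 - 4) = 3

whereas the old code shifted by the absolute queue number and wrote past the
intended field.
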
index 57970ae..f9e8396 100644 (file)
@@ -266,6 +266,7 @@ struct stmmac_priv {
        rwlock_t ptp_lock;
        /* Protects auxiliary snapshot registers from concurrent access. */
        struct mutex aux_ts_lock;
+       wait_queue_head_t tstamp_busy_wait;
 
        void __iomem *mmcaddr;
        void __iomem *ptpaddr;
index abfb3cd..9c3055e 100644 (file)
@@ -803,14 +803,6 @@ static int stmmac_ethtool_op_set_eee(struct net_device *dev,
                netdev_warn(priv->dev,
                            "Setting EEE tx-lpi is not supported\n");
 
-       if (priv->hw->xpcs) {
-               ret = xpcs_config_eee(priv->hw->xpcs,
-                                     priv->plat->mult_fact_100ns,
-                                     edata->eee_enabled);
-               if (ret)
-                       return ret;
-       }
-
        if (!edata->eee_enabled)
                stmmac_disable_eee_mode(priv);
 
index 92d3294..764832f 100644 (file)
@@ -179,6 +179,11 @@ static void timestamp_interrupt(struct stmmac_priv *priv)
        u64 ptp_time;
        int i;
 
+       if (priv->plat->int_snapshot_en) {
+               wake_up(&priv->tstamp_busy_wait);
+               return;
+       }
+
        tsync_int = readl(priv->ioaddr + GMAC_INT_STATUS) & GMAC_INT_TSIE;
 
        if (!tsync_int)
index d1a7cf4..c5f3363 100644 (file)
@@ -834,19 +834,10 @@ int stmmac_init_tstamp_counter(struct stmmac_priv *priv, u32 systime_flags)
        struct timespec64 now;
        u32 sec_inc = 0;
        u64 temp = 0;
-       int ret;
 
        if (!(priv->dma_cap.time_stamp || priv->dma_cap.atime_stamp))
                return -EOPNOTSUPP;
 
-       ret = clk_prepare_enable(priv->plat->clk_ptp_ref);
-       if (ret < 0) {
-               netdev_warn(priv->dev,
-                           "failed to enable PTP reference clock: %pe\n",
-                           ERR_PTR(ret));
-               return ret;
-       }
-
        stmmac_config_hw_tstamping(priv, priv->ptpaddr, systime_flags);
        priv->systime_flags = systime_flags;
 
@@ -3270,6 +3261,14 @@ static int stmmac_hw_setup(struct net_device *dev, bool ptp_register)
 
        stmmac_mmc_setup(priv);
 
+       if (ptp_register) {
+               ret = clk_prepare_enable(priv->plat->clk_ptp_ref);
+               if (ret < 0)
+                       netdev_warn(priv->dev,
+                                   "failed to enable PTP reference clock: %pe\n",
+                                   ERR_PTR(ret));
+       }
+
        ret = stmmac_init_ptp(priv);
        if (ret == -EOPNOTSUPP)
                netdev_info(priv->dev, "PTP not supported by HW\n");
@@ -7213,8 +7212,6 @@ int stmmac_dvr_remove(struct device *dev)
        netdev_info(priv->dev, "%s: removing driver", __func__);
 
        pm_runtime_get_sync(dev);
-       pm_runtime_disable(dev);
-       pm_runtime_put_noidle(dev);
 
        stmmac_stop_all_dma(priv);
        stmmac_mac_set(priv, priv->ioaddr, false);
@@ -7241,6 +7238,9 @@ int stmmac_dvr_remove(struct device *dev)
        mutex_destroy(&priv->lock);
        bitmap_free(priv->af_xdp_zc_qps);
 
+       pm_runtime_disable(dev);
+       pm_runtime_put_noidle(dev);
+
        return 0;
 }
 EXPORT_SYMBOL_GPL(stmmac_dvr_remove);
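
The two stmmac_dvr_remove() hunks reorder teardown so the device stays
runtime-resumed while its registers are quiesced, and runtime PM is dropped
only once all MMIO is done. Schematically, with hypothetical foo_* helpers
standing in for the real teardown steps:

    #include <linux/device.h>
    #include <linux/pm_runtime.h>

    void foo_stop_dma_and_mac(void);        /* MMIO: needs the device awake */
    void foo_unregister_and_free(void);

    static void foo_teardown(struct device *dev)
    {
            pm_runtime_get_sync(dev);       /* keep the device powered */

            foo_stop_dma_and_mac();
            foo_unregister_and_free();

            pm_runtime_disable(dev);        /* only now let it power down */
            pm_runtime_put_noidle(dev);
    }
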
index 11e1055..9f5cac4 100644 (file)
@@ -815,7 +815,13 @@ static int __maybe_unused stmmac_pltfr_noirq_resume(struct device *dev)
                if (ret)
                        return ret;
 
-               stmmac_init_tstamp_counter(priv, priv->systime_flags);
+               ret = clk_prepare_enable(priv->plat->clk_ptp_ref);
+               if (ret < 0) {
+                       netdev_warn(priv->dev,
+                                   "failed to enable PTP reference clock: %pe\n",
+                                   ERR_PTR(ret));
+                       return ret;
+               }
        }
 
        return 0;
index e45fb19..4d11980 100644 (file)
@@ -175,11 +175,10 @@ static int stmmac_enable(struct ptp_clock_info *ptp,
        struct stmmac_priv *priv =
            container_of(ptp, struct stmmac_priv, ptp_clock_ops);
        void __iomem *ptpaddr = priv->ptpaddr;
-       void __iomem *ioaddr = priv->hw->pcsr;
        struct stmmac_pps_cfg *cfg;
-       u32 intr_value, acr_value;
        int ret = -EOPNOTSUPP;
        unsigned long flags;
+       u32 acr_value;
 
        switch (rq->type) {
        case PTP_CLK_REQ_PEROUT:
@@ -213,19 +212,10 @@ static int stmmac_enable(struct ptp_clock_info *ptp,
                        netdev_dbg(priv->dev, "Auxiliary Snapshot %d enabled.\n",
                                   priv->plat->ext_snapshot_num >>
                                   PTP_ACR_ATSEN_SHIFT);
-                       /* Enable Timestamp Interrupt */
-                       intr_value = readl(ioaddr + GMAC_INT_EN);
-                       intr_value |= GMAC_INT_TSIE;
-                       writel(intr_value, ioaddr + GMAC_INT_EN);
-
                } else {
                        netdev_dbg(priv->dev, "Auxiliary Snapshot %d disabled.\n",
                                   priv->plat->ext_snapshot_num >>
                                   PTP_ACR_ATSEN_SHIFT);
-                       /* Disable Timestamp Interrupt */
-                       intr_value = readl(ioaddr + GMAC_INT_EN);
-                       intr_value &= ~GMAC_INT_TSIE;
-                       writel(intr_value, ioaddr + GMAC_INT_EN);
                }
                writel(acr_value, ptpaddr + PTP_ACR);
                mutex_unlock(&priv->aux_ts_lock);
index b082819..0f6efaa 100644 (file)
@@ -32,7 +32,7 @@
 #define NETNEXT_VERSION                "12"
 
 /* Information for net */
-#define NET_VERSION            "12"
+#define NET_VERSION            "13"
 
 #define DRIVER_VERSION         "v1." NETNEXT_VERSION "." NET_VERSION
 #define DRIVER_AUTHOR "Realtek linux nic maintainers <nic_swsd@realtek.com>"
@@ -5917,7 +5917,8 @@ static void r8153_enter_oob(struct r8152 *tp)
 
        wait_oob_link_list_ready(tp);
 
-       ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, mtu_to_size(tp->netdev->mtu));
+       ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, 1522);
+       ocp_write_byte(tp, MCU_TYPE_PLA, PLA_MTPS, MTPS_DEFAULT);
 
        switch (tp->version) {
        case RTL_VER_03:
@@ -5953,6 +5954,10 @@ static void r8153_enter_oob(struct r8152 *tp)
        ocp_data |= NOW_IS_OOB | DIS_MCU_CLROOB;
        ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data);
 
+       ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7);
+       ocp_data |= MCU_BORW_EN;
+       ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data);
+
        rxdy_gated_en(tp, false);
 
        ocp_data = ocp_read_dword(tp, MCU_TYPE_PLA, PLA_RCR);
@@ -6555,6 +6560,9 @@ static void rtl8156_down(struct r8152 *tp)
        rtl_disable(tp);
        rtl_reset_bmu(tp);
 
+       ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, 1522);
+       ocp_write_byte(tp, MCU_TYPE_PLA, PLA_MTPS, MTPS_DEFAULT);
+
        /* Clear teredo wake event. bit[15:8] is the teredo wakeup
         * type. Set it to zero. bits[7:0] are the W1C bits about
         * the events. Set them to all 1 to clear them.
@@ -6565,6 +6573,10 @@ static void rtl8156_down(struct r8152 *tp)
        ocp_data |= NOW_IS_OOB;
        ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data);
 
+       ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7);
+       ocp_data |= MCU_BORW_EN;
+       ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data);
+
        rtl_rx_vlan_en(tp, true);
        rxdy_gated_en(tp, false);
 
index ff3e825..492dce4 100644 (file)
  *  Useful if your architecture doesn't use IPIs for remote TLB invalidates
  *  and therefore doesn't naturally serialize with software page-table walkers.
  *
+ *  MMU_GATHER_NO_FLUSH_CACHE
+ *
+ *  Indicates the architecture has flush_cache_range() but it need *NOT* be called
+ *  before unmapping a VMA.
+ *
+ *  NOTE: strictly speaking we shouldn't have this knob and should instead rely on
+ *       flush_cache_range() being a NOP, except Sparc64 seems to be
+ *       different here.
+ *
+ *  MMU_GATHER_MERGE_VMAS
+ *
+ *  Indicates the architecture wants to merge ranges over VMAs; typical when
+ *  multiple range invalidates are more expensive than a full invalidate.
+ *
  *  MMU_GATHER_NO_RANGE
  *
- *  Use this if your architecture lacks an efficient flush_tlb_range().
+ *  Use this if your architecture lacks an efficient flush_tlb_range(). This
+ *  option implies MMU_GATHER_MERGE_VMAS above.
  *
  *  MMU_GATHER_NO_GATHER
  *
@@ -288,6 +303,7 @@ struct mmu_gather {
         */
        unsigned int            vma_exec : 1;
        unsigned int            vma_huge : 1;
+       unsigned int            vma_pfn  : 1;
 
        unsigned int            batch_count;
 
@@ -334,8 +350,8 @@ static inline void __tlb_reset_range(struct mmu_gather *tlb)
 
 #ifdef CONFIG_MMU_GATHER_NO_RANGE
 
-#if defined(tlb_flush) || defined(tlb_start_vma) || defined(tlb_end_vma)
-#error MMU_GATHER_NO_RANGE relies on default tlb_flush(), tlb_start_vma() and tlb_end_vma()
+#if defined(tlb_flush)
+#error MMU_GATHER_NO_RANGE relies on default tlb_flush()
 #endif
 
 /*
@@ -352,20 +368,9 @@ static inline void tlb_flush(struct mmu_gather *tlb)
                flush_tlb_mm(tlb->mm);
 }
 
-static inline void
-tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { }
-
-#define tlb_end_vma tlb_end_vma
-static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) { }
-
 #else /* CONFIG_MMU_GATHER_NO_RANGE */
 
 #ifndef tlb_flush
-
-#if defined(tlb_start_vma) || defined(tlb_end_vma)
-#error Default tlb_flush() relies on default tlb_start_vma() and tlb_end_vma()
-#endif
-
 /*
  * When an architecture does not provide its own tlb_flush() implementation
  * but does have a reasonably efficient flush_vma_range() implementation
@@ -385,6 +390,9 @@ static inline void tlb_flush(struct mmu_gather *tlb)
                flush_tlb_range(&vma, tlb->start, tlb->end);
        }
 }
+#endif
+
+#endif /* CONFIG_MMU_GATHER_NO_RANGE */
 
 static inline void
 tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma)
@@ -402,17 +410,9 @@ tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma)
         */
        tlb->vma_huge = is_vm_hugetlb_page(vma);
        tlb->vma_exec = !!(vma->vm_flags & VM_EXEC);
+       tlb->vma_pfn  = !!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP));
 }
 
-#else
-
-static inline void
-tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { }
-
-#endif
-
-#endif /* CONFIG_MMU_GATHER_NO_RANGE */
-
 static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
 {
        /*
@@ -486,32 +486,36 @@ static inline unsigned long tlb_get_unmap_size(struct mmu_gather *tlb)
  * case where we're doing a full MM flush.  When we're doing a munmap,
  * the vmas are adjusted to only cover the region to be torn down.
  */
-#ifndef tlb_start_vma
 static inline void tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
 {
        if (tlb->fullmm)
                return;
 
        tlb_update_vma_flags(tlb, vma);
+#ifndef CONFIG_MMU_GATHER_NO_FLUSH_CACHE
        flush_cache_range(vma, vma->vm_start, vma->vm_end);
-}
 #endif
+}
 
-#ifndef tlb_end_vma
 static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
 {
        if (tlb->fullmm)
                return;
 
        /*
-        * Do a TLB flush and reset the range at VMA boundaries; this avoids
-        * the ranges growing with the unused space between consecutive VMAs,
-        * but also the mmu_gather::vma_* flags from tlb_start_vma() rely on
-        * this.
+        * VM_PFNMAP is more fragile because the core mm will not track the
+        * page mapcount -- there might not be page-frames for these PFNs after
+        * all. Force flush TLBs for such ranges to avoid munmap() vs
+        * unmap_mapping_range() races.
         */
-       tlb_flush_mmu_tlbonly(tlb);
+       if (tlb->vma_pfn || !IS_ENABLED(CONFIG_MMU_GATHER_MERGE_VMAS)) {
+               /*
+                * Do a TLB flush and reset the range at VMA boundaries; this avoids
+                * the ranges growing with the unused space between consecutive VMAs.
+                */
+               tlb_flush_mmu_tlbonly(tlb);
+       }
 }
-#endif
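
With tlb->vma_pfn in place, the per-VMA flush decision in tlb_end_vma()
resolves as follows (a summary of the code above, not extra semantics):

    tlb->fullmm                                -> no per-VMA work at all
    !CONFIG_MMU_GATHER_MERGE_VMAS              -> flush at every VMA boundary
    MMU_GATHER_MERGE_VMAS + ordinary VMA       -> defer; ranges merge across VMAs
    MMU_GATHER_MERGE_VMAS + VM_PFNMAP/MIXEDMAP -> flush anyway, closing the
                                                  munmap() vs unmap_mapping_range()
                                                  race described in the comment
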
 
 /*
  * tlb_flush_{pte|pmd|pud|p4d}_range() adjust the tlb->start and tlb->end,
index 2991785..8df475d 100644 (file)
@@ -260,6 +260,7 @@ struct plat_stmmacenet_data {
        bool has_crossts;
        int int_snapshot_num;
        int ext_snapshot_num;
+       bool int_snapshot_en;
        bool ext_snapshot_en;
        bool multi_msi_en;
        int msi_mac_vec;
index 0e40c3d..08fc30c 100644 (file)
@@ -78,6 +78,15 @@ enum amt_status {
 
 #define AMT_STATUS_MAX (__AMT_STATUS_MAX - 1)
 
+/* Gateway events only */
+enum amt_event {
+       AMT_EVENT_NONE,
+       AMT_EVENT_RECEIVE,
+       AMT_EVENT_SEND_DISCOVERY,
+       AMT_EVENT_SEND_REQUEST,
+       __AMT_EVENT_MAX,
+};
+
 struct amt_header {
 #if defined(__LITTLE_ENDIAN_BITFIELD)
        u8 type:4,
@@ -292,6 +301,12 @@ struct amt_group_node {
        struct hlist_head       sources[];
 };
 
+#define AMT_MAX_EVENTS 16
+struct amt_events {
+       enum amt_event event;
+       struct sk_buff *skb;
+};
+
 struct amt_dev {
        struct net_device       *dev;
        struct net_device       *stream_dev;
@@ -308,6 +323,7 @@ struct amt_dev {
        struct delayed_work     req_wq;
        /* Protected by RTNL */
        struct delayed_work     secret_wq;
+       struct work_struct      event_wq;
        /* AMT status */
        enum amt_status         status;
        /* Generated key */
@@ -345,6 +361,10 @@ struct amt_dev {
        /* Used only in gateway mode */
        u64                     mac:48,
                                reserved:16;
+       /* AMT gateway side message handler queue */
+       struct amt_events       events[AMT_MAX_EVENTS];
+       u8                      event_idx;
+       u8                      nr_events;
 };
 
 #define AMT_TOS                        0xc0
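
The new fields implement a small, bounded event queue drained by event_wq:
event_idx is the head and nr_events the fill level. The enqueue/dequeue code is
not part of this header hunk; a sketch consistent with these fields, using
hypothetical helper names and assuming the caller already holds whatever lock
serializes the ring:

    static bool amt_ring_push(struct amt_dev *amt, enum amt_event event,
                              struct sk_buff *skb)
    {
            int idx;

            if (amt->nr_events >= AMT_MAX_EVENTS)
                    return false;                   /* ring full: caller drops */

            idx = (amt->event_idx + amt->nr_events) % AMT_MAX_EVENTS;
            amt->events[idx].event = event;
            amt->events[idx].skb = skb;
            amt->nr_events++;
            return true;
    }

    static bool amt_ring_pop(struct amt_dev *amt, struct amt_events *out)
    {
            if (!amt->nr_events)
                    return false;

            *out = amt->events[amt->event_idx];
            amt->events[amt->event_idx].skb = NULL;
            amt->event_idx = (amt->event_idx + 1) % AMT_MAX_EVENTS;
            amt->nr_events--;
            return true;
    }
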
index ebfa3df..fd6b510 100644 (file)
@@ -179,7 +179,7 @@ static inline bool inet_sk_bound_dev_eq(struct net *net, int bound_dev_if,
                                        int dif, int sdif)
 {
 #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
-       return inet_bound_dev_eq(!!net->ipv4.sysctl_tcp_l3mdev_accept,
+       return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept),
                                 bound_dev_if, dif, sdif);
 #else
        return inet_bound_dev_eq(true, bound_dev_if, dif, sdif);
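
This is the first of many identical conversions below: these sysctls can be
rewritten at any moment, so unannotated reads are data races, and a plain load
lets the compiler tear, fuse, or refetch the value. The paired idiom, stripped
to its essentials (a sketch, not kernel code):

    static int tunable;                     /* updated by a sysctl handler */

    static void tunable_store(int v)
    {
            WRITE_ONCE(tunable, v);         /* single, untorn store */
    }

    static int tunable_load(void)
    {
            return READ_ONCE(tunable);      /* single, untorn load; never refetched */
    }
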
index daead5f..6395f6b 100644 (file)
@@ -107,7 +107,8 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
 
 static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb)
 {
-       if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)
+       if (!sk->sk_mark &&
+           READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept))
                return skb->mark;
 
        return sk->sk_mark;
@@ -120,7 +121,7 @@ static inline int inet_request_bound_dev_if(const struct sock *sk,
 #ifdef CONFIG_NET_L3_MASTER_DEV
        struct net *net = sock_net(sk);
 
-       if (!bound_dev_if && net->ipv4.sysctl_tcp_l3mdev_accept)
+       if (!bound_dev_if && READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept))
                return l3mdev_master_ifindex_by_index(net, skb->skb_iif);
 #endif
 
@@ -132,7 +133,7 @@ static inline int inet_sk_bound_l3mdev(const struct sock *sk)
 #ifdef CONFIG_NET_L3_MASTER_DEV
        struct net *net = sock_net(sk);
 
-       if (!net->ipv4.sysctl_tcp_l3mdev_accept)
+       if (!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept))
                return l3mdev_master_ifindex_by_index(net,
                                                      sk->sk_bound_dev_if);
 #endif
@@ -374,7 +375,7 @@ static inline bool inet_get_convert_csum(struct sock *sk)
 static inline bool inet_can_nonlocal_bind(struct net *net,
                                          struct inet_sock *inet)
 {
-       return net->ipv4.sysctl_ip_nonlocal_bind ||
+       return READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind) ||
                inet->freebind || inet->transparent;
 }
 
index 26fffda..1c979fd 100644 (file)
@@ -357,7 +357,7 @@ static inline bool sysctl_dev_name_is_allowed(const char *name)
 
 static inline bool inet_port_requires_bind_service(struct net *net, unsigned short port)
 {
-       return port < net->ipv4.sysctl_ip_prot_sock;
+       return port < READ_ONCE(net->ipv4.sysctl_ip_prot_sock);
 }
 
 #else
@@ -384,7 +384,7 @@ void ipfrag_init(void);
 void ip_static_sysctl_init(void);
 
 #define IP4_REPLY_MARK(net, mark) \
-       ((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0)
+       (READ_ONCE((net)->ipv4.sysctl_fwmark_reflect) ? (mark) : 0)
 
 static inline bool ip_is_fragment(const struct iphdr *iph)
 {
@@ -446,7 +446,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
        struct net *net = dev_net(dst->dev);
        unsigned int mtu;
 
-       if (net->ipv4.sysctl_ip_fwd_use_pmtu ||
+       if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) ||
            ip_mtu_locked(dst) ||
            !forwarding) {
                mtu = rt->rt_pmtu;
index f51c06a..6aef8cb 100644 (file)
@@ -35,8 +35,6 @@
 
 /* This is used to register protocols. */
 struct net_protocol {
-       int                     (*early_demux)(struct sk_buff *skb);
-       int                     (*early_demux_handler)(struct sk_buff *skb);
        int                     (*handler)(struct sk_buff *skb);
 
        /* This returns an error if we weren't able to handle the error. */
@@ -52,8 +50,6 @@ struct net_protocol {
 
 #if IS_ENABLED(CONFIG_IPV6)
 struct inet6_protocol {
-       void    (*early_demux)(struct sk_buff *skb);
-       void    (*early_demux_handler)(struct sk_buff *skb);
        int     (*handler)(struct sk_buff *skb);
 
        /* This returns an error if we weren't able to handle the error. */
index 991a398..bbcf2ab 100644 (file)
@@ -373,7 +373,7 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst)
        struct net *net = dev_net(dst->dev);
 
        if (hoplimit == 0)
-               hoplimit = net->ipv4.sysctl_ip_default_ttl;
+               hoplimit = READ_ONCE(net->ipv4.sysctl_ip_default_ttl);
        return hoplimit;
 }
 
index 1e99f5c..071735e 100644 (file)
@@ -932,7 +932,7 @@ extern const struct inet_connection_sock_af_ops ipv6_specific;
 
 INDIRECT_CALLABLE_DECLARE(void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb));
 INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *skb));
-INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *skb));
+void tcp_v6_early_demux(struct sk_buff *skb);
 
 #endif
 
@@ -1403,8 +1403,8 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk)
        struct tcp_sock *tp = tcp_sk(sk);
        s32 delta;
 
-       if (!sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle || tp->packets_out ||
-           ca_ops->cong_control)
+       if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) ||
+           tp->packets_out || ca_ops->cong_control)
                return;
        delta = tcp_jiffies32 - tp->lsndtime;
        if (delta > inet_csk(sk)->icsk_rto)
@@ -1493,21 +1493,24 @@ static inline int keepalive_intvl_when(const struct tcp_sock *tp)
 {
        struct net *net = sock_net((struct sock *)tp);
 
-       return tp->keepalive_intvl ? : net->ipv4.sysctl_tcp_keepalive_intvl;
+       return tp->keepalive_intvl ? :
+               READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl);
 }
 
 static inline int keepalive_time_when(const struct tcp_sock *tp)
 {
        struct net *net = sock_net((struct sock *)tp);
 
-       return tp->keepalive_time ? : net->ipv4.sysctl_tcp_keepalive_time;
+       return tp->keepalive_time ? :
+               READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time);
 }
 
 static inline int keepalive_probes(const struct tcp_sock *tp)
 {
        struct net *net = sock_net((struct sock *)tp);
 
-       return tp->keepalive_probes ? : net->ipv4.sysctl_tcp_keepalive_probes;
+       return tp->keepalive_probes ? :
+               READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes);
 }
 
 static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
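
These helpers rely on GCC's binary ?: extension: a ?: b evaluates a exactly
once and yields it when nonzero, b otherwise, which is why only the sysctl
fallback needed a READ_ONCE() annotation here. With placeholder names:

    int v = tp_val ?: READ_ONCE(sysctl_default);
    /* equivalent to: tp_val ? tp_val : READ_ONCE(sysctl_default),
     * except tp_val is evaluated only once
     */
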
@@ -1520,7 +1523,8 @@ static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
 
 static inline int tcp_fin_time(const struct sock *sk)
 {
-       int fin_timeout = tcp_sk(sk)->linger2 ? : sock_net(sk)->ipv4.sysctl_tcp_fin_timeout;
+       int fin_timeout = tcp_sk(sk)->linger2 ? :
+               READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fin_timeout);
        const int rto = inet_csk(sk)->icsk_rto;
 
        if (fin_timeout < (rto << 2) - (rto >> 1))
@@ -2023,7 +2027,7 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr);
 static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp)
 {
        struct net *net = sock_net((struct sock *)tp);
-       return tp->notsent_lowat ?: net->ipv4.sysctl_tcp_notsent_lowat;
+       return tp->notsent_lowat ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat);
 }
 
 bool tcp_stream_memory_free(const struct sock *sk, int wake);
index b83a003..8dd4aa1 100644 (file)
@@ -167,7 +167,7 @@ static inline void udp_csum_pull_header(struct sk_buff *skb)
 typedef struct sock *(*udp_lookup_t)(const struct sk_buff *skb, __be16 sport,
                                     __be16 dport);
 
-INDIRECT_CALLABLE_DECLARE(void udp_v6_early_demux(struct sk_buff *));
+void udp_v6_early_demux(struct sk_buff *skb);
 INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *));
 
 struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
@@ -238,7 +238,7 @@ static inline bool udp_sk_bound_dev_eq(struct net *net, int bound_dev_if,
                                       int dif, int sdif)
 {
 #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
-       return inet_bound_dev_eq(!!net->ipv4.sysctl_udp_l3mdev_accept,
+       return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_udp_l3mdev_accept),
                                 bound_dev_if, dif, sdif);
 #else
        return inet_bound_dev_eq(true, bound_dev_if, dif, sdif);
index 50ba70f..1c304fe 100644 (file)
@@ -511,10 +511,52 @@ static bool srcu_readers_active(struct srcu_struct *ssp)
        return sum;
 }
 
-#define SRCU_INTERVAL          1       // Base delay if no expedited GPs pending.
-#define SRCU_MAX_INTERVAL      10      // Maximum incremental delay from slow readers.
-#define SRCU_MAX_NODELAY_PHASE 1       // Maximum per-GP-phase consecutive no-delay instances.
-#define SRCU_MAX_NODELAY       100     // Maximum consecutive no-delay instances.
+/*
+ * We use an adaptive strategy for synchronize_srcu() and especially for
+ * synchronize_srcu_expedited().  We spin for a fixed time period
+ * (defined below, boot time configurable) to allow SRCU readers to exit
+ * their read-side critical sections.  If there are still some readers
+ * after one jiffy, we repeatedly block for one jiffy time periods.
+ * The blocking time is increased as the grace-period age increases,
+ * with max blocking time capped at 10 jiffies.
+ */
+#define SRCU_DEFAULT_RETRY_CHECK_DELAY         5
+
+static ulong srcu_retry_check_delay = SRCU_DEFAULT_RETRY_CHECK_DELAY;
+module_param(srcu_retry_check_delay, ulong, 0444);
+
+#define SRCU_INTERVAL          1               // Base delay if no expedited GPs pending.
+#define SRCU_MAX_INTERVAL      10              // Maximum incremental delay from slow readers.
+
+#define SRCU_DEFAULT_MAX_NODELAY_PHASE_LO      3UL     // Lowmark on default per-GP-phase
+                                                       // no-delay instances.
+#define SRCU_DEFAULT_MAX_NODELAY_PHASE_HI      1000UL  // Highmark on default per-GP-phase
+                                                       // no-delay instances.
+
+#define SRCU_UL_CLAMP_LO(val, low)     ((val) > (low) ? (val) : (low))
+#define SRCU_UL_CLAMP_HI(val, high)    ((val) < (high) ? (val) : (high))
+#define SRCU_UL_CLAMP(val, low, high)  SRCU_UL_CLAMP_HI(SRCU_UL_CLAMP_LO((val), (low)), (high))
+// Per-GP-phase no-delay instances adjusted to allow a non-sleeping poll of up
+// to one jiffy. The multiplication by 2 factors in the srcu_get_delay()
+// calls made from process_srcu().
+#define SRCU_DEFAULT_MAX_NODELAY_PHASE_ADJUSTED        \
+       (2UL * USEC_PER_SEC / HZ / SRCU_DEFAULT_RETRY_CHECK_DELAY)
+
+// Maximum per-GP-phase consecutive no-delay instances.
+#define SRCU_DEFAULT_MAX_NODELAY_PHASE \
+       SRCU_UL_CLAMP(SRCU_DEFAULT_MAX_NODELAY_PHASE_ADJUSTED,  \
+                     SRCU_DEFAULT_MAX_NODELAY_PHASE_LO,        \
+                     SRCU_DEFAULT_MAX_NODELAY_PHASE_HI)
+
+static ulong srcu_max_nodelay_phase = SRCU_DEFAULT_MAX_NODELAY_PHASE;
+module_param(srcu_max_nodelay_phase, ulong, 0444);
+
+// Maximum consecutive no-delay instances.
+#define SRCU_DEFAULT_MAX_NODELAY       (SRCU_DEFAULT_MAX_NODELAY_PHASE > 100 ? \
+                                        SRCU_DEFAULT_MAX_NODELAY_PHASE : 100)
+
+static ulong srcu_max_nodelay = SRCU_DEFAULT_MAX_NODELAY;
+module_param(srcu_max_nodelay, ulong, 0444);
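
Plugging in numbers makes the defaults concrete (USEC_PER_SEC = 1000000, with
the default 5 us retry delay):

    HZ = 1000:  2 * 1000000 / 1000 / 5 = 400
                SRCU_UL_CLAMP(400, 3, 1000) = 400   -> srcu_max_nodelay_phase
                400 > 100, so                 400   -> srcu_max_nodelay
    HZ = 250:   2 * 1000000 / 250 / 5 = 1600, clamped to the 1000 highmark
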
 
 /*
  * Return grace-period delay, zero if there are expedited grace
@@ -522,16 +564,22 @@ static bool srcu_readers_active(struct srcu_struct *ssp)
  */
 static unsigned long srcu_get_delay(struct srcu_struct *ssp)
 {
+       unsigned long gpstart;
+       unsigned long j;
        unsigned long jbase = SRCU_INTERVAL;
 
        if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), READ_ONCE(ssp->srcu_gp_seq_needed_exp)))
                jbase = 0;
-       if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)))
-               jbase += jiffies - READ_ONCE(ssp->srcu_gp_start);
-       if (!jbase) {
-               WRITE_ONCE(ssp->srcu_n_exp_nodelay, READ_ONCE(ssp->srcu_n_exp_nodelay) + 1);
-               if (READ_ONCE(ssp->srcu_n_exp_nodelay) > SRCU_MAX_NODELAY_PHASE)
-                       jbase = 1;
+       if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq))) {
+               j = jiffies - 1;
+               gpstart = READ_ONCE(ssp->srcu_gp_start);
+               if (time_after(j, gpstart))
+                       jbase += j - gpstart;
+               if (!jbase) {
+                       WRITE_ONCE(ssp->srcu_n_exp_nodelay, READ_ONCE(ssp->srcu_n_exp_nodelay) + 1);
+                       if (READ_ONCE(ssp->srcu_n_exp_nodelay) > srcu_max_nodelay_phase)
+                               jbase = 1;
+               }
        }
        return jbase > SRCU_MAX_INTERVAL ? SRCU_MAX_INTERVAL : jbase;
 }
@@ -606,15 +654,6 @@ void __srcu_read_unlock(struct srcu_struct *ssp, int idx)
 }
 EXPORT_SYMBOL_GPL(__srcu_read_unlock);
 
-/*
- * We use an adaptive strategy for synchronize_srcu() and especially for
- * synchronize_srcu_expedited().  We spin for a fixed time period
- * (defined below) to allow SRCU readers to exit their read-side critical
- * sections.  If there are still some readers after a few microseconds,
- * we repeatedly block for 1-millisecond time periods.
- */
-#define SRCU_RETRY_CHECK_DELAY         5
-
 /*
  * Start an SRCU grace period.
  */
@@ -700,7 +739,7 @@ static void srcu_schedule_cbs_snp(struct srcu_struct *ssp, struct srcu_node *snp
  */
 static void srcu_gp_end(struct srcu_struct *ssp)
 {
-       unsigned long cbdelay;
+       unsigned long cbdelay = 1;
        bool cbs;
        bool last_lvl;
        int cpu;
@@ -720,7 +759,9 @@ static void srcu_gp_end(struct srcu_struct *ssp)
        spin_lock_irq_rcu_node(ssp);
        idx = rcu_seq_state(ssp->srcu_gp_seq);
        WARN_ON_ONCE(idx != SRCU_STATE_SCAN2);
-       cbdelay = !!srcu_get_delay(ssp);
+       if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), READ_ONCE(ssp->srcu_gp_seq_needed_exp)))
+               cbdelay = 0;
+
        WRITE_ONCE(ssp->srcu_last_gp_end, ktime_get_mono_fast_ns());
        rcu_seq_end(&ssp->srcu_gp_seq);
        gpseq = rcu_seq_current(&ssp->srcu_gp_seq);
@@ -921,12 +962,16 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
  */
 static bool try_check_zero(struct srcu_struct *ssp, int idx, int trycount)
 {
+       unsigned long curdelay;
+
+       curdelay = !srcu_get_delay(ssp);
+
        for (;;) {
                if (srcu_readers_active_idx_check(ssp, idx))
                        return true;
-               if (--trycount + !srcu_get_delay(ssp) <= 0)
+               if ((--trycount + curdelay) <= 0)
                        return false;
-               udelay(SRCU_RETRY_CHECK_DELAY);
+               udelay(srcu_retry_check_delay);
        }
 }
 
@@ -1582,7 +1627,7 @@ static void process_srcu(struct work_struct *work)
                j = jiffies;
                if (READ_ONCE(ssp->reschedule_jiffies) == j) {
                        WRITE_ONCE(ssp->reschedule_count, READ_ONCE(ssp->reschedule_count) + 1);
-                       if (READ_ONCE(ssp->reschedule_count) > SRCU_MAX_NODELAY)
+                       if (READ_ONCE(ssp->reschedule_count) > srcu_max_nodelay)
                                curdelay = 1;
                } else {
                        WRITE_ONCE(ssp->reschedule_count, 1);
@@ -1674,6 +1719,11 @@ static int __init srcu_bootup_announce(void)
        pr_info("Hierarchical SRCU implementation.\n");
        if (exp_holdoff != DEFAULT_SRCU_EXP_HOLDOFF)
                pr_info("\tNon-default auto-expedite holdoff of %lu ns.\n", exp_holdoff);
+       if (srcu_retry_check_delay != SRCU_DEFAULT_RETRY_CHECK_DELAY)
+               pr_info("\tNon-default retry check delay of %lu us.\n", srcu_retry_check_delay);
+       if (srcu_max_nodelay != SRCU_DEFAULT_MAX_NODELAY)
+               pr_info("\tNon-default max no-delay of %lu.\n", srcu_max_nodelay);
+       pr_info("\tMax phase no-delay instances is %lu.\n", srcu_max_nodelay_phase);
        return 0;
 }
 early_initcall(srcu_bootup_announce);
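
All three knobs are read-only module parameters (mode 0444), so they can only
be set at boot. Built into the kernel, they take the srcutree. prefix on the
command line, e.g. (values chosen arbitrarily):

    srcutree.srcu_retry_check_delay=10 srcutree.srcu_max_nodelay_phase=500
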
index 230038d..bb9962b 100644 (file)
@@ -34,6 +34,27 @@ MODULE_LICENSE("GPL");
 #define WATCH_QUEUE_NOTE_SIZE 128
 #define WATCH_QUEUE_NOTES_PER_PAGE (PAGE_SIZE / WATCH_QUEUE_NOTE_SIZE)
 
+/*
+ * This must be called under the RCU read-lock, which makes
+ * sure that the wqueue still exists. It can then take the lock,
+ * and check that the wqueue hasn't been destroyed, which in
+ * turn makes sure that the notification pipe still exists.
+ */
+static inline bool lock_wqueue(struct watch_queue *wqueue)
+{
+       spin_lock_bh(&wqueue->lock);
+       if (unlikely(wqueue->defunct)) {
+               spin_unlock_bh(&wqueue->lock);
+               return false;
+       }
+       return true;
+}
+
+static inline void unlock_wqueue(struct watch_queue *wqueue)
+{
+       spin_unlock_bh(&wqueue->lock);
+}
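
The helper pair is built for the caller shape the later hunks in this file
adopt: enter an RCU read-side section to keep *wqueue alive, then let
lock_wqueue() decide whether the queue is still live enough to post to.
Condensed from __post_watch_notification() below:

    rcu_read_lock();
    hlist_for_each_entry_rcu(watch, &wlist->watchers, list_node) {
            struct watch_queue *wqueue = rcu_dereference(watch->queue);

            if (lock_wqueue(wqueue)) {      /* false once the queue is defunct */
                    post_one_notification(wqueue, n);
                    unlock_wqueue(wqueue);
            }
    }
    rcu_read_unlock();
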
+
 static void watch_queue_pipe_buf_release(struct pipe_inode_info *pipe,
                                         struct pipe_buffer *buf)
 {
@@ -69,6 +90,10 @@ static const struct pipe_buf_operations watch_queue_pipe_buf_ops = {
 
 /*
  * Post a notification to a watch queue.
+ *
+ * Must be called with the RCU lock for reading, and the
+ * watch_queue lock held, which guarantees that the pipe
+ * hasn't been released.
  */
 static bool post_one_notification(struct watch_queue *wqueue,
                                  struct watch_notification *n)
@@ -85,9 +110,6 @@ static bool post_one_notification(struct watch_queue *wqueue,
 
        spin_lock_irq(&pipe->rd_wait.lock);
 
-       if (wqueue->defunct)
-               goto out;
-
        mask = pipe->ring_size - 1;
        head = pipe->head;
        tail = pipe->tail;
@@ -203,7 +225,10 @@ void __post_watch_notification(struct watch_list *wlist,
                if (security_post_notification(watch->cred, cred, n) < 0)
                        continue;
 
-               post_one_notification(wqueue, n);
+               if (lock_wqueue(wqueue)) {
+                       post_one_notification(wqueue, n);
+                       unlock_wqueue(wqueue);
+               }
        }
 
        rcu_read_unlock();
@@ -462,11 +487,12 @@ int add_watch_to_object(struct watch *watch, struct watch_list *wlist)
                return -EAGAIN;
        }
 
-       spin_lock_bh(&wqueue->lock);
-       kref_get(&wqueue->usage);
-       kref_get(&watch->usage);
-       hlist_add_head(&watch->queue_node, &wqueue->watches);
-       spin_unlock_bh(&wqueue->lock);
+       if (lock_wqueue(wqueue)) {
+               kref_get(&wqueue->usage);
+               kref_get(&watch->usage);
+               hlist_add_head(&watch->queue_node, &wqueue->watches);
+               unlock_wqueue(wqueue);
+       }
 
        hlist_add_head(&watch->list_node, &wlist->watchers);
        return 0;
@@ -520,20 +546,15 @@ found:
 
        wqueue = rcu_dereference(watch->queue);
 
-       /* We don't need the watch list lock for the next bit as RCU is
-        * protecting *wqueue from deallocation.
-        */
-       if (wqueue) {
+       if (lock_wqueue(wqueue)) {
                post_one_notification(wqueue, &n.watch);
 
-               spin_lock_bh(&wqueue->lock);
-
                if (!hlist_unhashed(&watch->queue_node)) {
                        hlist_del_init_rcu(&watch->queue_node);
                        put_watch(watch);
                }
 
-               spin_unlock_bh(&wqueue->lock);
+               unlock_wqueue(wqueue);
        }
 
        if (wlist->release_watch) {
index 2a6a0b0..7950f75 100644 (file)
@@ -7041,7 +7041,7 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len
        if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
                return -EINVAL;
 
-       if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
+       if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies))
                return -EINVAL;
 
        if (!th->ack || th->rst || th->syn)
@@ -7116,7 +7116,7 @@ BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
        if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
                return -EINVAL;
 
-       if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
+       if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies))
                return -ENOENT;
 
        if (!th->syn || th->ack || th->fin || th->rst)
index 5f85e01..b0ff615 100644 (file)
@@ -64,7 +64,7 @@ u32 secure_tcpv6_ts_off(const struct net *net,
                .daddr = *(struct in6_addr *)daddr,
        };
 
-       if (net->ipv4.sysctl_tcp_timestamps != 1)
+       if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
                return 0;
 
        ts_secret_init();
@@ -120,7 +120,7 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
 #ifdef CONFIG_INET
 u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr)
 {
-       if (net->ipv4.sysctl_tcp_timestamps != 1)
+       if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
                return 0;
 
        ts_secret_init();
index 3f00a28..5daa1fa 100644 (file)
@@ -387,7 +387,7 @@ void reuseport_stop_listen_sock(struct sock *sk)
                prog = rcu_dereference_protected(reuse->prog,
                                                 lockdep_is_held(&reuseport_lock));
 
-               if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req ||
+               if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_migrate_req) ||
                    (prog && prog->expected_attach_type == BPF_SK_REUSEPORT_SELECT_OR_MIGRATE)) {
                        /* Migration capable, move sk from the listening section
                         * to the closed section.
@@ -545,7 +545,7 @@ struct sock *reuseport_migrate_sock(struct sock *sk,
        hash = migrating_sk->sk_hash;
        prog = rcu_dereference(reuse->prog);
        if (!prog || prog->expected_attach_type != BPF_SK_REUSEPORT_SELECT_OR_MIGRATE) {
-               if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req)
+               if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_migrate_req))
                        goto select_by_hash;
                goto failure;
        }
index 3738f2d..2dd76eb 100644 (file)
@@ -248,6 +248,7 @@ static void dsa_port_reset_vlan_filtering(struct dsa_port *dp,
        struct netlink_ext_ack extack = {0};
        bool change_vlan_filtering = false;
        struct dsa_switch *ds = dp->ds;
+       struct dsa_port *other_dp;
        bool vlan_filtering;
        int err;
 
@@ -270,8 +271,8 @@ static void dsa_port_reset_vlan_filtering(struct dsa_port *dp,
         * VLAN-aware bridge.
         */
        if (change_vlan_filtering && ds->vlan_filtering_is_global) {
-               dsa_switch_for_each_port(dp, ds) {
-                       struct net_device *br = dsa_port_bridge_dev_get(dp);
+               dsa_switch_for_each_port(other_dp, ds) {
+                       struct net_device *br = dsa_port_bridge_dev_get(other_dp);
 
                        if (br && br_vlan_enabled(br)) {
                                change_vlan_filtering = false;
@@ -799,7 +800,7 @@ int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
                ds->vlan_filtering = vlan_filtering;
 
                dsa_switch_for_each_user_port(other_dp, ds) {
-                       struct net_device *slave = dp->slave;
+                       struct net_device *slave = other_dp->slave;
 
                        /* We might be called in the unbind path, so not
                         * all slave devices might still be registered.
index ac67f6b..252c8bc 100644 (file)
@@ -217,7 +217,7 @@ int inet_listen(struct socket *sock, int backlog)
                 * because the socket was in TCP_LISTEN state previously but
                 * was shutdown() rather than close().
                 */
-               tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
+               tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen);
                if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
                    (tcp_fastopen & TFO_SERVER_ENABLE) &&
                    !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
@@ -335,7 +335,7 @@ lookup_protocol:
                        inet->hdrincl = 1;
        }
 
-       if (net->ipv4.sysctl_ip_no_pmtu_disc)
+       if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc))
                inet->pmtudisc = IP_PMTUDISC_DONT;
        else
                inet->pmtudisc = IP_PMTUDISC_WANT;
@@ -1710,24 +1710,14 @@ static const struct net_protocol igmp_protocol = {
 };
 #endif
 
-/* thinking of making this const? Don't.
- * early_demux can change based on sysctl.
- */
-static struct net_protocol tcp_protocol = {
-       .early_demux    =       tcp_v4_early_demux,
-       .early_demux_handler =  tcp_v4_early_demux,
+static const struct net_protocol tcp_protocol = {
        .handler        =       tcp_v4_rcv,
        .err_handler    =       tcp_v4_err,
        .no_policy      =       1,
        .icmp_strict_tag_validation = 1,
 };
 
-/* thinking of making this const? Don't.
- * early_demux can change based on sysctl.
- */
-static struct net_protocol udp_protocol = {
-       .early_demux =  udp_v4_early_demux,
-       .early_demux_handler =  udp_v4_early_demux,
+static const struct net_protocol udp_protocol = {
        .handler =      udp_rcv,
        .err_handler =  udp_err,
        .no_policy =    1,
index 6eea1e9..f8ad044 100644 (file)
@@ -507,7 +507,7 @@ static int ah_init_state(struct xfrm_state *x)
 
        if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
            crypto_ahash_digestsize(ahash)) {
-               pr_info("%s: %s digestsize %u != %hu\n",
+               pr_info("%s: %s digestsize %u != %u\n",
                        __func__, x->aalg->alg_name,
                        crypto_ahash_digestsize(ahash),
                        aalg_desc->uinfo.auth.icv_fullbits / 8);
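
This hunk and the matching esp4.c change below fix a format-string mismatch
flagged by -Wformat rather than a runtime bug: icv_fullbits is 16 bits wide,
but dividing it by 8 promotes the expression to int, which %hu no longer
describes. The same effect in plain userspace C:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint16_t bits = 256;

            /* bits / 8 has type int after integer promotion, so %d matches;
             * %hu would describe a narrower type than the argument actually
             * passed and trips -Wformat.
             */
            printf("%d\n", bits / 8);
            return 0;
    }
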
index b21238d..b694f35 100644 (file)
@@ -1108,7 +1108,7 @@ static int esp_init_authenc(struct xfrm_state *x)
                err = -EINVAL;
                if (aalg_desc->uinfo.auth.icv_fullbits / 8 !=
                    crypto_aead_authsize(aead)) {
-                       pr_info("ESP: %s digestsize %u != %hu\n",
+                       pr_info("ESP: %s digestsize %u != %u\n",
                                x->aalg->alg_name,
                                crypto_aead_authsize(aead),
                                aalg_desc->uinfo.auth.icv_fullbits / 8);
index d9fdcba..db7b250 100644 (file)
@@ -2216,7 +2216,7 @@ void fib_select_multipath(struct fib_result *res, int hash)
        }
 
        change_nexthops(fi) {
-               if (net->ipv4.sysctl_fib_multipath_use_neigh) {
+               if (READ_ONCE(net->ipv4.sysctl_fib_multipath_use_neigh)) {
                        if (!fib_good_nh(nexthop_nh))
                                continue;
                        if (!first) {
index 57c4f0d..d5d745c 100644 (file)
@@ -881,7 +881,7 @@ static enum skb_drop_reason icmp_unreach(struct sk_buff *skb)
                         * values please see
                         * Documentation/networking/ip-sysctl.rst
                         */
-                       switch (net->ipv4.sysctl_ip_no_pmtu_disc) {
+                       switch (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) {
                        default:
                                net_dbg_ratelimited("%pI4: fragmentation needed and DF set\n",
                                                    &iph->daddr);
index b65d074..e3ab0cb 100644 (file)
@@ -467,7 +467,8 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
 
        if (pmc->multiaddr == IGMP_ALL_HOSTS)
                return skb;
-       if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
+       if (ipv4_is_local_multicast(pmc->multiaddr) &&
+           !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
                return skb;
 
        mtu = READ_ONCE(dev->mtu);
@@ -593,7 +594,7 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc)
                        if (pmc->multiaddr == IGMP_ALL_HOSTS)
                                continue;
                        if (ipv4_is_local_multicast(pmc->multiaddr) &&
-                            !net->ipv4.sysctl_igmp_llm_reports)
+                           !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
                                continue;
                        spin_lock_bh(&pmc->lock);
                        if (pmc->sfcount[MCAST_EXCLUDE])
@@ -736,7 +737,8 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
        if (type == IGMPV3_HOST_MEMBERSHIP_REPORT)
                return igmpv3_send_report(in_dev, pmc);
 
-       if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports)
+       if (ipv4_is_local_multicast(group) &&
+           !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
                return 0;
 
        if (type == IGMP_HOST_LEAVE_MESSAGE)
@@ -825,7 +827,7 @@ static void igmp_ifc_event(struct in_device *in_dev)
        struct net *net = dev_net(in_dev->dev);
        if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev))
                return;
-       WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv);
+       WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv));
        igmp_ifc_start_timer(in_dev, 1);
 }
 
@@ -920,7 +922,8 @@ static bool igmp_heard_report(struct in_device *in_dev, __be32 group)
 
        if (group == IGMP_ALL_HOSTS)
                return false;
-       if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports)
+       if (ipv4_is_local_multicast(group) &&
+           !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
                return false;
 
        rcu_read_lock();
@@ -1006,7 +1009,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
                 * received value was zero, use the default or statically
                 * configured value.
                 */
-               in_dev->mr_qrv = ih3->qrv ?: net->ipv4.sysctl_igmp_qrv;
+               in_dev->mr_qrv = ih3->qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
                in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL;
 
                /* RFC3376, 8.3. Query Response Interval:
@@ -1045,7 +1048,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
                if (im->multiaddr == IGMP_ALL_HOSTS)
                        continue;
                if (ipv4_is_local_multicast(im->multiaddr) &&
-                   !net->ipv4.sysctl_igmp_llm_reports)
+                   !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
                        continue;
                spin_lock_bh(&im->lock);
                if (im->tm_running)
@@ -1186,7 +1189,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im,
        pmc->interface = im->interface;
        in_dev_hold(in_dev);
        pmc->multiaddr = im->multiaddr;
-       pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+       pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
        pmc->sfmode = im->sfmode;
        if (pmc->sfmode == MCAST_INCLUDE) {
                struct ip_sf_list *psf;
@@ -1237,9 +1240,11 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im)
                        swap(im->tomb, pmc->tomb);
                        swap(im->sources, pmc->sources);
                        for (psf = im->sources; psf; psf = psf->sf_next)
-                               psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+                               psf->sf_crcount = in_dev->mr_qrv ?:
+                                       READ_ONCE(net->ipv4.sysctl_igmp_qrv);
                } else {
-                       im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+                       im->crcount = in_dev->mr_qrv ?:
+                               READ_ONCE(net->ipv4.sysctl_igmp_qrv);
                }
                in_dev_put(pmc->interface);
                kfree_pmc(pmc);
@@ -1296,7 +1301,8 @@ static void __igmp_group_dropped(struct ip_mc_list *im, gfp_t gfp)
 #ifdef CONFIG_IP_MULTICAST
        if (im->multiaddr == IGMP_ALL_HOSTS)
                return;
-       if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
+       if (ipv4_is_local_multicast(im->multiaddr) &&
+           !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
                return;
 
        reporter = im->reporter;
@@ -1338,13 +1344,14 @@ static void igmp_group_added(struct ip_mc_list *im)
 #ifdef CONFIG_IP_MULTICAST
        if (im->multiaddr == IGMP_ALL_HOSTS)
                return;
-       if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
+       if (ipv4_is_local_multicast(im->multiaddr) &&
+           !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
                return;
 
        if (in_dev->dead)
                return;
 
-       im->unsolicit_count = net->ipv4.sysctl_igmp_qrv;
+       im->unsolicit_count = READ_ONCE(net->ipv4.sysctl_igmp_qrv);
        if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) {
                spin_lock_bh(&im->lock);
                igmp_start_timer(im, IGMP_INITIAL_REPORT_DELAY);
@@ -1358,7 +1365,7 @@ static void igmp_group_added(struct ip_mc_list *im)
         * IN() to IN(A).
         */
        if (im->sfmode == MCAST_EXCLUDE)
-               im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+               im->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
 
        igmp_ifc_event(in_dev);
 #endif
@@ -1642,7 +1649,7 @@ static void ip_mc_rejoin_groups(struct in_device *in_dev)
                if (im->multiaddr == IGMP_ALL_HOSTS)
                        continue;
                if (ipv4_is_local_multicast(im->multiaddr) &&
-                   !net->ipv4.sysctl_igmp_llm_reports)
+                   !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
                        continue;
 
                /* a failover is happening and switches
@@ -1749,7 +1756,7 @@ static void ip_mc_reset(struct in_device *in_dev)
 
        in_dev->mr_qi = IGMP_QUERY_INTERVAL;
        in_dev->mr_qri = IGMP_QUERY_RESPONSE_INTERVAL;
-       in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
+       in_dev->mr_qrv = READ_ONCE(net->ipv4.sysctl_igmp_qrv);
 }
 #else
 static void ip_mc_reset(struct in_device *in_dev)
@@ -1883,7 +1890,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
 #ifdef CONFIG_IP_MULTICAST
                if (psf->sf_oldin &&
                    !IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) {
-                       psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+                       psf->sf_crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
                        psf->sf_next = pmc->tomb;
                        pmc->tomb = psf;
                        rv = 1;
@@ -1947,7 +1954,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
                /* filter mode change */
                pmc->sfmode = MCAST_INCLUDE;
 #ifdef CONFIG_IP_MULTICAST
-               pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+               pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
                WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
                for (psf = pmc->sources; psf; psf = psf->sf_next)
                        psf->sf_crcount = 0;
@@ -2126,7 +2133,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
 #ifdef CONFIG_IP_MULTICAST
                /* else no filters; keep old mode for reports */
 
-               pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+               pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
                WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
                for (psf = pmc->sources; psf; psf = psf->sf_next)
                        psf->sf_crcount = 0;
@@ -2192,7 +2199,7 @@ static int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr,
                count++;
        }
        err = -ENOBUFS;
-       if (count >= net->ipv4.sysctl_igmp_max_memberships)
+       if (count >= READ_ONCE(net->ipv4.sysctl_igmp_max_memberships))
                goto done;
        iml = sock_kmalloc(sk, sizeof(*iml), GFP_KERNEL);
        if (!iml)
@@ -2379,7 +2386,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
        }
        /* else, add a new source to the filter */
 
-       if (psl && psl->sl_count >= net->ipv4.sysctl_igmp_max_msf) {
+       if (psl && psl->sl_count >= READ_ONCE(net->ipv4.sysctl_igmp_max_msf)) {
                err = -ENOBUFS;
                goto done;
        }
index 53f5f95..eb31c71 100644 (file)
@@ -263,7 +263,7 @@ next_port:
                goto other_half_scan;
        }
 
-       if (net->ipv4.sysctl_ip_autobind_reuse && !relax) {
+       if (READ_ONCE(net->ipv4.sysctl_ip_autobind_reuse) && !relax) {
                /* We still have a chance to connect to different destinations */
                relax = true;
                goto ports_exhausted;
@@ -833,7 +833,8 @@ static void reqsk_timer_handler(struct timer_list *t)
 
        icsk = inet_csk(sk_listener);
        net = sock_net(sk_listener);
-       max_syn_ack_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
+       max_syn_ack_retries = icsk->icsk_syn_retries ? :
+               READ_ONCE(net->ipv4.sysctl_tcp_synack_retries);
        /* Normally all the openreqs are young and become mature
         * (i.e. converted to established socket) for first timeout.
         * If synack was not acknowledged for 1 second, it means
index e3aa436..e18931a 100644 (file)
@@ -157,7 +157,7 @@ int ip_forward(struct sk_buff *skb)
            !skb_sec_path(skb))
                ip_rt_send_redirect(skb);
 
-       if (net->ipv4.sysctl_ip_fwd_update_priority)
+       if (READ_ONCE(net->ipv4.sysctl_ip_fwd_update_priority))
                skb->priority = rt_tos2priority(iph->tos);
 
        return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD,
index b1165f7..1b51239 100644 (file)
@@ -312,14 +312,13 @@ static bool ip_can_use_hint(const struct sk_buff *skb, const struct iphdr *iph,
               ip_hdr(hint)->tos == iph->tos;
 }
 
-INDIRECT_CALLABLE_DECLARE(int udp_v4_early_demux(struct sk_buff *));
-INDIRECT_CALLABLE_DECLARE(int tcp_v4_early_demux(struct sk_buff *));
+int tcp_v4_early_demux(struct sk_buff *skb);
+int udp_v4_early_demux(struct sk_buff *skb);
 static int ip_rcv_finish_core(struct net *net, struct sock *sk,
                              struct sk_buff *skb, struct net_device *dev,
                              const struct sk_buff *hint)
 {
        const struct iphdr *iph = ip_hdr(skb);
-       int (*edemux)(struct sk_buff *skb);
        int err, drop_reason;
        struct rtable *rt;
 
@@ -332,21 +331,29 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
                        goto drop_error;
        }
 
-       if (net->ipv4.sysctl_ip_early_demux &&
+       if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) &&
            !skb_dst(skb) &&
            !skb->sk &&
            !ip_is_fragment(iph)) {
-               const struct net_protocol *ipprot;
-               int protocol = iph->protocol;
-
-               ipprot = rcu_dereference(inet_protos[protocol]);
-               if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
-                       err = INDIRECT_CALL_2(edemux, tcp_v4_early_demux,
-                                             udp_v4_early_demux, skb);
-                       if (unlikely(err))
-                               goto drop_error;
-                       /* must reload iph, skb->head might have changed */
-                       iph = ip_hdr(skb);
+               switch (iph->protocol) {
+               case IPPROTO_TCP:
+                       if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux)) {
+                               tcp_v4_early_demux(skb);
+
+                               /* must reload iph, skb->head might have changed */
+                               iph = ip_hdr(skb);
+                       }
+                       break;
+               case IPPROTO_UDP:
+                       if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) {
+                               err = udp_v4_early_demux(skb);
+                               if (unlikely(err))
+                                       goto drop_error;
+
+                               /* must reload iph, skb->head might have changed */
+                               iph = ip_hdr(skb);
+                       }
+                       break;
                }
        }
 
index 445a9ec..a8a323e 100644 (file)
@@ -782,7 +782,7 @@ static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen)
        /* numsrc >= (4G-140)/128 overflow in 32 bits */
        err = -ENOBUFS;
        if (gsf->gf_numsrc >= 0x1ffffff ||
-           gsf->gf_numsrc > sock_net(sk)->ipv4.sysctl_igmp_max_msf)
+           gsf->gf_numsrc > READ_ONCE(sock_net(sk)->ipv4.sysctl_igmp_max_msf))
                goto out_free_gsf;
 
        err = -EINVAL;
@@ -832,7 +832,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
 
        /* numsrc >= (4G-140)/128 overflow in 32 bits */
        err = -ENOBUFS;
-       if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf)
+       if (n > READ_ONCE(sock_net(sk)->ipv4.sysctl_igmp_max_msf))
                goto out_free_gsf;
        err = set_mcast_msfilter(sk, gf32->gf_interface, n, gf32->gf_fmode,
                                 &gf32->gf_group, gf32->gf_slist_flex);
@@ -1244,7 +1244,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, int optname,
                }
                /* numsrc >= (1G-4) overflow in 32 bits */
                if (msf->imsf_numsrc >= 0x3ffffffcU ||
-                   msf->imsf_numsrc > net->ipv4.sysctl_igmp_max_msf) {
+                   msf->imsf_numsrc > READ_ONCE(net->ipv4.sysctl_igmp_max_msf)) {
                        kfree(msf);
                        err = -ENOBUFS;
                        break;
@@ -1606,7 +1606,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
        {
                struct net *net = sock_net(sk);
                val = (inet->uc_ttl == -1 ?
-                      net->ipv4.sysctl_ip_default_ttl :
+                      READ_ONCE(net->ipv4.sysctl_ip_default_ttl) :
                       inet->uc_ttl);
                break;
        }
index 918c61f..d640adc 100644 (file)
@@ -62,7 +62,7 @@ struct sk_buff *nf_reject_skb_v4_tcp_reset(struct net *net,
 
        skb_reserve(nskb, LL_MAX_HEADER);
        niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP,
-                                  net->ipv4.sysctl_ip_default_ttl);
+                                  READ_ONCE(net->ipv4.sysctl_ip_default_ttl));
        nf_reject_ip_tcphdr_put(nskb, oldskb, oth);
        niph->tot_len = htons(nskb->len);
        ip_send_check(niph);
@@ -117,7 +117,7 @@ struct sk_buff *nf_reject_skb_v4_unreach(struct net *net,
 
        skb_reserve(nskb, LL_MAX_HEADER);
        niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_ICMP,
-                                  net->ipv4.sysctl_ip_default_ttl);
+                                  READ_ONCE(net->ipv4.sysctl_ip_default_ttl));
 
        skb_reset_transport_header(nskb);
        icmph = skb_put_zero(nskb, sizeof(struct icmphdr));
index 2883607..0088a4c 100644 (file)
@@ -387,7 +387,7 @@ static int snmp_seq_show_ipstats(struct seq_file *seq, void *v)
 
        seq_printf(seq, "\nIp: %d %d",
                   IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2,
-                  net->ipv4.sysctl_ip_default_ttl);
+                  READ_ONCE(net->ipv4.sysctl_ip_default_ttl));
 
        BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0);
        snmp_get_cpu_field64_batch(buff64, snmp4_ipstats_list,
index 356f535..4702c61 100644 (file)
@@ -1398,7 +1398,7 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr)
        struct fib_info *fi = res->fi;
        u32 mtu = 0;
 
-       if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu ||
+       if (READ_ONCE(dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu) ||
            fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU))
                mtu = fi->fib_mtu;
 
@@ -1929,7 +1929,7 @@ static u32 fib_multipath_custom_hash_outer(const struct net *net,
                                           const struct sk_buff *skb,
                                           bool *p_has_inner)
 {
-       u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
+       u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields);
        struct flow_keys keys, hash_keys;
 
        if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
@@ -1958,7 +1958,7 @@ static u32 fib_multipath_custom_hash_inner(const struct net *net,
                                           const struct sk_buff *skb,
                                           bool has_inner)
 {
-       u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
+       u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields);
        struct flow_keys keys, hash_keys;
 
        /* We assume the packet carries an encapsulation, but if none was
@@ -2018,7 +2018,7 @@ static u32 fib_multipath_custom_hash_skb(const struct net *net,
 static u32 fib_multipath_custom_hash_fl4(const struct net *net,
                                         const struct flowi4 *fl4)
 {
-       u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
+       u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields);
        struct flow_keys hash_keys;
 
        if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
@@ -2048,7 +2048,7 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
        struct flow_keys hash_keys;
        u32 mhash = 0;
 
-       switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
+       switch (READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_policy)) {
        case 0:
                memset(&hash_keys, 0, sizeof(hash_keys));
                hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
index b387c48..942d2df 100644 (file)
@@ -247,12 +247,12 @@ bool cookie_timestamp_decode(const struct net *net,
                return true;
        }
 
-       if (!net->ipv4.sysctl_tcp_timestamps)
+       if (!READ_ONCE(net->ipv4.sysctl_tcp_timestamps))
                return false;
 
        tcp_opt->sack_ok = (options & TS_OPT_SACK) ? TCP_SACK_SEEN : 0;
 
-       if (tcp_opt->sack_ok && !net->ipv4.sysctl_tcp_sack)
+       if (tcp_opt->sack_ok && !READ_ONCE(net->ipv4.sysctl_tcp_sack))
                return false;
 
        if ((options & TS_OPT_WSCALE_MASK) == TS_OPT_WSCALE_MASK)
@@ -261,7 +261,7 @@ bool cookie_timestamp_decode(const struct net *net,
        tcp_opt->wscale_ok = 1;
        tcp_opt->snd_wscale = options & TS_OPT_WSCALE_MASK;
 
-       return net->ipv4.sysctl_tcp_window_scaling != 0;
+       return READ_ONCE(net->ipv4.sysctl_tcp_window_scaling) != 0;
 }
 EXPORT_SYMBOL(cookie_timestamp_decode);
 
@@ -340,7 +340,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
        struct flowi4 fl4;
        u32 tsoff = 0;
 
-       if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst)
+       if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) ||
+           !th->ack || th->rst)
                goto out;
 
        if (tcp_synq_no_recent_overflow(sk))
index 108fd86..5490c28 100644 (file)
@@ -84,7 +84,7 @@ static int ipv4_local_port_range(struct ctl_table *table, int write,
                 * port limit.
                 */
                if ((range[1] < range[0]) ||
-                   (range[0] < net->ipv4.sysctl_ip_prot_sock))
+                   (range[0] < READ_ONCE(net->ipv4.sysctl_ip_prot_sock)))
                        ret = -EINVAL;
                else
                        set_local_port_range(net, range);
@@ -110,7 +110,7 @@ static int ipv4_privileged_ports(struct ctl_table *table, int write,
                .extra2 = &ip_privileged_port_max,
        };
 
-       pports = net->ipv4.sysctl_ip_prot_sock;
+       pports = READ_ONCE(net->ipv4.sysctl_ip_prot_sock);
 
        ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
 
@@ -122,7 +122,7 @@ static int ipv4_privileged_ports(struct ctl_table *table, int write,
                if (range[0] < pports)
                        ret = -EINVAL;
                else
-                       net->ipv4.sysctl_ip_prot_sock = pports;
+                       WRITE_ONCE(net->ipv4.sysctl_ip_prot_sock, pports);
        }
 
        return ret;
@@ -350,61 +350,6 @@ bad_key:
        return ret;
 }
 
-static void proc_configure_early_demux(int enabled, int protocol)
-{
-       struct net_protocol *ipprot;
-#if IS_ENABLED(CONFIG_IPV6)
-       struct inet6_protocol *ip6prot;
-#endif
-
-       rcu_read_lock();
-
-       ipprot = rcu_dereference(inet_protos[protocol]);
-       if (ipprot)
-               ipprot->early_demux = enabled ? ipprot->early_demux_handler :
-                                               NULL;
-
-#if IS_ENABLED(CONFIG_IPV6)
-       ip6prot = rcu_dereference(inet6_protos[protocol]);
-       if (ip6prot)
-               ip6prot->early_demux = enabled ? ip6prot->early_demux_handler :
-                                                NULL;
-#endif
-       rcu_read_unlock();
-}
-
-static int proc_tcp_early_demux(struct ctl_table *table, int write,
-                               void *buffer, size_t *lenp, loff_t *ppos)
-{
-       int ret = 0;
-
-       ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos);
-
-       if (write && !ret) {
-               int enabled = init_net.ipv4.sysctl_tcp_early_demux;
-
-               proc_configure_early_demux(enabled, IPPROTO_TCP);
-       }
-
-       return ret;
-}
-
-static int proc_udp_early_demux(struct ctl_table *table, int write,
-                               void *buffer, size_t *lenp, loff_t *ppos)
-{
-       int ret = 0;
-
-       ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos);
-
-       if (write && !ret) {
-               int enabled = init_net.ipv4.sysctl_udp_early_demux;
-
-               proc_configure_early_demux(enabled, IPPROTO_UDP);
-       }
-
-       return ret;
-}
-
 static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table,
                                             int write, void *buffer,
                                             size_t *lenp, loff_t *ppos)
@@ -707,14 +652,14 @@ static struct ctl_table ipv4_net_table[] = {
                .data           = &init_net.ipv4.sysctl_udp_early_demux,
                .maxlen         = sizeof(u8),
                .mode           = 0644,
-               .proc_handler   = proc_udp_early_demux
+               .proc_handler   = proc_dou8vec_minmax,
        },
        {
                .procname       = "tcp_early_demux",
                .data           = &init_net.ipv4.sysctl_tcp_early_demux,
                .maxlen         = sizeof(u8),
                .mode           = 0644,
-               .proc_handler   = proc_tcp_early_demux
+               .proc_handler   = proc_dou8vec_minmax,
        },
        {
                .procname       = "nexthop_compat_mode",
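
With no function pointer left to toggle, the dedicated proc_tcp_early_demux()/proc_udp_early_demux() handlers removed above reduce to a plain bounds-checked u8 store, which the stock proc_dou8vec_minmax() already provides; readers simply pick up the new value on their next READ_ONCE(). For reference, a hypothetical table entry of the same shape, with explicit 0/1 bounds spelled out:

/* illustrative only: an on/off u8 sysctl that needs no custom handler */
{
        .procname       = "example_toggle",
        .data           = &init_net.ipv4.sysctl_tcp_early_demux,
        .maxlen         = sizeof(u8),
        .mode           = 0644,
        .proc_handler   = proc_dou8vec_minmax,
        .extra1         = SYSCTL_ZERO,
        .extra2         = SYSCTL_ONE,
},
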
index 2222dfd..2faaaaf 100644 (file)
@@ -441,7 +441,7 @@ void tcp_init_sock(struct sock *sk)
        tp->snd_cwnd_clamp = ~0;
        tp->mss_cache = TCP_MSS_DEFAULT;
 
-       tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering;
+       tp->reordering = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering);
        tcp_assign_congestion_control(sk);
 
        tp->tsoffset = 0;
@@ -1150,7 +1150,8 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
        struct sockaddr *uaddr = msg->msg_name;
        int err, flags;
 
-       if (!(sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) ||
+       if (!(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) &
+             TFO_CLIENT_ENABLE) ||
            (uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) &&
             uaddr->sa_family == AF_UNSPEC))
                return -EOPNOTSUPP;
@@ -3617,7 +3618,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname,
        case TCP_FASTOPEN_CONNECT:
                if (val > 1 || val < 0) {
                        err = -EINVAL;
-               } else if (net->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) {
+               } else if (READ_ONCE(net->ipv4.sysctl_tcp_fastopen) &
+                          TFO_CLIENT_ENABLE) {
                        if (sk->sk_state == TCP_CLOSE)
                                tp->fastopen_connect = val;
                        else
@@ -3967,12 +3969,13 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
                val = keepalive_probes(tp);
                break;
        case TCP_SYNCNT:
-               val = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
+               val = icsk->icsk_syn_retries ? :
+                       READ_ONCE(net->ipv4.sysctl_tcp_syn_retries);
                break;
        case TCP_LINGER2:
                val = tp->linger2;
                if (val >= 0)
-                       val = (val ? : net->ipv4.sysctl_tcp_fin_timeout) / HZ;
+                       val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ;
                break;
        case TCP_DEFER_ACCEPT:
                val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept,
index fdbcf2a..825b216 100644 (file)
@@ -332,7 +332,7 @@ static bool tcp_fastopen_no_cookie(const struct sock *sk,
                                   const struct dst_entry *dst,
                                   int flag)
 {
-       return (sock_net(sk)->ipv4.sysctl_tcp_fastopen & flag) ||
+       return (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) & flag) ||
               tcp_sk(sk)->fastopen_no_cookie ||
               (dst && dst_metric(dst, RTAX_FASTOPEN_NO_COOKIE));
 }
@@ -347,7 +347,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
                              const struct dst_entry *dst)
 {
        bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
-       int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
+       int tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen);
        struct tcp_fastopen_cookie valid_foc = { .len = -1 };
        struct sock *child;
        int ret = 0;
@@ -489,7 +489,7 @@ void tcp_fastopen_active_disable(struct sock *sk)
 {
        struct net *net = sock_net(sk);
 
-       if (!sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout)
+       if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout))
                return;
 
        /* Paired with READ_ONCE() in tcp_fastopen_active_should_disable() */
@@ -510,7 +510,8 @@ void tcp_fastopen_active_disable(struct sock *sk)
  */
 bool tcp_fastopen_active_should_disable(struct sock *sk)
 {
-       unsigned int tfo_bh_timeout = sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout;
+       unsigned int tfo_bh_timeout =
+               READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout);
        unsigned long timeout;
        int tfo_da_times;
        int multiplier;
index 3ec4edc..07dbcba 100644 (file)
@@ -1051,7 +1051,7 @@ static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq,
                         tp->undo_marker ? tp->undo_retrans : 0);
 #endif
                tp->reordering = min_t(u32, (metric + mss - 1) / mss,
-                                      sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
+                                      READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering));
        }
 
        /* This exciting event is worth to be remembered. 8) */
@@ -2030,7 +2030,7 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend)
                return;
 
        tp->reordering = min_t(u32, tp->packets_out + addend,
-                              sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
+                              READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering));
        tp->reord_seen++;
        NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER);
 }
@@ -2095,7 +2095,8 @@ static inline void tcp_init_undo(struct tcp_sock *tp)
 
 static bool tcp_is_rack(const struct sock *sk)
 {
-       return sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION;
+       return READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
+               TCP_RACK_LOSS_DETECTION;
 }
 
 /* If we detect SACK reneging, forget all SACK information
@@ -2139,6 +2140,7 @@ void tcp_enter_loss(struct sock *sk)
        struct tcp_sock *tp = tcp_sk(sk);
        struct net *net = sock_net(sk);
        bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
+       u8 reordering;
 
        tcp_timeout_mark_lost(sk);
 
@@ -2159,10 +2161,12 @@ void tcp_enter_loss(struct sock *sk)
        /* Timeout in disordered state after receiving substantial DUPACKs
         * suggests that the degree of reordering is over-estimated.
         */
+       reordering = READ_ONCE(net->ipv4.sysctl_tcp_reordering);
        if (icsk->icsk_ca_state <= TCP_CA_Disorder &&
-           tp->sacked_out >= net->ipv4.sysctl_tcp_reordering)
+           tp->sacked_out >= reordering)
                tp->reordering = min_t(unsigned int, tp->reordering,
-                                      net->ipv4.sysctl_tcp_reordering);
+                                      reordering);
+
        tcp_set_ca_state(sk, TCP_CA_Loss);
        tp->high_seq = tp->snd_nxt;
        tcp_ecn_queue_cwr(tp);
@@ -3464,7 +3468,8 @@ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
         * new SACK or ECE mark may first advance cwnd here and later reduce
         * cwnd in tcp_fastretrans_alert() based on more states.
         */
-       if (tcp_sk(sk)->reordering > sock_net(sk)->ipv4.sysctl_tcp_reordering)
+       if (tcp_sk(sk)->reordering >
+           READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering))
                return flag & FLAG_FORWARD_PROGRESS;
 
        return flag & FLAG_DATA_ACKED;
@@ -4056,7 +4061,7 @@ void tcp_parse_options(const struct net *net,
                                break;
                        case TCPOPT_WINDOW:
                                if (opsize == TCPOLEN_WINDOW && th->syn &&
-                                   !estab && net->ipv4.sysctl_tcp_window_scaling) {
+                                   !estab && READ_ONCE(net->ipv4.sysctl_tcp_window_scaling)) {
                                        __u8 snd_wscale = *(__u8 *)ptr;
                                        opt_rx->wscale_ok = 1;
                                        if (snd_wscale > TCP_MAX_WSCALE) {
@@ -4072,7 +4077,7 @@ void tcp_parse_options(const struct net *net,
                        case TCPOPT_TIMESTAMP:
                                if ((opsize == TCPOLEN_TIMESTAMP) &&
                                    ((estab && opt_rx->tstamp_ok) ||
-                                    (!estab && net->ipv4.sysctl_tcp_timestamps))) {
+                                    (!estab && READ_ONCE(net->ipv4.sysctl_tcp_timestamps)))) {
                                        opt_rx->saw_tstamp = 1;
                                        opt_rx->rcv_tsval = get_unaligned_be32(ptr);
                                        opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
@@ -4080,7 +4085,7 @@ void tcp_parse_options(const struct net *net,
                                break;
                        case TCPOPT_SACK_PERM:
                                if (opsize == TCPOLEN_SACK_PERM && th->syn &&
-                                   !estab && net->ipv4.sysctl_tcp_sack) {
+                                   !estab && READ_ONCE(net->ipv4.sysctl_tcp_sack)) {
                                        opt_rx->sack_ok = TCP_SACK_SEEN;
                                        tcp_sack_reset(opt_rx);
                                }
@@ -5567,7 +5572,7 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
        struct tcp_sock *tp = tcp_sk(sk);
        u32 ptr = ntohs(th->urg_ptr);
 
-       if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg)
+       if (ptr && !READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_stdurg))
                ptr--;
        ptr += ntohl(th->seq);
 
@@ -6797,11 +6802,14 @@ static bool tcp_syn_flood_action(const struct sock *sk, const char *proto)
 {
        struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
        const char *msg = "Dropping request";
-       bool want_cookie = false;
        struct net *net = sock_net(sk);
+       bool want_cookie = false;
+       u8 syncookies;
+
+       syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
 
 #ifdef CONFIG_SYN_COOKIES
-       if (net->ipv4.sysctl_tcp_syncookies) {
+       if (syncookies) {
                msg = "Sending cookies";
                want_cookie = true;
                __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
@@ -6809,8 +6817,7 @@ static bool tcp_syn_flood_action(const struct sock *sk, const char *proto)
 #endif
                __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
 
-       if (!queue->synflood_warned &&
-           net->ipv4.sysctl_tcp_syncookies != 2 &&
+       if (!queue->synflood_warned && syncookies != 2 &&
            xchg(&queue->synflood_warned, 1) == 0)
                net_info_ratelimited("%s: Possible SYN flooding on port %d. %s.  Check SNMP counters.\n",
                                     proto, sk->sk_num, msg);
@@ -6859,7 +6866,7 @@ u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
        struct tcp_sock *tp = tcp_sk(sk);
        u16 mss;
 
-       if (sock_net(sk)->ipv4.sysctl_tcp_syncookies != 2 &&
+       if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) != 2 &&
            !inet_csk_reqsk_queue_is_full(sk))
                return 0;
 
@@ -6893,13 +6900,15 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
        bool want_cookie = false;
        struct dst_entry *dst;
        struct flowi fl;
+       u8 syncookies;
+
+       syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
 
        /* TW buckets are converted to open requests without
         * limitations, they conserve resources and peer is
         * evidently real one.
         */
-       if ((net->ipv4.sysctl_tcp_syncookies == 2 ||
-            inet_csk_reqsk_queue_is_full(sk)) && !isn) {
+       if ((syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) && !isn) {
                want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name);
                if (!want_cookie)
                        goto drop;
@@ -6948,10 +6957,12 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
                tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb);
 
        if (!want_cookie && !isn) {
+               int max_syn_backlog = READ_ONCE(net->ipv4.sysctl_max_syn_backlog);
+
                /* Kill the following clause, if you dislike this way. */
-               if (!net->ipv4.sysctl_tcp_syncookies &&
-                   (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
-                    (net->ipv4.sysctl_max_syn_backlog >> 2)) &&
+               if (!syncookies &&
+                   (max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
+                    (max_syn_backlog >> 2)) &&
                    !tcp_peer_is_proven(req, dst)) {
                        /* Without syncookies last quarter of
                         * backlog is filled with destinations,
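
Where a function consults the same knob several times (syncookies and max_syn_backlog in the tcp_conn_request() hunks above, tcp_reordering in tcp_enter_loss()), the value is now read once into a local. Besides silencing KCSAN, that keeps a single invocation internally consistent: two plain reads could observe different values if the admin flips the sysctl mid-call. The idiom, with an invented helper name:

/* hypothetical helper: both derived checks use one snapshot */
static bool synq_nearly_full(const struct net *net, int qlen)
{
        int backlog = READ_ONCE(net->ipv4.sysctl_max_syn_backlog);

        /* reusing 'backlog' guarantees the two comparisons agree */
        return backlog - qlen < (backlog >> 2);
}
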
index da5a3c4..d16e6e4 100644 (file)
@@ -108,10 +108,10 @@ static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb)
 
 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
 {
+       int reuse = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tw_reuse);
        const struct inet_timewait_sock *tw = inet_twsk(sktw);
        const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
        struct tcp_sock *tp = tcp_sk(sk);
-       int reuse = sock_net(sk)->ipv4.sysctl_tcp_tw_reuse;
 
        if (reuse == 2) {
                /* Still does not detect *everything* that goes through
index 7029b0e..a501150 100644 (file)
@@ -428,7 +428,8 @@ void tcp_update_metrics(struct sock *sk)
                if (!tcp_metric_locked(tm, TCP_METRIC_REORDERING)) {
                        val = tcp_metric_get(tm, TCP_METRIC_REORDERING);
                        if (val < tp->reordering &&
-                           tp->reordering != net->ipv4.sysctl_tcp_reordering)
+                           tp->reordering !=
+                           READ_ONCE(net->ipv4.sysctl_tcp_reordering))
                                tcp_metric_set(tm, TCP_METRIC_REORDERING,
                                               tp->reordering);
                }
index 6854bb1..cb95d88 100644 (file)
@@ -173,7 +173,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
                         * Oh well... nobody has a sufficient solution to this
                         * protocol bug yet.
                         */
-                       if (twsk_net(tw)->ipv4.sysctl_tcp_rfc1337 == 0) {
+                       if (!READ_ONCE(twsk_net(tw)->ipv4.sysctl_tcp_rfc1337)) {
 kill:
                                inet_twsk_deschedule_put(tw);
                                return TCP_TW_SUCCESS;
@@ -781,7 +781,7 @@ listen_overflow:
        if (sk != req->rsk_listener)
                __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE);
 
-       if (!sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow) {
+       if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow)) {
                inet_rsk(req)->acked = 1;
                return NULL;
        }
index 11aa0ab..c38e07b 100644 (file)
@@ -791,18 +791,18 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
        opts->mss = tcp_advertise_mss(sk);
        remaining -= TCPOLEN_MSS_ALIGNED;
 
-       if (likely(sock_net(sk)->ipv4.sysctl_tcp_timestamps && !*md5)) {
+       if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps) && !*md5)) {
                opts->options |= OPTION_TS;
                opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset;
                opts->tsecr = tp->rx_opt.ts_recent;
                remaining -= TCPOLEN_TSTAMP_ALIGNED;
        }
-       if (likely(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) {
+       if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling))) {
                opts->ws = tp->rx_opt.rcv_wscale;
                opts->options |= OPTION_WSCALE;
                remaining -= TCPOLEN_WSCALE_ALIGNED;
        }
-       if (likely(sock_net(sk)->ipv4.sysctl_tcp_sack)) {
+       if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_sack))) {
                opts->options |= OPTION_SACK_ADVERTISE;
                if (unlikely(!(OPTION_TS & opts->options)))
                        remaining -= TCPOLEN_SACKPERM_ALIGNED;
@@ -1719,7 +1719,8 @@ static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu)
        mss_now -= icsk->icsk_ext_hdr_len;
 
        /* Then reserve room for full set of TCP options and 8 bytes of data */
-       mss_now = max(mss_now, sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss);
+       mss_now = max(mss_now,
+                     READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss));
        return mss_now;
 }
 
@@ -1762,10 +1763,10 @@ void tcp_mtup_init(struct sock *sk)
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct net *net = sock_net(sk);
 
-       icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1;
+       icsk->icsk_mtup.enabled = READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing) > 1;
        icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +
                               icsk->icsk_af_ops->net_header_len;
-       icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss);
+       icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, READ_ONCE(net->ipv4.sysctl_tcp_base_mss));
        icsk->icsk_mtup.probe_size = 0;
        if (icsk->icsk_mtup.enabled)
                icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
@@ -1897,7 +1898,7 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
                if (tp->packets_out > tp->snd_cwnd_used)
                        tp->snd_cwnd_used = tp->packets_out;
 
-               if (sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle &&
+               if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) &&
                    (s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto &&
                    !ca_ops->cong_control)
                        tcp_cwnd_application_limited(sk);
@@ -2282,7 +2283,7 @@ static inline void tcp_mtu_check_reprobe(struct sock *sk)
        u32 interval;
        s32 delta;
 
-       interval = net->ipv4.sysctl_tcp_probe_interval;
+       interval = READ_ONCE(net->ipv4.sysctl_tcp_probe_interval);
        delta = tcp_jiffies32 - icsk->icsk_mtup.probe_timestamp;
        if (unlikely(delta >= interval * HZ)) {
                int mss = tcp_current_mss(sk);
@@ -2366,7 +2367,7 @@ static int tcp_mtu_probe(struct sock *sk)
          * probing process by not resetting search range to its original.
         */
        if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) ||
-               interval < net->ipv4.sysctl_tcp_probe_threshold) {
+           interval < READ_ONCE(net->ipv4.sysctl_tcp_probe_threshold)) {
                /* Check whether enough time has elapsed for
                 * another round of probing.
                 */
@@ -2740,7 +2741,7 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto)
        if (rcu_access_pointer(tp->fastopen_rsk))
                return false;
 
-       early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans;
+       early_retrans = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_early_retrans);
        /* Schedule a loss probe in 2*RTT for SACK capable connections
         * not in loss recovery, that are either limited by cwnd or application.
         */
@@ -3104,7 +3105,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
        struct sk_buff *skb = to, *tmp;
        bool first = true;
 
-       if (!sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse)
+       if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse))
                return;
        if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
                return;
@@ -3646,7 +3647,7 @@ static void tcp_connect_init(struct sock *sk)
         * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
         */
        tp->tcp_header_len = sizeof(struct tcphdr);
-       if (sock_net(sk)->ipv4.sysctl_tcp_timestamps)
+       if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps))
                tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED;
 
 #ifdef CONFIG_TCP_MD5SIG
@@ -3682,7 +3683,7 @@ static void tcp_connect_init(struct sock *sk)
                                  tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
                                  &tp->rcv_wnd,
                                  &tp->window_clamp,
-                                 sock_net(sk)->ipv4.sysctl_tcp_window_scaling,
+                                 READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling),
                                  &rcv_wscale,
                                  rcv_wnd);
 
@@ -4089,7 +4090,7 @@ void tcp_send_probe0(struct sock *sk)
 
        icsk->icsk_probes_out++;
        if (err <= 0) {
-               if (icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2)
+               if (icsk->icsk_backoff < READ_ONCE(net->ipv4.sysctl_tcp_retries2))
                        icsk->icsk_backoff++;
                timeout = tcp_probe0_when(sk, TCP_RTO_MAX);
        } else {
index 48f30e7..50abaa9 100644 (file)
@@ -14,7 +14,8 @@ static u32 tcp_rack_reo_wnd(const struct sock *sk)
                        return 0;
 
                if (tp->sacked_out >= tp->reordering &&
-                   !(sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_NO_DUPTHRESH))
+                   !(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
+                     TCP_RACK_NO_DUPTHRESH))
                        return 0;
        }
 
@@ -187,7 +188,8 @@ void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs)
 {
        struct tcp_sock *tp = tcp_sk(sk);
 
-       if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_STATIC_REO_WND ||
+       if ((READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
+            TCP_RACK_STATIC_REO_WND) ||
            !rs->prior_delivered)
                return;
 
index 20cf4a9..50bba37 100644 (file)
@@ -143,7 +143,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
  */
 static int tcp_orphan_retries(struct sock *sk, bool alive)
 {
-       int retries = sock_net(sk)->ipv4.sysctl_tcp_orphan_retries; /* May be zero. */
+       int retries = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_orphan_retries); /* May be zero. */
 
        /* We know from an ICMP that something is wrong. */
        if (sk->sk_err_soft && !alive)
@@ -163,7 +163,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
        int mss;
 
        /* Black hole detection */
-       if (!net->ipv4.sysctl_tcp_mtu_probing)
+       if (!READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing))
                return;
 
        if (!icsk->icsk_mtup.enabled) {
@@ -171,9 +171,9 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
                icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
        } else {
                mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
-               mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
-               mss = max(mss, net->ipv4.sysctl_tcp_mtu_probe_floor);
-               mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss);
+               mss = min(READ_ONCE(net->ipv4.sysctl_tcp_base_mss), mss);
+               mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_mtu_probe_floor));
+               mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_min_snd_mss));
                icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
        }
        tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
@@ -239,17 +239,18 @@ static int tcp_write_timeout(struct sock *sk)
        if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
                if (icsk->icsk_retransmits)
                        __dst_negative_advice(sk);
-               retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
+               retry_until = icsk->icsk_syn_retries ? :
+                       READ_ONCE(net->ipv4.sysctl_tcp_syn_retries);
                expired = icsk->icsk_retransmits >= retry_until;
        } else {
-               if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0)) {
+               if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1), 0)) {
                        /* Black hole detection */
                        tcp_mtu_probing(icsk, sk);
 
                        __dst_negative_advice(sk);
                }
 
-               retry_until = net->ipv4.sysctl_tcp_retries2;
+               retry_until = READ_ONCE(net->ipv4.sysctl_tcp_retries2);
                if (sock_flag(sk, SOCK_DEAD)) {
                        const bool alive = icsk->icsk_rto < TCP_RTO_MAX;
 
@@ -380,7 +381,7 @@ static void tcp_probe_timer(struct sock *sk)
                 msecs_to_jiffies(icsk->icsk_user_timeout))
                goto abort;
 
-       max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2;
+       max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2);
        if (sock_flag(sk, SOCK_DEAD)) {
                const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;
 
@@ -406,12 +407,15 @@ abort:            tcp_write_err(sk);
 static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req)
 {
        struct inet_connection_sock *icsk = inet_csk(sk);
-       int max_retries = icsk->icsk_syn_retries ? :
-           sock_net(sk)->ipv4.sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */
        struct tcp_sock *tp = tcp_sk(sk);
+       int max_retries;
 
        req->rsk_ops->syn_ack_timeout(req);
 
+       /* add one more retry for fastopen */
+       max_retries = icsk->icsk_syn_retries ? :
+               READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_synack_retries) + 1;
+
        if (req->num_timeout >= max_retries) {
                tcp_write_err(sk);
                return;
@@ -574,7 +578,7 @@ out_reset_timer:
         * linear-timeout retransmissions into a black hole
         */
        if (sk->sk_state == TCP_ESTABLISHED &&
-           (tp->thin_lto || net->ipv4.sysctl_tcp_thin_linear_timeouts) &&
+           (tp->thin_lto || READ_ONCE(net->ipv4.sysctl_tcp_thin_linear_timeouts)) &&
            tcp_stream_is_thin(tp) &&
            icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
                icsk->icsk_backoff = 0;
@@ -585,7 +589,7 @@ out_reset_timer:
        }
        inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
                                  tcp_clamp_rto_to_user_timeout(sk), TCP_RTO_MAX);
-       if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1 + 1, 0))
+       if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1) + 1, 0))
                __sk_dst_reset(sk);
 
 out:;
index 70564dd..6f354f8 100644 (file)
@@ -226,7 +226,7 @@ lookup_protocol:
        RCU_INIT_POINTER(inet->mc_list, NULL);
        inet->rcv_tos   = 0;
 
-       if (net->ipv4.sysctl_ip_no_pmtu_disc)
+       if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc))
                inet->pmtudisc = IP_PMTUDISC_DONT;
        else
                inet->pmtudisc = IP_PMTUDISC_WANT;
index 0322cc8..e1ebf5e 100644 (file)
 #include <net/inet_ecn.h>
 #include <net/dst_metadata.h>
 
-INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *));
 static void ip6_rcv_finish_core(struct net *net, struct sock *sk,
                                struct sk_buff *skb)
 {
-       void (*edemux)(struct sk_buff *skb);
-
-       if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) {
-               const struct inet6_protocol *ipprot;
-
-               ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
-               if (ipprot && (edemux = READ_ONCE(ipprot->early_demux)))
-                       INDIRECT_CALL_2(edemux, tcp_v6_early_demux,
-                                       udp_v6_early_demux, skb);
+       if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) &&
+           !skb_dst(skb) && !skb->sk) {
+               switch (ipv6_hdr(skb)->nexthdr) {
+               case IPPROTO_TCP:
+                       if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux))
+                               tcp_v6_early_demux(skb);
+                       break;
+               case IPPROTO_UDP:
+                       if (READ_ONCE(net->ipv4.sysctl_udp_early_demux))
+                               udp_v6_early_demux(skb);
+                       break;
+               }
        }
+
        if (!skb_valid_dst(skb))
                ip6_route_input(skb);
 }
index 9cc123f..5014aa6 100644 (file)
@@ -141,7 +141,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
        __u8 rcv_wscale;
        u32 tsoff = 0;
 
-       if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst)
+       if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) ||
+           !th->ack || th->rst)
                goto out;
 
        if (tcp_synq_no_recent_overflow(sk))
index f37dd4a..9d3ede2 100644 (file)
@@ -1822,7 +1822,7 @@ do_time_wait:
        goto discard_it;
 }
 
-INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
+void tcp_v6_early_demux(struct sk_buff *skb)
 {
        const struct ipv6hdr *hdr;
        const struct tcphdr *th;
@@ -2176,12 +2176,7 @@ struct proto tcpv6_prot = {
 };
 EXPORT_SYMBOL_GPL(tcpv6_prot);
 
-/* thinking of making this const? Don't.
- * early_demux can change based on sysctl.
- */
-static struct inet6_protocol tcpv6_protocol = {
-       .early_demux    =       tcp_v6_early_demux,
-       .early_demux_handler =  tcp_v6_early_demux,
+static const struct inet6_protocol tcpv6_protocol = {
        .handler        =       tcp_v6_rcv,
        .err_handler    =       tcp_v6_err,
        .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
index 55afd7f..e2f2e08 100644 (file)
@@ -1052,7 +1052,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
        return NULL;
 }
 
-INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb)
+void udp_v6_early_demux(struct sk_buff *skb)
 {
        struct net *net = dev_net(skb->dev);
        const struct udphdr *uh;
@@ -1660,12 +1660,7 @@ int udpv6_getsockopt(struct sock *sk, int level, int optname,
        return ipv6_getsockopt(sk, level, optname, optval, optlen);
 }
 
-/* thinking of making this const? Don't.
- * early_demux can change based on sysctl.
- */
-static struct inet6_protocol udpv6_protocol = {
-       .early_demux    =       udp_v6_early_demux,
-       .early_demux_handler =  udp_v6_early_demux,
+static const struct inet6_protocol udpv6_protocol = {
        .handler        =       udpv6_rcv,
        .err_handler    =       udpv6_err,
        .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
index e479dd0..16915f8 100644 (file)
@@ -405,7 +405,7 @@ synproxy_build_ip(struct net *net, struct sk_buff *skb, __be32 saddr,
        iph->tos        = 0;
        iph->id         = 0;
        iph->frag_off   = htons(IP_DF);
-       iph->ttl        = net->ipv4.sysctl_ip_default_ttl;
+       iph->ttl        = READ_ONCE(net->ipv4.sysctl_ip_default_ttl);
        iph->protocol   = IPPROTO_TCP;
        iph->check      = 0;
        iph->saddr      = saddr;
index 9bb4d3d..ac366c9 100644 (file)
@@ -3533,7 +3533,7 @@ int tc_setup_action(struct flow_action *flow_action,
                    struct tc_action *actions[],
                    struct netlink_ext_ack *extack)
 {
-       int i, j, index, err = 0;
+       int i, j, k, index, err = 0;
        struct tc_action *act;
 
        BUILD_BUG_ON(TCA_ACT_HW_STATS_ANY != FLOW_ACTION_HW_STATS_ANY);
@@ -3553,14 +3553,18 @@ int tc_setup_action(struct flow_action *flow_action,
                if (err)
                        goto err_out_locked;
 
-               entry->hw_stats = tc_act_hw_stats(act->hw_stats);
-               entry->hw_index = act->tcfa_index;
                index = 0;
                err = tc_setup_offload_act(act, entry, &index, extack);
-               if (!err)
-                       j += index;
-               else
+               if (err)
                        goto err_out_locked;
+
+               for (k = 0; k < index; k++) {
+                       entry[k].hw_stats = tc_act_hw_stats(act->hw_stats);
+                       entry[k].hw_index = act->tcfa_index;
+               }
+
+               j += index;
+
                spin_unlock_bh(&act->tcfa_lock);
        }
 
index 35928fe..1a094b0 100644 (file)
@@ -358,7 +358,7 @@ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp)
        if (addr->v4.sin_addr.s_addr != htonl(INADDR_ANY) &&
           ret != RTN_LOCAL &&
           !sp->inet.freebind &&
-          !net->ipv4.sysctl_ip_nonlocal_bind)
+           !READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind))
                return 0;
 
        if (ipv6_only_sock(sctp_opt2sk(sp)))
index c4d057b..0bde36b 100644 (file)
@@ -2122,7 +2122,7 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc)
        init_waitqueue_head(&lgr->llc_flow_waiter);
        init_waitqueue_head(&lgr->llc_msg_waiter);
        mutex_init(&lgr->llc_conf_mutex);
-       lgr->llc_testlink_time = net->ipv4.sysctl_tcp_keepalive_time;
+       lgr->llc_testlink_time = READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time);
 }
 
 /* called after lgr was removed from lgr_list */
index ce827e7..879b902 100644 (file)
@@ -97,13 +97,16 @@ static void tls_device_queue_ctx_destruction(struct tls_context *ctx)
        unsigned long flags;
 
        spin_lock_irqsave(&tls_device_lock, flags);
+       if (unlikely(!refcount_dec_and_test(&ctx->refcount)))
+               goto unlock;
+
        list_move_tail(&ctx->list, &tls_device_gc_list);
 
        /* schedule_work inside the spinlock
         * to make sure tls_device_down waits for that work.
         */
        schedule_work(&tls_device_gc_work);
-
+unlock:
        spin_unlock_irqrestore(&tls_device_lock, flags);
 }
 
@@ -194,8 +197,7 @@ void tls_device_sk_destruct(struct sock *sk)
                clean_acked_data_disable(inet_csk(sk));
        }
 
-       if (refcount_dec_and_test(&tls_ctx->refcount))
-               tls_device_queue_ctx_destruction(tls_ctx);
+       tls_device_queue_ctx_destruction(tls_ctx);
 }
 EXPORT_SYMBOL_GPL(tls_device_sk_destruct);
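
The tls_device change fixes an ordering race rather than a racy load: the final refcount_dec_and_test() used to happen before tls_device_lock was taken, leaving a window in which tls_device_down() could observe a context after its last put but before it was queued for destruction. Folding the decrement under the lock makes "last reference dropped" and "queued for GC" one step with respect to the lock; the resulting shape, slightly simplified:

spin_lock_irqsave(&tls_device_lock, flags);
if (refcount_dec_and_test(&ctx->refcount)) {
        list_move_tail(&ctx->list, &tls_device_gc_list);
        /* schedule_work() of the GC worker elided for brevity */
}
spin_unlock_irqrestore(&tls_device_lock, flags);
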
 
index f1876ea..f1a0bab 100644 (file)
@@ -2678,8 +2678,10 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family,
                *num_xfrms = 0;
                return 0;
        }
-       if (IS_ERR(pols[0]))
+       if (IS_ERR(pols[0])) {
+               *num_pols = 0;
                return PTR_ERR(pols[0]);
+       }
 
        *num_xfrms = pols[0]->xfrm_nr;
 
@@ -2694,6 +2696,7 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family,
                if (pols[1]) {
                        if (IS_ERR(pols[1])) {
                                xfrm_pols_put(pols, *num_pols);
+                               *num_pols = 0;
                                return PTR_ERR(pols[1]);
                        }
                        (*num_pols)++;
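
The xfrm fix keeps an out-parameter honest on failure: callers clean up with xfrm_pols_put(pols, *num_pols), so returning an error while *num_pols still counts a policy reference that was never taken (or has already been dropped) would make the caller put references it does not hold. Zeroing the count on both error paths keeps caller-side cleanup of this simplified shape safe:

err = xfrm_expand_policies(fl, family, pols, &num_pols, &num_xfrms);
if (err < 0) {
        xfrm_pols_put(pols, num_pols);  /* safe: callee zeroed it on error */
        return ERR_PTR(err);
}
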
index 08564e0..ccfb172 100644 (file)
@@ -2620,7 +2620,7 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
        int err;
 
        if (family == AF_INET &&
-           xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc)
+           READ_ONCE(xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc))
                x->props.flags |= XFRM_STATE_NOPMTUDISC;
 
        err = -EPROTONOSUPPORT;
index 46f7542..dc07b6d 100644 (file)
@@ -180,7 +180,7 @@ lx-symbols command."""
                 self.breakpoint.delete()
                 self.breakpoint = None
             self.breakpoint = LoadModuleBreakpoint(
-                "kernel/module.c:do_init_module", self)
+                "kernel/module/main.c:do_init_module", self)
         else:
             gdb.write("Note: symbol update on module loading not supported "
                       "with this gdb version\n")
index 7391741..a8802b8 100644 (file)
@@ -2247,6 +2247,10 @@ bool ima_appraise_signature(enum kernel_read_file_id id)
        if (id >= READING_MAX_ID)
                return false;
 
+       if (id == READING_KEXEC_IMAGE && !(ima_appraise & IMA_APPRAISE_ENFORCE)
+           && security_locked_down(LOCKDOWN_KEXEC))
+               return false;
+
        func = read_idmap[id] ?: FILE_CHECK;
 
        rcu_read_lock();
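
The IMA hunk closes a lockdown bypass on the kexec_file path: ima_appraise_signature() tells the kexec code whether IMA will vouch for the image, and a locked-down kernel accepts an otherwise unverifiable image on that promise. It previously answered yes even when appraisal was not in enforce mode, i.e. when IMA would only measure the image, not reject it. The kexec-side check this guard backs up looks roughly like:

if (!ima_appraise_signature(READING_KEXEC_IMAGE) &&
    security_locked_down(LOCKDOWN_KEXEC))
        return -EPERM;
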