index.txt - File index, Mailing list and Links (this document)
-intel-pstate.txt - Intel pstate cpufreq driver specific file.
-
pcc-cpufreq.txt - PCC cpufreq driver specific file.
compatible = "st,stm32h743-rcc", "st,stm32-rcc";
reg = <0x58024400 0x400>;
#reset-cells = <1>;
- #clock-cells = <2>;
+ #clock-cells = <1>;
clocks = <&clk_hse>, <&clk_lse>, <&clk_i2s_ckin>;
st,syscfg = <&pwrcfg>;
the firmware event log
- linux,sml-size : size of the memory allocated for the firmware event log
+Optional properties:
+
+- powered-while-suspended: present when the TPM is left powered on between
+ suspend and resume (makes the suspend/resume
+ callbacks do nothing).
+
Example (for OpenPower Systems with Nuvoton TPM 2.0 on I2C)
----------------------------------------------------------
This isn't an exhaustive list, but you should add new prefixes to it before
using them to avoid name-space collisions.
-abcn Abracon Corporation
abilis Abilis Systems
+abracon Abracon Corporation
actions Actions Semiconductor Co., Ltd.
active-semi Active-Semi International Inc
ad Avionic Design GmbH
Support for power domains is provided through the :c:member:`pm_domain` field of
|struct device|. This field is a pointer to an object of type
-|struct dev_pm_domain|, defined in :file:`include/linux/pm.h``, providing a set
+|struct dev_pm_domain|, defined in :file:`include/linux/pm.h`, providing a set
of power management callbacks analogous to the subsystem-level and device driver
callbacks that are executed for the given device during all power transitions,
instead of the respective subsystem-level callbacks. Specifically, if a
Jeff Layton (many, many fixes, as well as great work on the cifs Kerberos code)
Scott Lovenberg
Pavel Shilovsky (for great work adding SMB2 support, and various SMB3 features)
+Aurelien Aptel (for DFS SMB3 work and some key bug fixes)
+Ronnie Sahlberg (for SMB3 xattr work and bug fixes)
+Shirish Pargaonkar (for many ACL patches over the years)
+Sachin Prabhu (many bug fixes, including for reconnect, copy offload and security)
+
Test case and Bug Report contributors
-------------------------------------
-The CIFS VFS support for Linux supports many advanced network filesystem
-features such as hierarchical dfs like namespace, hardlinks, locking and more.
+This module supports the SMB3 family of advanced network protocols (as well
+as older dialects, originally called "CIFS" or SMB1).
+
+The CIFS VFS module for Linux supports many advanced network filesystem
+features such as a hierarchical DFS-like namespace, hardlinks, locking and more.
It was designed to comply with the SNIA CIFS Technical Reference (which
supersedes the 1992 X/Open SMB Standard) as well as to perform best practice
practical interoperability with Windows 2000, Windows XP, Samba and equivalent
servers. This code was developed in participation with the Protocol Freedom
-Information Foundation.
+Information Foundation. CIFS, and now SMB3, have become a de facto
+standard for interoperability between Macs, Windows and major NAS appliances.
Please see
http://protocolfreedom.org/ and
For questions or bug reports please contact:
sfrench@samba.org (sfrench@us.ibm.com)
+See the project page at: https://wiki.samba.org/index.php/LinuxCIFS_utils
+
Build instructions:
==================
-For Linux 2.4:
-1) Get the kernel source (e.g.from http://www.kernel.org)
-and download the cifs vfs source (see the project page
-at http://us1.samba.org/samba/Linux_CIFS_client.html)
-and change directory into the top of the kernel directory
-then patch the kernel (e.g. "patch -p1 < cifs_24.patch")
-to add the cifs vfs to your kernel configure options if
-it has not already been added (e.g. current SuSE and UL
-users do not need to apply the cifs_24.patch since the cifs vfs is
-already in the kernel configure menu) and then
-mkdir linux/fs/cifs and then copy the current cifs vfs files from
-the cifs download to your kernel build directory e.g.
-
- cp <cifs_download_dir>/fs/cifs/* to <kernel_download_dir>/fs/cifs
-
-2) make menuconfig (or make xconfig)
-3) select cifs from within the network filesystem choices
-4) save and exit
-5) make dep
-6) make modules (or "make" if CIFS VFS not to be built as a module)
-
-For Linux 2.6:
+For Linux:
1) Download the kernel (e.g. from http://www.kernel.org)
and change directory into the top of the kernel directory tree
(e.g. /usr/src/linux-2.5.73)
If you do not have the utility mount.cifs (in the Samba 3.0 source tree and on
the CIFS VFS web site) copy it to the same directory in which mount.smbfs and
similar files reside (usually /sbin). Although the helper software is not
-required, mount.cifs is recommended. Eventually the Samba 3.0 utility program
-"net" may also be helpful since it may someday provide easier mount syntax for
-users who are used to Windows e.g.
- net use <mount point> <UNC name or cifs URL>
+required, mount.cifs is recommended. Most distros include a "cifs-utils"
+package that provides this utility, so installing it is recommended.
+
Note that running the Winbind pam/nss module (logon service) on all of your
Linux clients is useful in mapping Uids and Gids consistently across the
domain to the proper network user. The mount.cifs mount helper can be
-trivially built from Samba 3.0 or later source e.g. by executing:
-
- gcc samba/source/client/mount.cifs.c -o mount.cifs
+found at cifs-utils.git on git.samba.org
If cifs is built as a module, then the size and number of network buffers
and maximum number of simultaneous requests to one server can be configured.
on kernel/fs/cifs/cifs.ko the list of configuration changes that can be made
at module initialization time (by running insmod cifs.ko) can be seen.
+Recommendations
+===============
+To improve security, the SMB2.1 dialect or later (which will usually
+negotiate SMB3) is now the default. To use old dialects (e.g. to mount
+Windows XP) use "vers=1.0" on mount (or vers=2.0 for Windows Vista).
+Note that CIFS (vers=1.0) is much older and less secure than the default
+dialect, SMB3, which includes many advanced security features such as
+downgrade attack detection, encrypted shares, and stronger signing and
+authentication algorithms. There are additional mount options that may be
+helpful for SMB3 to get improved POSIX behavior (NB: vers=3.0 can be used
+to force only SMB3, never 2.1): "mfsymlinks", "cifsacl" and "idsfromsid"
+
Allowing User Mounts
====================
To permit users to mount and unmount over directories they own is possible
by default. This can be changed, as with nfs and other filesystems,
by simply specifying "nosuid" among the mount options. For user mounts
though to be able to pass the suid flag to mount requires rebuilding
-mount.cifs with the following flag:
-
- gcc samba/source/client/mount.cifs.c -DCIFS_ALLOW_USR_SUID -o mount.cifs
+mount.cifs with the following flag: CIFS_ALLOW_USR_SUID
There is a corresponding manual page for cifs mounting in the Samba 3.0 and
later source tree in docs/manpages/mount.cifs.8
Use instructions:
================
Once the CIFS VFS support is built into the kernel or installed as a module
-(cifs.o), you can use mount syntax like the following to access Samba or Windows
-servers:
+(cifs.ko), you can use mount syntax like the following to access Samba or
+Mac or Windows servers:
- mount -t cifs //9.53.216.11/e$ /mnt -o user=myname,pass=mypassword
+ mount -t cifs //9.53.216.11/e$ /mnt -o username=myname,password=mypassword
Before -o the option -v may be specified to make the mount.cifs
mount helper display the mount steps more verbosely.
After -o the following commonly used cifs vfs specific options
are supported:
- user=<username>
- pass=<password>
+ username=<username>
+ password=<password>
domain=<domain name>
Other cifs mount options are described below. Use of TCP names (in addition to
filenames (ie those which contain valid Linux characters, which normally
would be forbidden for Windows/CIFS semantics) as long as the server is
configured for Unix Extensions (and the client has not disabled
-/proc/fs/cifs/LinuxExtensionsEnabled).
-
+/proc/fs/cifs/LinuxExtensionsEnabled). In addition, the mount option
+"mapposix" can be used on CIFS (vers=1.0) to force the mapping of
+illegal Windows/NTFS/SMB characters to a remap range (this mount
+parameter is the default for SMB3). This remap ("mapposix") range is also
+compatible with Mac (and "Services for Mac" on some older Windows).
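
As an aside, the same option string that mount.cifs passes can be handed
directly to the mount(2) system call. The following is only a minimal
sketch (the server address, share, mount point and credentials are
placeholder values, not taken from this document):

	/* build: cc -o cifs_mount_demo cifs_mount_demo.c  (run as root) */
	#include <stdio.h>
	#include <sys/mount.h>

	int main(void)
	{
		/* Same option keys as "mount -t cifs -o ..." above;
		 * vers=3.0 forces the SMB3 dialect (see Recommendations). */
		const char *opts =
			"username=myname,password=mypassword,vers=3.0";

		if (mount("//192.168.1.10/e$", "/mnt", "cifs", 0, opts)) {
			perror("mount");
			return 1;
		}
		return 0;
	}
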
CIFS VFS Mount Options
======================
A partial list of the supported mount options follows:
- user The user name to use when trying to establish
+ username The user name to use when trying to establish
the CIFS session.
password The user password. If the mount helper is
installed, the user will be prompted for password
-Version 2.03 August 1, 2014
+Version 2.04 September 13, 2017
A Partial List of Missing Features
==================================
is a partial list of the known problems and missing features:
a) SMB3 (and SMB3.02) missing optional features:
- - RDMA
+ - RDMA (started)
- multichannel (started)
- directory leases (improved metadata caching)
- T10 copy offload (copy chunk is only mechanism supported)
- - encrypted shares
b) improved sparse file support
c) Directory entry caching relies on a 1 second timer, rather than
-using FindNotify or equivalent. - (started)
+using Directory Leases
d) quota support (needs minor kernel change since quota calls
to make it to network filesystems or deviceless filesystems)
-e) improve support for very old servers (OS/2 and Win9x for example)
-Including support for changing the time remotely (utimes command).
+e) Better optimize open to reduce redundant opens (using reference
+counts more) and to improve use of compounding in SMB3 to reduce
+number of roundtrips.
-f) hook lower into the sockets api (as NFS/SunRPC does) to avoid the
-extra copy in/out of the socket buffers in some cases.
-
-g) Better optimize open (and pathbased setfilesize) to reduce the
-oplock breaks coming from windows srv. Piggyback identical file
-opens on top of each other by incrementing reference count rather
-than resending (helps reduce server resource utilization and avoid
-spurious oplock breaks).
-
-h) Add support for storing symlink info to Windows servers
-in the Extended Attribute format their SFU clients would recognize.
-
-i) Finish inotify support so kde and gnome file list windows
+f) Finish inotify support so kde and gnome file list windows
will autorefresh (partially complete by Asser). Needs minor kernel
vfs change to support removing D_NOTIFY on a file.
-j) Add GUI tool to configure /proc/fs/cifs settings and for display of
+g) Add GUI tool to configure /proc/fs/cifs settings and for display of
the CIFS statistics (started)
-k) implement support for security and trusted categories of xattrs
+h) implement support for security and trusted categories of xattrs
(requires minor protocol extension) to enable better support for SELINUX
-l) Implement O_DIRECT flag on open (already supported on mount)
+i) Implement O_DIRECT flag on open (already supported on mount)
-m) Create UID mapping facility so server UIDs can be mapped on a per
+j) Create UID mapping facility so server UIDs can be mapped on a per
mount or a per server basis to client UIDs or nobody if no mapping
-exists. This is helpful when Unix extensions are negotiated to
-allow better permission checking when UIDs differ on the server
-and client. Add new protocol request to the CIFS protocol
-standard for asking the server for the corresponding name of a
-particular uid.
+exists. Also better integration with winbind for resolving SID owners
+
+k) Add tools to take advantage of more smb3 specific ioctls and features
+
+l) encrypted file support
+
+m) improved stats gathering, tools (perhaps integration with nfsometer?)
-n) DOS attrs - returned as pseudo-xattr in Samba format (check VFAT and NTFS for this too)
+n) allow setting more NTFS/SMB3 file attributes remotely (currently limited to compressed
+file attribute via chflags) and improve user space tools for managing and
+viewing them.
-o) mount check for unmatched uids
+o) mount helper GUI (to simplify the various configuration options on mount)
-p) Add support for new vfs entry point for fallocate
+p) autonegotiation of dialects (offering more than one dialect, i.e. SMB3.02,
+SMB3 and SMB2.1, not just SMB3).
-q) Add tools to take advantage of cifs/smb3 specific ioctls and features
-such as "CopyChunk" (fast server side file copy)
+q) Allow mount.cifs to be more verbose in reporting errors with dialect
+or unsupported feature errors.
-r) encrypted file support
+r) updating cifs documentation, and user guide.
-s) improved stats gathering, tools (perhaps integration with nfsometer?)
+s) Addressing bugs found by running a broader set of xfstests in the
+standard file system xfstest suite.
-t) allow setting more NTFS/SMB3 file attributes remotely (currently limited to compressed
-file attribute via chflags)
+t) split cifs and smb3 support into separate modules so legacy (and less
+secure) CIFS dialect can be disabled in environments that don't need it
+and simplify the code.
-u) mount helper GUI (to simplify the various configuration options on mount)
+u) Finish up SMB3.1.1 dialect support
+v) POSIX Extensions for SMB3.1.1
KNOWN BUGS
====================================
- This is the client VFS module for the Common Internet File System
- (CIFS) protocol which is the successor to the Server Message Block
+ This is the client VFS module for the SMB3 NAS protocol, as well as
+ older dialects such as the Common Internet File System (CIFS)
+ protocol which was the successor to the Server Message Block
(SMB) protocol, the native file sharing mechanism for most early
PC operating systems. New and improved versions of CIFS are now
called SMB2 and SMB3. These dialects are also supported by the
CIFS VFS module. CIFS is fully supported by network
- file servers such as Windows 2000, 2003, 2008 and 2012
+ file servers such as Windows 2000, 2003, 2008, 2012 and 2016
as well by Samba (which provides excellent CIFS
- server support for Linux and many other operating systems), so
+ server support for Linux and many other operating systems), Apple
+ systems, as well as most Network Attached Storage vendors, so
this network filesystem client can mount to a wide variety of
servers.
The intent of this module is to provide the most advanced network
- file system function for CIFS compliant servers, including better
- POSIX compliance, secure per-user session establishment, high
- performance safe distributed caching (oplock), optional packet
+ file system function for SMB3 compliant servers, including advanced
+ security features, excellent parallelized high performance i/o, better
+ POSIX compliance, secure per-user session establishment, encryption,
+ high performance safe distributed caching (leases/oplocks), optional packet
signing, large files, Unicode support and other internationalization
improvements. Since both Samba server and this filesystem client support
- the CIFS Unix extensions, the combination can provide a reasonable
- alternative to NFSv4 for fileserving in some Linux to Linux environments,
- not just in Linux to Windows environments.
+ the CIFS Unix extensions (and in the future SMB3 POSIX extensions),
+ the combination can provide a reasonable alternative to other network and
+ cluster file systems for fileserving in some Linux to Linux environments,
+ not just in Linux to Windows (or Linux to Mac) environments.
This filesystem has a mount utility (mount.cifs) that can be obtained from
jeq #14, good /* __NR_rt_sigprocmask */
jeq #13, good /* __NR_rt_sigaction */
jeq #35, good /* __NR_nanosleep */
- bad: ret #0 /* SECCOMP_RET_KILL */
+ bad: ret #0 /* SECCOMP_RET_KILL_THREAD */
good: ret #0x7fff0000 /* SECCOMP_RET_ALLOW */
The above example code can be placed into a file (here called "foo"), and
2: Enable DAD, and disable IPv6 operation if MAC-based duplicate
link-local address has been found.
+ DAD operation and mode on a given interface will be selected according
+ to the maximum value of conf/{all,interface}/accept_dad.
+
force_tllao - BOOLEAN
Enable sending the target link-layer address option even when
responding to a unicast neighbor solicitation.
optimistic_dad - BOOLEAN
Whether to perform Optimistic Duplicate Address Detection (RFC 4429).
- 0: disabled (default)
- 1: enabled
+ 0: disabled (default)
+ 1: enabled
+
+ Optimistic Duplicate Address Detection for the interface will be enabled
+ if at least one of conf/{all,interface}/optimistic_dad is set to 1,
+ it will be disabled otherwise.
use_optimistic - BOOLEAN
If enabled, do not classify optimistic addresses as deprecated during
source address selection. Preferred addresses will still be chosen
before optimistic addresses, subject to other ranking in the source
address selection algorithm.
- 0: disabled (default)
- 1: enabled
+ 0: disabled (default)
+ 1: enabled
+
+ This will be enabled if at least one of
+ conf/{all,interface}/use_optimistic is set to 1, disabled otherwise.
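
	A brief illustrative program (a sketch only; the interface name
	"eth0" is a placeholder, and conf/all works the same way) that
	enables both of the above by writing the backing procfs files:

		#include <stdio.h>

		/* Write a value to a sysctl file; returns 0 on success. */
		static int write_sysctl(const char *path, const char *val)
		{
			FILE *f = fopen(path, "w");

			if (!f)
				return -1;
			fputs(val, f);
			return fclose(f);
		}

		int main(void)
		{
			/* Effective setting is the OR of conf/all and conf/eth0. */
			write_sysctl("/proc/sys/net/ipv6/conf/eth0/optimistic_dad", "1");
			write_sysctl("/proc/sys/net/ipv6/conf/eth0/use_optimistic", "1");
			return 0;
		}
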
stable_secret - IPv6 address
This IPv6 address will be used as a secret to generate IPv6
with SR-IOV or soft switches, such as OVS, are possible.
- User-space tools
-
- user space |
- +-------------------------------------------------------------------+
- kernel | Netlink
- |
- +--------------+-------------------------------+
- | Network stack |
- | (Linux) |
- | |
- +----------------------------------------------+
+ User-space tools
+
+ user space |
+ +-------------------------------------------------------------------+
+ kernel | Netlink
+ |
+ +--------------+-------------------------------+
+ | Network stack |
+ | (Linux) |
+ | |
+ +----------------------------------------------+
sw1p2 sw1p4 sw1p6
- sw1p1 + sw1p3 + sw1p5 + eth1
- + | + | + | +
- | | | | | | |
- +--+----+----+----+-+--+----+---+ +-----+-----+
- | Switch driver | | mgmt |
- | (this document) | | driver |
- | | | |
- +--------------+----------------+ +-----------+
- |
- kernel | HW bus (eg PCI)
- +-------------------------------------------------------------------+
- hardware |
- +--------------+---+------------+
- | Switch device (sw1) |
- | +----+ +--------+
- | | v offloaded data path | mgmt port
- | | | |
- +--|----|----+----+----+----+---+
- | | | | | |
- + + + + + +
- p1 p2 p3 p4 p5 p6
-
- front-panel ports
+ sw1p1 + sw1p3 + sw1p5 + eth1
+ + | + | + | +
+ | | | | | | |
+ +--+----+----+----+----+----+---+ +-----+-----+
+ | Switch driver | | mgmt |
+ | (this document) | | driver |
+ | | | |
+ +--------------+----------------+ +-----------+
+ |
+ kernel | HW bus (eg PCI)
+ +-------------------------------------------------------------------+
+ hardware |
+ +--------------+----------------+
+ | Switch device (sw1) |
+ | +----+ +--------+
+ | | v offloaded data path | mgmt port
+ | | | |
+ +--|----|----+----+----+----+---+
+ | | | | | |
+ + + + + + +
+ p1 p2 p3 p4 p5 p6
+
+ front-panel ports
Fig 1.
- reboot-cmd [ SPARC only ]
- rtsig-max
- rtsig-nr
+- seccomp/ ==> Documentation/userspace-api/seccomp_filter.rst
- sem
- sem_next_id [ sysv ipc ]
- sg-big-buff [ generic SCSI device (sg) ]
A seccomp filter may return any of the following values. If multiple
filters exist, the return value for the evaluation of a given system
call will always use the highest precedent value. (For example,
-``SECCOMP_RET_KILL`` will always take precedence.)
+``SECCOMP_RET_KILL_PROCESS`` will always take precedence.)
In precedence order, they are:
-``SECCOMP_RET_KILL``:
+``SECCOMP_RET_KILL_PROCESS``:
+ Results in the entire process exiting immediately without executing
+ the system call. The exit status of the task (``status & 0x7f``)
+ will be ``SIGSYS``, not ``SIGKILL``.
+
+``SECCOMP_RET_KILL_THREAD``:
Results in the task exiting immediately without executing the
system call. The exit status of the task (``status & 0x7f``) will
be ``SIGSYS``, not ``SIGKILL``.
allow use of ptrace, even of other sandboxed processes, without
extreme care; ptracers can use this mechanism to escape.)
+``SECCOMP_RET_LOG``:
+ Results in the system call being executed after it is logged. This
+ should be used by application developers to learn which syscalls their
+ application needs without having to iterate through multiple test and
+ development cycles to build the list.
+
+ This action will only be logged if "log" is present in the
+ actions_logged sysctl string.
+
``SECCOMP_RET_ALLOW``:
Results in the system call being executed.
and a more generic example of a higher level macro interface for BPF
program generation.
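
For reference, a minimal always-log filter can be installed from C as
sketched below. This assumes a kernel and userspace headers new enough to
define ``SECCOMP_RET_LOG`` (and ``CONFIG_SECCOMP_FILTER`` enabled); it is
an illustration, not part of the kernel sources::

    #include <stddef.h>
    #include <stdio.h>
    #include <linux/filter.h>
    #include <linux/seccomp.h>
    #include <sys/prctl.h>

    int main(void)
    {
        struct sock_filter filter[] = {
            /* Load the syscall number (the typical filter shape;
             * this particular filter ignores it). */
            BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
                     offsetof(struct seccomp_data, nr)),
            /* Execute every syscall, but log it. */
            BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_LOG),
        };
        struct sock_fprog prog = {
            .len = sizeof(filter) / sizeof(filter[0]),
            .filter = filter,
        };

        if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) ||
            prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
            perror("prctl");
            return 1;
        }
        /* Every syscall from here on is executed and logged. */
        return 0;
    }
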
+Sysctls
+=======
+Seccomp's sysctl files can be found in the ``/proc/sys/kernel/seccomp/``
+directory. Here's a description of each file in that directory:
+
+``actions_avail``:
+ A read-only ordered list of seccomp return values (refer to the
+ ``SECCOMP_RET_*`` macros above) in string form. The ordering, from
+ left-to-right, is the least permissive return value to the most
+ permissive return value.
+
+ The list represents the set of seccomp return values supported
+ by the kernel. A userspace program may use this list to
+ determine if the actions found in ``seccomp.h``, when the
+ program was built, differ from the set of actions actually
+ supported in the current running kernel.
+
+``actions_logged``:
+ A read-write ordered list of seccomp return values (refer to the
+ ``SECCOMP_RET_*`` macros above) that are allowed to be logged. Writes
+ to the file do not need to be in ordered form but reads from the file
+ will be ordered in the same way as the actions_avail sysctl.
+
+ It is important to note that the value of ``actions_logged`` does not
+ prevent certain actions from being logged when the audit subsystem is
+ configured to audit a task. If the action is not found in
+ the ``actions_logged`` list, the final decision on whether to audit the
+ action for that task is ultimately left up to the audit subsystem to
+ decide for all seccomp return values other than ``SECCOMP_RET_ALLOW``.
+
+ The ``allow`` string is not accepted in the ``actions_logged`` sysctl
+ as it is not possible to log ``SECCOMP_RET_ALLOW`` actions. Attempting
+ to write ``allow`` to the sysctl will result in an EINVAL being
+ returned.
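
For example, a program can read ``actions_avail`` to check which actions
the running kernel supports (a sketch; the output shown in the comment is
illustrative)::

    #include <stdio.h>

    int main(void)
    {
        char buf[256];
        FILE *f = fopen("/proc/sys/kernel/seccomp/actions_avail", "r");

        if (!f) {
            perror("actions_avail");
            return 1;
        }
        if (fgets(buf, sizeof(buf), f))
            /* e.g. "kill_process kill_thread trap errno trace log allow" */
            printf("kernel supports: %s", buf);
        fclose(f);
        return 0;
    }
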
Adding architecture support
===========================
S: Maintained
F: drivers/acpi/arm64
+ACPI PMIC DRIVERS
+M: "Rafael J. Wysocki" <rjw@rjwysocki.net>
+M: Len Brown <lenb@kernel.org>
+R: Andy Shevchenko <andy@infradead.org>
+R: Mika Westerberg <mika.westerberg@linux.intel.com>
+L: linux-acpi@vger.kernel.org
+Q: https://patchwork.kernel.org/project/linux-acpi/list/
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+B: https://bugzilla.kernel.org
+S: Supported
+F: drivers/acpi/pmic/
+
ACPI THERMAL DRIVER
M: Zhang Rui <rui.zhang@intel.com>
L: linux-acpi@vger.kernel.org
F: drivers/scsi/bnx2i/
BROADCOM BNX2X 10 GIGABIT ETHERNET DRIVER
-M: Yuval Mintz <Yuval.Mintz@cavium.com>
M: Ariel Elior <ariel.elior@cavium.com>
M: everest-linux-l2@cavium.com
L: netdev@vger.kernel.org
M: Stefan Schmidt <stefan@osg.samsung.com>
L: linux-wpan@vger.kernel.org
W: http://wpan.cakelab.org/
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth.git
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth-next.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/sschmidt/wpan.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/sschmidt/wpan-next.git
S: Maintained
F: net/ieee802154/
F: net/mac802154/
F: drivers/scsi/qedi/
QLOGIC QL4xxx ETHERNET DRIVER
-M: Yuval Mintz <Yuval.Mintz@cavium.com>
M: Ariel Elior <Ariel.Elior@cavium.com>
M: everest-linux-l2@cavium.com
L: netdev@vger.kernel.org
VERSION = 4
PATCHLEVEL = 14
SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc2
NAME = Fearless Coyote
# *DOCUMENTATION*
PHONY += kselftest
kselftest:
- $(Q)$(MAKE) -C tools/testing/selftests run_tests
+ $(Q)$(MAKE) -C $(srctree)/tools/testing/selftests run_tests
PHONY += kselftest-clean
kselftest-clean:
- $(Q)$(MAKE) -C tools/testing/selftests clean
+ $(Q)$(MAKE) -C $(srctree)/tools/testing/selftests clean
PHONY += kselftest-merge
kselftest-merge:
#endif
-#define copy_segments(tsk, mm) do { } while (0)
-#define release_segments(mm) do { } while (0)
-
#define KSTK_EIP(tsk) (task_pt_regs(tsk)->ret)
#define KSTK_ESP(tsk) (task_pt_regs(tsk)->sp)
#define TIF_NEED_RESCHED 1 /* rescheduling necessary */
#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
#define TIF_UPROBE 3 /* breakpointed or singlestepping */
-#define TIF_FSCHECK 4 /* Check FS is USER_DS on return */
-#define TIF_SYSCALL_TRACE 5 /* syscall trace active */
-#define TIF_SYSCALL_AUDIT 6 /* syscall auditing active */
-#define TIF_SYSCALL_TRACEPOINT 7 /* syscall tracepoint instrumentation */
-#define TIF_SECCOMP 8 /* seccomp syscall filtering active */
+#define TIF_SYSCALL_TRACE 4 /* syscall trace active */
+#define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */
+#define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */
+#define TIF_SECCOMP 7 /* seccomp syscall filtering active */
#define TIF_NOHZ 12 /* in adaptive nohz mode */
#define TIF_USING_IWMMXT 17
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_UPROBE (1 << TIF_UPROBE)
-#define _TIF_FSCHECK (1 << TIF_FSCHECK)
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
/*
* Change these and you break ASM code in entry-common.S
*/
-#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
- _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
- _TIF_FSCHECK)
+#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
+ _TIF_NOTIFY_RESUME | _TIF_UPROBE)
#endif /* __KERNEL__ */
#endif /* __ASM_ARM_THREAD_INFO_H */
{
current_thread_info()->addr_limit = fs;
modify_domain(DOMAIN_KERNEL, fs ? DOMAIN_CLIENT : DOMAIN_MANAGER);
- /* On user-mode return, check fs is correct */
- set_thread_flag(TIF_FSCHECK);
}
#define segment_eq(a, b) ((a) == (b))
#include <asm/unistd.h>
#include <asm/ftrace.h>
#include <asm/unwind.h>
+#include <asm/memory.h>
#ifdef CONFIG_AEABI
#include <asm/unistd-oabi.h>
#endif
UNWIND(.fnstart )
UNWIND(.cantunwind )
disable_irq_notrace @ disable interrupts
+ ldr r2, [tsk, #TI_ADDR_LIMIT]
+ cmp r2, #TASK_SIZE
+ blne addr_limit_check_failed
ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing
- tst r1, #_TIF_SYSCALL_WORK
- bne fast_work_pending
- tst r1, #_TIF_WORK_MASK
+ tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
bne fast_work_pending
+
/* perform architecture specific actions before user return */
arch_ret_to_user r1, lr
UNWIND(.cantunwind )
str r0, [sp, #S_R0 + S_OFF]! @ save returned r0
disable_irq_notrace @ disable interrupts
+ ldr r2, [tsk, #TI_ADDR_LIMIT]
+ cmp r2, #TASK_SIZE
+ blne addr_limit_check_failed
ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing
- tst r1, #_TIF_SYSCALL_WORK
- bne fast_work_pending
- tst r1, #_TIF_WORK_MASK
+ tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
beq no_work_pending
UNWIND(.fnend )
ENDPROC(ret_fast_syscall)
/* Slower path - fall through to work_pending */
-fast_work_pending:
#endif
tst r1, #_TIF_SYSCALL_WORK
ret_slow_syscall:
disable_irq_notrace @ disable interrupts
ENTRY(ret_to_user_from_irq)
+ ldr r2, [tsk, #TI_ADDR_LIMIT]
+ cmp r2, #TASK_SIZE
+ blne addr_limit_check_failed
ldr r1, [tsk, #TI_FLAGS]
tst r1, #_TIF_WORK_MASK
bne slow_work_pending
* Update the trace code with the current status.
*/
trace_hardirqs_off();
-
- /* Check valid user FS if needed */
- addr_limit_user_check();
-
do {
if (likely(thread_flags & _TIF_NEED_RESCHED)) {
schedule();
return page;
}
+
+/* Defer to generic check */
+asmlinkage void addr_limit_check_failed(void)
+{
+ addr_limit_user_check();
+}
KBUILD_CFLAGS += $(call cc-option, -mpc-relative-literal-loads)
KBUILD_AFLAGS += $(lseinstr) $(brokengasinst)
+KBUILD_CFLAGS += $(call cc-option,-mabi=lp64)
+KBUILD_AFLAGS += $(call cc-option,-mabi=lp64)
+
ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
KBUILD_CPPFLAGS += -mbig-endian
CHECKFLAGS += -D__AARCH64EB__
AS += -EB
LD += -EB
+LDFLAGS += -maarch64linuxb
UTS_MACHINE := aarch64_be
else
KBUILD_CPPFLAGS += -mlittle-endian
CHECKFLAGS += -D__AARCH64EL__
AS += -EL
LD += -EL
+LDFLAGS += -maarch64linux
UTS_MACHINE := aarch64
endif
#ifndef __ASM_LINKAGE_H
#define __ASM_LINKAGE_H
-#define __ALIGN .align 4
-#define __ALIGN_STR ".align 4"
+#define __ALIGN .align 2
+#define __ALIGN_STR ".align 2"
#endif
}
EXPORT_SYMBOL(kernel_neon_end);
+#ifdef CONFIG_EFI
+
static DEFINE_PER_CPU(struct fpsimd_state, efi_fpsimd_state);
static DEFINE_PER_CPU(bool, efi_fpsimd_state_used);
kernel_neon_end();
}
+#endif /* CONFIG_EFI */
+
#endif /* CONFIG_KERNEL_MODE_NEON */
#ifdef CONFIG_CPU_PM
*/
trace_hardirqs_off();
- /* Check valid user FS if needed */
- addr_limit_user_check();
-
do {
+ /* Check valid user FS if needed */
+ addr_limit_user_check();
+
if (thread_flags & _TIF_NEED_RESCHED) {
schedule();
} else {
{
}
-#define copy_segments(tsk, mm) do { } while (0)
-#define release_segments(mm) do { } while (0)
-
/*
* saved kernel SP and DP of a blocked thread.
*/
extern asmlinkage void save_user_regs(struct user_context *target);
extern asmlinkage void *restore_user_regs(const struct user_context *target, ...);
-#define copy_segments(tsk, mm) do { } while (0)
-#define release_segments(mm) do { } while (0)
-#define forget_segments() do { } while (0)
-
unsigned long get_wchan(struct task_struct *p);
#define KSTK_EIP(tsk) ((tsk)->thread.frame0->pc)
/* Free all resources held by a thread. */
extern void release_thread(struct task_struct *);
-/* Copy and release all segment info associated with a VM */
-extern void copy_segments(struct task_struct *p, struct mm_struct * mm);
-extern void release_segments(struct mm_struct * mm);
-
-/* Copy and release all segment info associated with a VM */
-#define copy_segments(p, mm) do { } while (0)
-#define release_segments(mm) do { } while (0)
-
unsigned long get_wchan(struct task_struct *p);
#define KSTK_EIP(tsk) ((tsk)->thread.lr)
#define KSTK_ESP(tsk) ((tsk)->thread.sp)
{
}
-#define copy_segments(tsk, mm) do { } while (0)
-#define release_segments(mm) do { } while (0)
-
/*
* Return saved PC of a blocked thread.
*/
# Endianness selection
choice
prompt "Endianness selection"
- default CPU_BIG_ENDIAN
+ default CPU_LITTLE_ENDIAN
help
microblaze architectures can be configured for either little or
big endian formats. Be sure to select the appropriate mode.
generic-y += ioctl.h
generic-y += ioctls.h
generic-y += ipcbuf.h
+generic-y += kvm_para.h
generic-y += mman.h
generic-y += msgbuf.h
generic-y += param.h
unsigned long attrs)
{
#ifdef CONFIG_MMU
- unsigned long user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+ unsigned long user_count = vma_pages(vma);
unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
unsigned long off = vma->vm_pgoff;
unsigned long pfn;
#include "pci.h"
static int (*ath79_pci_plat_dev_init)(struct pci_dev *dev);
-static const struct ath79_pci_irq *ath79_pci_irq_map __initdata;
-static unsigned ath79_pci_nr_irqs __initdata;
+static const struct ath79_pci_irq *ath79_pci_irq_map;
+static unsigned ath79_pci_nr_irqs;
-static const struct ath79_pci_irq ar71xx_pci_irq_map[] __initconst = {
+static const struct ath79_pci_irq ar71xx_pci_irq_map[] = {
{
.slot = 17,
.pin = 1,
}
};
-static const struct ath79_pci_irq ar724x_pci_irq_map[] __initconst = {
+static const struct ath79_pci_irq ar724x_pci_irq_map[] = {
{
.slot = 0,
.pin = 1,
}
};
-static const struct ath79_pci_irq qca955x_pci_irq_map[] __initconst = {
+static const struct ath79_pci_irq qca955x_pci_irq_map[] = {
{
.bus = 0,
.slot = 0,
},
};
-int __init pcibios_map_irq(const struct pci_dev *dev, uint8_t slot, uint8_t pin)
+int pcibios_map_irq(const struct pci_dev *dev, uint8_t slot, uint8_t pin)
{
int irq = -1;
int i;
#define __write_64bit_c0_split(source, sel, val) \
do { \
+ unsigned long long __tmp; \
unsigned long __flags; \
\
local_irq_save(__flags); \
if (sel == 0) \
__asm__ __volatile__( \
".set\tmips64\n\t" \
- "dsll\t%L0, %L0, 32\n\t" \
+ "dsll\t%L0, %L1, 32\n\t" \
"dsrl\t%L0, %L0, 32\n\t" \
- "dsll\t%M0, %M0, 32\n\t" \
+ "dsll\t%M0, %M1, 32\n\t" \
"or\t%L0, %L0, %M0\n\t" \
"dmtc0\t%L0, " #source "\n\t" \
".set\tmips0" \
- : : "r" (val)); \
+ : "=&r,r" (__tmp) \
+ : "r,0" (val)); \
else \
__asm__ __volatile__( \
".set\tmips64\n\t" \
- "dsll\t%L0, %L0, 32\n\t" \
+ "dsll\t%L0, %L1, 32\n\t" \
"dsrl\t%L0, %L0, 32\n\t" \
- "dsll\t%M0, %M0, 32\n\t" \
+ "dsll\t%M0, %M1, 32\n\t" \
"or\t%L0, %L0, %M0\n\t" \
"dmtc0\t%L0, " #source ", " #sel "\n\t" \
".set\tmips0" \
- : : "r" (val)); \
+ : "=&r,r" (__tmp) \
+ : "r,0" (val)); \
local_irq_restore(__flags); \
} while (0)
return -ENOENT;
}
- if ((unsigned int)event->cpu >= nr_cpumask_bits ||
- (event->cpu >= 0 && !cpu_online(event->cpu)))
+ if (event->cpu >= 0 && !cpu_online(event->cpu))
return -ENODEV;
if (!atomic_inc_not_zero(&active_events)) {
#define INTC PC104PLUS_INTC_IRQ
#define INTD PC104PLUS_INTD_IRQ
-static char irq_tab_capcella[][5] __initdata = {
+static char irq_tab_capcella[][5] = {
[11] = { -1, INT1, INT1, INT1, INT1 },
[12] = { -1, INT2, INT2, INT2, INT2 },
[14] = { -1, INTA, INTB, INTC, INTD }
};
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
{
return irq_tab_capcella[slot][pin];
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_0,
qube_raq_via_board_id_fixup);
-static char irq_tab_qube1[] __initdata = {
+static char irq_tab_qube1[] = {
[COBALT_PCICONF_CPU] = 0,
[COBALT_PCICONF_ETH0] = QUBE1_ETH0_IRQ,
[COBALT_PCICONF_RAQSCSI] = SCSI_IRQ,
[COBALT_PCICONF_ETH1] = 0
};
-static char irq_tab_cobalt[] __initdata = {
+static char irq_tab_cobalt[] = {
[COBALT_PCICONF_CPU] = 0,
[COBALT_PCICONF_ETH0] = ETH0_IRQ,
[COBALT_PCICONF_RAQSCSI] = SCSI_IRQ,
[COBALT_PCICONF_ETH1] = ETH1_IRQ
};
-static char irq_tab_raq2[] __initdata = {
+static char irq_tab_raq2[] = {
[COBALT_PCICONF_CPU] = 0,
[COBALT_PCICONF_ETH0] = ETH0_IRQ,
[COBALT_PCICONF_RAQSCSI] = RAQ2_SCSI_IRQ,
[COBALT_PCICONF_ETH1] = ETH1_IRQ
};
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
{
if (cobalt_board_id <= COBALT_BRD_ID_QUBE1)
return irq_tab_qube1[slot];
*/
#define MAX_SLOT_NUM 10
-static unsigned char irq_map[][5] __initdata = {
+static unsigned char irq_map[][5] = {
[3] = {0, MARKEINS_PCI_IRQ_INTB, MARKEINS_PCI_IRQ_INTC,
MARKEINS_PCI_IRQ_INTD, 0,},
[4] = {0, MARKEINS_PCI_IRQ_INTA, 0, 0, 0,},
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_EMMA2RH,
emma2rh_pci_host_fixup);
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
{
return irq_map[slot][pin];
}
/* South bridge slot number is set by the pci probe process */
static u8 sb_slot = 5;
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
{
int irq = 0;
#define INTB MACEPCI_SHARED0_IRQ
#define INTC MACEPCI_SHARED1_IRQ
#define INTD MACEPCI_SHARED2_IRQ
-static char irq_tab_mace[][5] __initdata = {
+static char irq_tab_mace[][5] = {
/* Dummy INT#A INT#B INT#C INT#D */
{0, 0, 0, 0, 0}, /* This is placeholder row - never used */
{0, SCSI0, SCSI0, SCSI0, SCSI0},
* irqs. I suppose a device without a pin A will thank us for doing it
* right if there exists such a broken piece of crap.
*/
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
{
return irq_tab_mace[slot][pin];
}
#include <asm/txx9/pci.h>
#include <asm/txx9/jmr3927.h>
-int __init jmr3927_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int jmr3927_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
{
unsigned char irq = pin;
return 0;
}
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
{
return of_irq_parse_and_map_pci(dev, slot, pin);
}
#define PCID 7
/* all the pci device has the PCIA pin, check the datasheet. */
-static char irq_tab[][5] __initdata = {
+static char irq_tab[][5] = {
/* INTA INTB INTC INTD */
{0, 0, 0, 0, 0}, /* 11: Unused */
{0, 0, 0, 0, 0}, /* 12: Unused */
{0, 0, 0, 0, 0}, /* 27: Unused */
};
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
{
int virq;
pdev->vendor, pdev->device, pdev->irq);
}
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
{
print_fixup_info(dev);
return dev->irq;
static char pci_irq[5] = {
};
-static char irq_tab[][5] __initdata = {
+static char irq_tab[][5] = {
/* INTA INTB INTC INTD */
{0, 0, 0, 0, 0 }, /* 0: GT64120 PCI bridge */
{0, 0, 0, 0, 0 }, /* 1: Unused */
{0, PCID, PCIA, PCIB, PCIC } /* 21: PCI Slot 4 */
};
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
{
int virq;
virq = irq_tab[slot][pin];
#include <asm/vr41xx/mpc30x.h>
-static const int internal_func_irqs[] __initconst = {
+static const int internal_func_irqs[] = {
VRC4173_CASCADE_IRQ,
VRC4173_AC97_IRQ,
VRC4173_USB_IRQ,
};
-static const int irq_tab_mpc30x[] __initconst = {
+static const int irq_tab_mpc30x[] = {
[12] = VRC4173_PCMCIA1_IRQ,
[13] = VRC4173_PCMCIA2_IRQ,
[29] = MQ200_IRQ,
};
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
{
if (slot == 30)
return internal_func_irqs[PCI_FUNC(dev->devfn)];
#if defined(CONFIG_PMC_MSP7120_GW)
/* Garibaldi Board IRQ wiring to PCI slots */
-static char irq_tab[][5] __initdata = {
+static char irq_tab[][5] = {
/* INTA INTB INTC INTD */
{0, 0, 0, 0, 0 }, /* (AD[0]): Unused */
{0, 0, 0, 0, 0 }, /* (AD[1]): Unused */
#elif defined(CONFIG_PMC_MSP7120_EVAL)
/* MSP7120 Eval Board IRQ wiring to PCI slots */
-static char irq_tab[][5] __initdata = {
+static char irq_tab[][5] = {
/* INTA INTB INTC INTD */
{0, 0, 0, 0, 0 }, /* (AD[0]): Unused */
{0, 0, 0, 0, 0 }, /* (AD[1]): Unused */
#else
/* Unknown board -- don't assign any IRQs */
-static char irq_tab[][5] __initdata = {
+static char irq_tab[][5] = {
/* INTA INTB INTC INTD */
{0, 0, 0, 0, 0 }, /* (AD[0]): Unused */
{0, 0, 0, 0, 0 }, /* (AD[1]): Unused */
* RETURNS: IRQ number
*
****************************************************************************/
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
{
#if !defined(CONFIG_PMC_MSP7120_GW) && !defined(CONFIG_PMC_MSP7120_EVAL)
printk(KERN_WARNING "PCI: unknown board, no PCI IRQs assigned.\n");
#include <asm/txx9/pci.h>
#include <asm/txx9/rbtx4927.h>
-int __init rbtx4927_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int rbtx4927_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
{
unsigned char irq = pin;
#include <asm/txx9/pci.h>
#include <asm/txx9/rbtx4938.h>
-int __init rbtx4938_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int rbtx4938_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
{
int irq = tx4938_pcic1_map_irq(dev, slot);
* seem to be a documentation error. At least on my RM200C the Cirrus
* Logic CL-GD5434 VGA is device 3.
*/
-static char irq_tab_rm200[8][5] __initdata = {
+static char irq_tab_rm200[8][5] = {
/* INTA INTB INTC INTD */
{ 0, 0, 0, 0, 0 }, /* EISA bridge */
{ SCSI, SCSI, SCSI, SCSI, SCSI }, /* SCSI */
*
* The VGA card is optional for RM300 systems.
*/
-static char irq_tab_rm300d[8][5] __initdata = {
+static char irq_tab_rm300d[8][5] = {
/* INTA INTB INTC INTD */
{ 0, 0, 0, 0, 0 }, /* EISA bridge */
{ SCSI, SCSI, SCSI, SCSI, SCSI }, /* SCSI */
{ 0, INTD, INTA, INTB, INTC }, /* Slot 4 */
};
-static char irq_tab_rm300e[5][5] __initdata = {
+static char irq_tab_rm300e[5][5] = {
/* INTA INTB INTC INTD */
{ 0, 0, 0, 0, 0 }, /* HOST bridge */
{ SCSI, SCSI, SCSI, SCSI, SCSI }, /* SCSI */
#define INTC PCIT_IRQ_INTC
#define INTD PCIT_IRQ_INTD
-static char irq_tab_pcit[13][5] __initdata = {
+static char irq_tab_pcit[13][5] = {
/* INTA INTB INTC INTD */
{ 0, 0, 0, 0, 0 }, /* HOST bridge */
{ SCSI0, SCSI0, SCSI0, SCSI0, SCSI0 }, /* SCSI */
{ 0, INTA, INTB, INTC, INTD }, /* Slot 5 */
};
-static char irq_tab_pcit_cplus[13][5] __initdata = {
+static char irq_tab_pcit_cplus[13][5] = {
/* INTA INTB INTC INTD */
{ 0, 0, 0, 0, 0 }, /* HOST bridge */
{ 0, INTB, INTC, INTD, INTA }, /* PCI Slot 9 */
return (csmsr & 0xa0) == 0x20;
}
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
{
switch (sni_brd_type) {
case SNI_BRD_PCI_TOWER_CPLUS:
#include <asm/vr41xx/tb0219.h>
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
{
int irq = -1;
#include <asm/vr41xx/giu.h>
#include <asm/vr41xx/tb0226.h>
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
{
int irq = -1;
#include <asm/vr41xx/tb0287.h>
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
{
unsigned char bus;
int irq = -1;
arch_initcall(alchemy_pci_init);
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
{
struct alchemy_pci_context *ctx = dev->sysdata;
if (ctx && ctx->board_map_irq)
#include <linux/bcma/bcma.h>
#include <bcm47xx.h>
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
{
return 0;
}
#define LASAT_IRQ_PCIC (LASAT_IRQ_BASE + 7)
#define LASAT_IRQ_PCID (LASAT_IRQ_BASE + 8)
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
{
switch (slot) {
case 1:
return 0;
}
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
{
u16 cmd;
u32 val;
} s;
};
-int __initconst (*octeon_pcibios_map_irq)(const struct pci_dev *dev,
- u8 slot, u8 pin);
+int (*octeon_pcibios_map_irq)(const struct pci_dev *dev, u8 slot, u8 pin);
enum octeon_dma_bar_type octeon_dma_bar_type = OCTEON_DMA_BAR_TYPE_INVALID;
/**
* as it goes through each bridge.
* Returns Interrupt number for the device
*/
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
{
if (octeon_pcibios_map_irq)
return octeon_pcibios_map_irq(dev, slot, pin);
spin_unlock_irqrestore(&rt2880_pci_lock, flags);
}
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
{
u16 cmd;
int irq = -1;
return err;
}
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
{
return of_irq_parse_and_map_pci(dev, slot, pin);
}
return pciclk;
}
-int __init tx4938_pcic1_map_irq(const struct pci_dev *dev, u8 slot)
+int tx4938_pcic1_map_irq(const struct pci_dev *dev, u8 slot)
{
if (get_tx4927_pcicptr(dev->bus->sysdata) == tx4938_pcic1ptr) {
switch (slot) {
((pciclk + 50000) / 100000) % 10);
}
-int __init tx4939_pcic1_map_irq(const struct pci_dev *dev, u8 slot)
+int tx4939_pcic1_map_irq(const struct pci_dev *dev, u8 slot)
{
if (get_tx4927_pcicptr(dev->bus->sysdata) == tx4939_pcic1ptr) {
switch (slot) {
return -1;
}
-int __init tx4939_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int tx4939_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
{
int irq = tx4939_pcic1_map_irq(dev, slot);
return PCI_SLOT(lnkdev->devfn) / 8;
}
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
{
struct pci_dev *lnkdev;
int lnkfunc, node;
}
}
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
{
return get_irq_vector(dev);
}
* as it goes through each bridge.
* Returns Interrupt number for the device
*/
-int __init octeon_pcie_pcibios_map_irq(const struct pci_dev *dev,
- u8 slot, u8 pin)
+int octeon_pcie_pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
{
/*
* The EBH5600 board with the PCI to PCIe bridge mistakenly
#include <linux/smp.h>
#include <linux/interrupt.h>
+#include <asm/setup.h>
+
#ifdef CONFIG_MIPS_MT_SMP
#define MIPS_CPU_IPI_RESCHED_IRQ 0 /* SW int 0 for resched */
#define MIPS_CPU_IPI_CALL_IRQ 1 /* SW int 1 for call */
return 0;
}
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+static int (*txx9_pci_map_irq)(const struct pci_dev *dev, u8 slot, u8 pin);
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
{
- return txx9_board_vec->pci_map_irq(dev, slot, pin);
+ return txx9_pci_map_irq(dev, slot, pin);
}
char * (*txx9_board_pcibios_setup)(char *str) __initdata;
txx9_pci_err_action = TXX9_PCI_ERR_IGNORE;
return NULL;
}
+
+ txx9_pci_map_irq = txx9_board_vec->pci_map_irq;
+
return str;
}
}
#endif
-void release_segments(struct mm_struct *mm)
-{
-}
-
void machine_restart(char *cmd)
{
#ifdef CONFIG_KERNEL_DEBUGGER
{
}
-/*
- * we do not have to muck with descriptors here, that is
- * done in switch_mm() as needed.
- */
-void copy_segments(struct task_struct *p, struct mm_struct *new_mm)
-{
-}
-
/*
* this gets called so that we can store lazy state into memory and copy the
* current task into the new thread.
endchoice
+config PARISC_SELF_EXTRACT
+ bool "Build kernel as self-extracting executable"
+ default y
+ help
+ Say Y if you want to build the parisc kernel as a kind of
+ self-extracting executable.
+
+ If you say N here, the kernel will be compressed with gzip
+ which can be loaded by the palo bootloader directly too.
+
+ If you don't know what to do here, say Y.
+
config SMP
bool "Symmetric multi-processing support"
---help---
bzImage: vmlinux
$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+ifdef CONFIG_PARISC_SELF_EXTRACT
vmlinuz: bzImage
$(OBJCOPY) $(boot)/bzImage $@
+else
+vmlinuz: vmlinux
+ @gzip -cf -9 $< > $@
+endif
install:
$(CONFIG_SHELL) $(src)/arch/parisc/install.sh \
KBUILD_CFLAGS := -D__KERNEL__ -O2 -DBOOTLOADER
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks
-KBUILD_CFLAGS += -fno-PIE -mno-space-regs -mdisable-fpregs
+KBUILD_CFLAGS += -fno-PIE -mno-space-regs -mdisable-fpregs -Os
ifndef CONFIG_64BIT
KBUILD_CFLAGS += -mfast-indirect-calls
endif
/* Symbols defined by linker scripts */
extern char input_data[];
extern int input_len;
-extern __le32 output_len; /* at unaligned address, little-endian */
+/* output_len is inserted by the linker possibly at an unaligned address */
+extern __le32 output_len __aligned(1);
extern char _text, _end;
extern char _bss, _ebss;
extern char _startcode_end;
/* wrapper-functions from pdc.c */
int pdc_add_valid(unsigned long address);
+int pdc_instr(unsigned int *instr);
int pdc_chassis_info(struct pdc_chassis_info *chassis_info, void *led_info, unsigned long len);
int pdc_chassis_disp(unsigned long disp);
int pdc_chassis_warn(unsigned long *warn);
#ifndef __ASM_SMP_H
#define __ASM_SMP_H
+extern int init_per_cpu(int cpuid);
#if defined(CONFIG_SMP)
}
EXPORT_SYMBOL(pdc_add_valid);
+/**
+ * pdc_instr - Get instruction that invokes PDCE_CHECK in HPMC handler.
+ * @instr: Pointer to variable which will get instruction opcode.
+ *
+ * The return value is PDC_OK (0) in case call succeeded.
+ */
+int __init pdc_instr(unsigned int *instr)
+{
+ int retval;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pdc_lock, flags);
+ retval = mem_pdc_call(PDC_INSTR, 0UL, __pa(pdc_result));
+ convert_to_wide(pdc_result);
+ *instr = pdc_result[0];
+ spin_unlock_irqrestore(&pdc_lock, flags);
+
+ return retval;
+}
+
/**
* pdc_chassis_info - Return chassis information.
* @result: The return buffer.
#include <linux/memblock.h>
#include <linux/seq_file.h>
#include <linux/kthread.h>
+#include <linux/initrd.h>
#include <asm/pdc.h>
#include <asm/pdcpat.h>
}
for (i = 0; i < pdt_status.pdt_entries; i++) {
+ unsigned long addr;
+
report_mem_err(pdt_entry[i]);
+ addr = pdt_entry[i] & PDT_ADDR_PHYS_MASK;
+ if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) &&
+ addr >= initrd_start && addr < initrd_end)
+ pr_crit("CRITICAL: initrd possibly broken "
+ "due to bad memory!\n");
+
/* mark memory page bad */
memblock_reserve(pdt_entry[i] & PAGE_MASK, PAGE_SIZE);
}
*
* o Enable CPU profiling hooks.
*/
-int init_per_cpu(int cpunum)
+int __init init_per_cpu(int cpunum)
{
int ret;
struct pdc_coproc_cfg coproc_cfg;
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
+#include <linux/start_kernel.h>
#include <asm/processor.h>
#include <asm/sections.h>
#include <asm/io.h>
#include <asm/setup.h>
#include <asm/unwind.h>
+#include <asm/smp.h>
static char __initdata command_line[COMMAND_LINE_SIZE];
}
#endif
-extern int init_per_cpu(int cpuid);
extern void collect_boot_cpu_data(void);
void __init setup_arch(char **cmdline_p)
}
arch_initcall(parisc_init);
-void start_parisc(void)
+void __init start_parisc(void)
{
- extern void start_kernel(void);
extern void early_trap_init(void);
int ret, cpunum;
static void __init
smp_cpu_init(int cpunum)
{
- extern int init_per_cpu(int); /* arch/parisc/kernel/processor.c */
extern void init_IRQ(void); /* arch/parisc/kernel/irq.c */
extern void start_cpu_itimer(void); /* arch/parisc/kernel/time.c */
/* Set modes and Enable floating point coprocessor */
- (void) init_per_cpu(cpunum);
+ init_per_cpu(cpunum);
disable_sr_hashing();
u32 check = 0;
u32 *ivap;
u32 *hpmcp;
- u32 length;
+ u32 length, instr;
if (strcmp((const char *)iva, "cows can fly"))
panic("IVT invalid");
for (i = 0; i < 8; i++)
*ivap++ = 0;
+ /*
+ * Use PDC_INSTR firmware function to get instruction that invokes
+ * PDCE_CHECK in HPMC handler. See programming note at page 1-31 of
+ * the PA 1.1 Firmware Architecture document.
+ */
+ if (pdc_instr(&instr) == PDC_OK)
+ ivap[0] = instr;
+
/* Compute Checksum for HPMC handler */
length = os_hpmc_size;
ivap[7] = length;
#include <linux/slab.h>
#include <linux/kallsyms.h>
#include <linux/sort.h>
+#include <linux/sched.h>
#include <linux/uaccess.h>
#include <asm/assembly.h>
info->prev_sp = sp - 64;
info->prev_ip = 0;
+
+ /* The stack is at the end inside the thread_union
+ * struct. If we reach data, we have reached the
+ * beginning of the stack and should stop unwinding. */
+ if (info->prev_sp >= (unsigned long) task_thread_info(info->t) &&
+ info->prev_sp < ((unsigned long) task_thread_info(info->t)
+ + THREAD_SZ_ALGN)) {
+ info->prev_sp = 0;
+ break;
+ }
+
if (get_user(tmp, (unsigned long *)(info->prev_sp - RP_OFFSET)))
break;
info->prev_ip = tmp;
#include <linux/interrupt.h>
#include <linux/extable.h>
#include <linux/uaccess.h>
+#include <linux/hugetlb.h>
#include <asm/traps.h>
struct task_struct *tsk;
struct mm_struct *mm;
unsigned long acc_type;
- int fault;
+ int fault = 0;
unsigned int flags;
if (faulthandler_disabled())
goto out_of_memory;
else if (fault & VM_FAULT_SIGSEGV)
goto bad_area;
- else if (fault & VM_FAULT_SIGBUS)
+ else if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
+ VM_FAULT_HWPOISON_LARGE))
goto bad_area;
BUG();
}
if (user_mode(regs)) {
struct siginfo si;
-
- show_signal_msg(regs, code, address, tsk, vma);
+ unsigned int lsb = 0;
switch (code) {
case 15: /* Data TLB miss fault/Data page fault */
si.si_code = (code == 26) ? SEGV_ACCERR : SEGV_MAPERR;
break;
}
+
+#ifdef CONFIG_MEMORY_FAILURE
+ if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) {
+ printk(KERN_ERR
+ "MCE: Killing %s:%d due to hardware memory corruption fault at %08lx\n",
+ tsk->comm, tsk->pid, address);
+ si.si_signo = SIGBUS;
+ si.si_code = BUS_MCEERR_AR;
+ }
+#endif
+
+ /*
+ * Either small page or large page may be poisoned.
+ * In other words, VM_FAULT_HWPOISON_LARGE and
+ * VM_FAULT_HWPOISON are mutually exclusive.
+ */
+ if (fault & VM_FAULT_HWPOISON_LARGE)
+ lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
+ else if (fault & VM_FAULT_HWPOISON)
+ lsb = PAGE_SHIFT;
+ else
+ show_signal_msg(regs, code, address, tsk, vma);
+ si.si_addr_lsb = lsb;
+
si.si_errno = 0;
si.si_addr = (void __user *) address;
force_sig_info(si.si_signo, &si, current);
CONFIG_LOGO=y
CONFIG_SOUND=m
CONFIG_SND=m
-CONFIG_SND_SEQUENCER=m
+CONFIG_SND_OSSEMUL=y
CONFIG_SND_MIXER_OSS=m
CONFIG_SND_PCM_OSS=m
-CONFIG_SND_SEQUENCER_OSS=y
+CONFIG_SND_SEQUENCER=m
+CONFIG_SND_SEQUENCER_OSS=m
CONFIG_SND_POWERMAC=m
CONFIG_SND_AOA=m
CONFIG_SND_AOA_FABRIC_LAYOUT=m
# CONFIG_LOGO_LINUX_CLUT224 is not set
CONFIG_SOUND=y
CONFIG_SND=y
-CONFIG_SND_SEQUENCER=y
+CONFIG_SND_OSSEMUL=y
CONFIG_SND_MIXER_OSS=y
CONFIG_SND_PCM_OSS=y
-CONFIG_SND_SEQUENCER_OSS=y
# CONFIG_SND_VERBOSE_PROCFS is not set
+CONFIG_SND_SEQUENCER=y
+CONFIG_SND_SEQUENCER_OSS=y
# CONFIG_USB_SUPPORT is not set
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_GENERIC=y
CONFIG_LOGO=y
CONFIG_SOUND=y
CONFIG_SND=y
-CONFIG_SND_SEQUENCER=y
+CONFIG_SND_OSSEMUL=y
CONFIG_SND_MIXER_OSS=y
CONFIG_SND_PCM_OSS=y
+CONFIG_SND_SEQUENCER=y
CONFIG_SND_SEQUENCER_OSS=y
CONFIG_SND_USB_AUDIO=y
CONFIG_SND_USB_USX2Y=y
CONFIG_LOGO=y
CONFIG_SOUND=m
CONFIG_SND=m
-CONFIG_SND_SEQUENCER=m
-CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_OSSEMUL=y
CONFIG_SND_MIXER_OSS=m
CONFIG_SND_PCM_OSS=m
-CONFIG_SND_SEQUENCER_OSS=y
+CONFIG_SND_SEQUENCER=m
+CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_SEQUENCER_OSS=m
CONFIG_SND_DUMMY=m
CONFIG_SND_POWERMAC=m
CONFIG_SND_AOA=m
CONFIG_LOGO=y
CONFIG_SOUND=m
CONFIG_SND=m
-CONFIG_SND_SEQUENCER=m
-CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_OSSEMUL=y
CONFIG_SND_MIXER_OSS=m
CONFIG_SND_PCM_OSS=m
-CONFIG_SND_SEQUENCER_OSS=y
+CONFIG_SND_SEQUENCER=m
+CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_SEQUENCER_OSS=m
CONFIG_SND_POWERMAC=m
CONFIG_SND_AOA=m
CONFIG_SND_AOA_FABRIC_LAYOUT=m
CONFIG_LOGO=y
CONFIG_SOUND=m
CONFIG_SND=m
-CONFIG_SND_SEQUENCER=m
-CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_OSSEMUL=y
CONFIG_SND_MIXER_OSS=m
CONFIG_SND_PCM_OSS=m
-CONFIG_SND_SEQUENCER_OSS=y
+CONFIG_SND_SEQUENCER=m
+CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_SEQUENCER_OSS=m
CONFIG_HID_DRAGONRISE=y
CONFIG_HID_GYRATION=y
CONFIG_HID_TWINHAN=y
# CONFIG_LOGO_LINUX_VGA16 is not set
CONFIG_SOUND=m
CONFIG_SND=m
-CONFIG_SND_SEQUENCER=m
-CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_OSSEMUL=y
CONFIG_SND_MIXER_OSS=m
CONFIG_SND_PCM_OSS=m
-CONFIG_SND_SEQUENCER_OSS=y
CONFIG_SND_DYNAMIC_MINORS=y
# CONFIG_SND_SUPPORT_OLD_API is not set
CONFIG_SND_VERBOSE_PRINTK=y
CONFIG_SND_DEBUG=y
CONFIG_SND_DEBUG_VERBOSE=y
CONFIG_SND_PCM_XRUN_DEBUG=y
+CONFIG_SND_SEQUENCER=m
+CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_SEQUENCER_OSS=m
CONFIG_SND_DUMMY=m
CONFIG_SND_VIRMIDI=m
CONFIG_SND_MTPAV=m
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_SOUND=y
CONFIG_SND=y
-CONFIG_SND_SEQUENCER=y
+CONFIG_SND_OSSEMUL=y
CONFIG_SND_MIXER_OSS=y
CONFIG_SND_PCM_OSS=y
-CONFIG_SND_SEQUENCER_OSS=y
# CONFIG_SND_VERBOSE_PROCFS is not set
+CONFIG_SND_SEQUENCER=y
+CONFIG_SND_SEQUENCER_OSS=y
CONFIG_HID_APPLE=m
CONFIG_HID_WACOM=m
CONFIG_MMC=y
} else if ((ret = eeh_ops->init()))
return ret;
+ /* Initialize PHB PEs */
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
+ eeh_dev_phb_init_dynamic(hose);
+
/* Initialize EEH event */
ret = eeh_event_init();
if (ret)
/* EEH PE for PHB */
eeh_phb_pe_create(phb);
}
-
-/**
- * eeh_dev_phb_init - Create EEH devices for devices included in existing PHBs
- *
- * Scan all the existing PHBs and create EEH devices for their OF
- * nodes and their children OF nodes
- */
-static int __init eeh_dev_phb_init(void)
-{
- struct pci_controller *phb, *tmp;
-
- list_for_each_entry_safe(phb, tmp, &hose_list, list_node)
- eeh_dev_phb_init_dynamic(phb);
-
- return 0;
-}
-
-core_initcall(eeh_dev_phb_init);
* and that can be emulated.
*/
if (!is_conditional_branch(*p->ainsn.insn) &&
- analyse_instr(&op, ®s, *p->ainsn.insn))
+ analyse_instr(&op, ®s, *p->ainsn.insn) == 1) {
+ emulate_update_regs(®s, &op);
nip = regs.nip;
+ }
return nip;
}
* in the appropriate thread structures from live.
*/
- if (tsk != current)
+ if ((!cpu_has_feature(CPU_FTR_TM)) || (tsk != current))
return;
if (MSR_TM_SUSPENDED(mfmsr())) {
int machine_check_e500mc(struct pt_regs *regs)
{
unsigned long mcsr = mfspr(SPRN_MCSR);
+ unsigned long pvr = mfspr(SPRN_PVR);
unsigned long reason = mcsr;
int recoverable = 1;
* may still get logged and cause a machine check. We should
* only treat the non-write shadow case as non-recoverable.
*/
- if (!(mfspr(SPRN_L1CSR2) & L1CSR2_DCWS))
- recoverable = 0;
+ /* On e6500 core, L1 DCWS (Data cache write shadow mode) bit
+ * is not implemented but L1 data cache always runs in write
+ * shadow mode. Hence on data cache parity errors HW will
+ * automatically invalidate the L1 Data Cache.
+ */
+ if (PVR_VER(pvr) != PVR_VER_E6500) {
+ if (!(mfspr(SPRN_L1CSR2) & L1CSR2_DCWS))
+ recoverable = 0;
+ }
}
if (reason & MCSR_L2MMU_MHIT) {
BEGIN_FTR_SECTION
mtspr SPRN_PPR, r0
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+/* Move canary into DSISR to check for later */
+BEGIN_FTR_SECTION
+ li r0, 0x7fff
+ mtspr SPRN_HDSISR, r0
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+
ld r0, VCPU_GPR(R0)(r4)
ld r4, VCPU_GPR(R4)(r4)
kvmppc_hdsi:
ld r3, VCPU_KVM(r9)
lbz r0, KVM_RADIX(r3)
- cmpwi r0, 0
mfspr r4, SPRN_HDAR
mfspr r6, SPRN_HDSISR
+BEGIN_FTR_SECTION
+ /* Look for DSISR canary. If we find it, retry instruction */
+ cmpdi r6, 0x7fff
+ beq 6f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+ cmpwi r0, 0
bne .Lradix_hdsi /* on radix, just save DAR/DSISR/ASDR */
/* HPTE not found fault or protection fault? */
andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h
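
Why the canary works, as far as the comments and the CPU_FTR_ARCH_300 guards let one infer: on the affected processors an HDSI can be delivered before HDSISR has been updated, so the entry path seeds HDSISR with the sentinel 0x7fff just before entering the guest. If kvmppc_hdsi then reads the sentinel back, the real fault syndrome never arrived, and the safe response is the added branch to 6f, which resumes the guest so the instruction re-executes and faults again with valid state.
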
: "r" (addr), "i" (-EFAULT), "0" (err))
static nokprobe_inline void set_cr0(const struct pt_regs *regs,
- struct instruction_op *op, int rd)
+ struct instruction_op *op)
{
- long val = regs->gpr[rd];
+ long val = op->val;
op->type |= SETCC;
op->ccval = (regs->ccr & 0x0fffffff) | ((regs->xer >> 3) & 0x10000000);
case 13: /* addic. */
imm = (short) instr;
add_with_carry(regs, op, rd, regs->gpr[ra], imm, 0);
- set_cr0(regs, op, rd);
+ set_cr0(regs, op);
return 1;
case 14: /* addi */
case 28: /* andi. */
op->val = regs->gpr[rd] & (unsigned short) instr;
- set_cr0(regs, op, ra);
+ set_cr0(regs, op);
goto logical_done_nocc;
case 29: /* andis. */
imm = (unsigned short) instr;
op->val = regs->gpr[rd] & (imm << 16);
- set_cr0(regs, op, ra);
+ set_cr0(regs, op);
goto logical_done_nocc;
#ifdef __powerpc64__
op->type = COMPUTE + SETCC;
imm = 0xf0000000UL;
val = regs->gpr[rd];
- op->val = regs->ccr;
+ op->ccval = regs->ccr;
for (sh = 0; sh < 8; ++sh) {
if (instr & (0x80000 >> sh))
- op->val = (op->val & ~imm) |
+ op->ccval = (op->ccval & ~imm) |
(val & imm);
imm >>= 4;
}
goto arith_done;
case 235: /* mullw */
- op->val = (unsigned int) regs->gpr[ra] *
- (unsigned int) regs->gpr[rb];
+ op->val = (long)(int) regs->gpr[ra] *
+ (int) regs->gpr[rb];
+
goto arith_done;
case 266: /* add */
logical_done:
if (instr & 1)
- set_cr0(regs, op, ra);
+ set_cr0(regs, op);
logical_done_nocc:
op->reg = ra;
op->type |= SETREG;
arith_done:
if (instr & 1)
- set_cr0(regs, op, rd);
+ set_cr0(regs, op);
compute_done:
op->reg = rd;
op->type |= SETREG;
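
Three corrections run through the sstep.c hunks above, all consequences of results now being staged in the instruction_op instead of written straight to the register file: set_cr0() must derive CR0 from op->val, because the destination GPR has not been updated yet; mcrf must place the new CR image in op->ccval, which the SETCC path consumes, rather than in op->val, which would be written to a GPR; and mullw must sign-extend its 32-bit operands so op->val carries the full 64-bit signed product that the hardware instruction writes.
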
u32 pmcs[MAX_HWEVENTS];
int i;
+ if (!ppmu) {
+ pr_info("Performance monitor hardware not registered.\n");
+ return;
+ }
+
if (!ppmu->n_counter)
return;
u64 pir = get_hard_smp_processor_id(cpu);
mtspr(SPRN_LPCR, lpcr_val);
- opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
+
+ /*
+ * Program the LPCR via stop-api only if the deepest stop state
+ * can lose hypervisor context.
+ */
+ if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)
+ opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
}
/*
return rc;
}
- of_node_put(dn->parent);
return 0;
}
}
dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent);
- of_node_put(parent);
if (!dn) {
pr_warn("Failed call to configure-connector, drc index: %x\n",
drc_index);
dlpar_release_drc(drc_index);
+ of_node_put(parent);
return -EINVAL;
}
rc = dlpar_attach_node(dn, parent);
+
+ /* Regardless we are done with parent now */
+ of_node_put(parent);
+
if (rc) {
saved_rc = rc;
pr_warn("Failed to attach node %s, rc: %d, drc index: %x\n",
return -ENOENT;
dn = dlpar_configure_connector(drc_index, parent_dn);
- if (!dn)
+ if (!dn) {
+ of_node_put(parent_dn);
return -ENOENT;
+ }
rc = dlpar_attach_node(dn, parent_dn);
if (rc)
static inline void pmdp_invalidate(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmdp)
{
- pmdp_xchg_direct(vma->vm_mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
+ pmd_t pmd = __pmd(pmd_val(*pmdp) | _SEGMENT_ENTRY_INVALID);
+
+ pmdp_xchg_direct(vma->vm_mm, addr, pmdp, pmd);
}
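
The point of the s390 pmdp_invalidate() change is visible from the two versions side by side: the old code exchanged the entry with _SEGMENT_ENTRY_EMPTY and so threw away everything stored in the pmd, while the new code only ORs in _SEGMENT_ENTRY_INVALID, letting bits such as dirty and young survive the invalidation, which is what the generic THP code expects of this helper.
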
#define __HAVE_ARCH_PMDP_SET_WRPROTECT
#endif
}
-static int __init topology_setup(char *str)
-{
- bool enabled;
- int rc;
-
- rc = kstrtobool(str, &enabled);
- if (!rc && !enabled)
- S390_lowcore.machine_flags &= ~MACHINE_FLAG_TOPOLOGY;
- return rc;
-}
-early_param("topology", topology_setup);
-
static int __init disable_vector_extension(char *str)
{
S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX;
}
/* Check online status of the CPU to which the event is pinned */
- if ((unsigned int)event->cpu >= nr_cpumask_bits ||
- (event->cpu >= 0 && !cpu_online(event->cpu)))
- return -ENODEV;
+ if (event->cpu >= 0) {
+ if ((unsigned int)event->cpu >= nr_cpumask_bits)
+ return -ENODEV;
+ if (!cpu_online(event->cpu))
+ return -ENODEV;
+ }
/* Force reset of idle/hv excludes regardless of what the
* user requested.
#include <linux/workqueue.h>
#include <linux/bootmem.h>
+#include <linux/uaccess.h>
+#include <linux/sysctl.h>
#include <linux/cpuset.h>
#include <linux/device.h>
#include <linux/export.h>
#define PTF_VERTICAL (1UL)
#define PTF_CHECK (2UL)
+enum {
+ TOPOLOGY_MODE_HW,
+ TOPOLOGY_MODE_SINGLE,
+ TOPOLOGY_MODE_PACKAGE,
+ TOPOLOGY_MODE_UNINITIALIZED
+};
+
struct mask_info {
struct mask_info *next;
unsigned char id;
cpumask_t mask;
};
+static int topology_mode = TOPOLOGY_MODE_UNINITIALIZED;
static void set_topology_timer(void);
static void topology_work_fn(struct work_struct *work);
static struct sysinfo_15_1_x *tl_info;
cpumask_t mask;
cpumask_copy(&mask, cpumask_of(cpu));
- if (!MACHINE_HAS_TOPOLOGY)
- return mask;
- for (; info; info = info->next) {
- if (cpumask_test_cpu(cpu, &info->mask))
- return info->mask;
+ switch (topology_mode) {
+ case TOPOLOGY_MODE_HW:
+ while (info) {
+ if (cpumask_test_cpu(cpu, &info->mask)) {
+ mask = info->mask;
+ break;
+ }
+ info = info->next;
+ }
+ if (cpumask_empty(&mask))
+ cpumask_copy(&mask, cpumask_of(cpu));
+ break;
+ case TOPOLOGY_MODE_PACKAGE:
+ cpumask_copy(&mask, cpu_present_mask);
+ break;
+ default:
+ /* fallthrough */
+ case TOPOLOGY_MODE_SINGLE:
+ cpumask_copy(&mask, cpumask_of(cpu));
+ break;
}
return mask;
}
int i;
cpumask_copy(&mask, cpumask_of(cpu));
- if (!MACHINE_HAS_TOPOLOGY)
+ if (topology_mode != TOPOLOGY_MODE_HW)
return mask;
cpu -= cpu % (smp_cpu_mtid + 1);
for (i = 0; i <= smp_cpu_mtid; i++)
{
int cpu;
- mutex_lock(&smp_cpu_state_mutex);
for_each_possible_cpu(cpu)
smp_cpu_set_polarization(cpu, POLARIZATION_HRZ);
- mutex_unlock(&smp_cpu_state_mutex);
}
static int ptf(unsigned long fc)
static void update_cpu_masks(void)
{
struct cpu_topology_s390 *topo;
- int cpu;
+ int cpu, id;
for_each_possible_cpu(cpu) {
topo = &cpu_topology[cpu];
topo->core_mask = cpu_group_map(&socket_info, cpu);
topo->book_mask = cpu_group_map(&book_info, cpu);
topo->drawer_mask = cpu_group_map(&drawer_info, cpu);
- if (!MACHINE_HAS_TOPOLOGY) {
+ if (topology_mode != TOPOLOGY_MODE_HW) {
+ id = topology_mode == TOPOLOGY_MODE_PACKAGE ? 0 : cpu;
topo->thread_id = cpu;
topo->core_id = cpu;
- topo->socket_id = cpu;
- topo->book_id = cpu;
- topo->drawer_id = cpu;
+ topo->socket_id = id;
+ topo->book_id = id;
+ topo->drawer_id = id;
if (cpu_present(cpu))
cpumask_set_cpu(cpu, &cpus_with_topology);
}
struct sysinfo_15_1_x *info = tl_info;
int rc = 0;
+ mutex_lock(&smp_cpu_state_mutex);
cpumask_clear(&cpus_with_topology);
if (MACHINE_HAS_TOPOLOGY) {
rc = 1;
update_cpu_masks();
if (!MACHINE_HAS_TOPOLOGY)
topology_update_polarization_simple();
+ mutex_unlock(&smp_cpu_state_mutex);
return rc;
}
schedule_work(&topology_work);
}
+static void topology_flush_work(void)
+{
+ flush_work(&topology_work);
+}
+
static void topology_timer_fn(unsigned long ignored)
{
if (ptf(PTF_CHECK))
struct sysinfo_15_1_x *info;
set_sched_topology(s390_topology);
+ if (topology_mode == TOPOLOGY_MODE_UNINITIALIZED) {
+ if (MACHINE_HAS_TOPOLOGY)
+ topology_mode = TOPOLOGY_MODE_HW;
+ else
+ topology_mode = TOPOLOGY_MODE_SINGLE;
+ }
if (!MACHINE_HAS_TOPOLOGY)
goto out;
tl_info = memblock_virt_alloc(PAGE_SIZE, PAGE_SIZE);
__arch_update_cpu_topology();
}
+static inline int topology_get_mode(int enabled)
+{
+ if (!enabled)
+ return TOPOLOGY_MODE_SINGLE;
+ return MACHINE_HAS_TOPOLOGY ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE;
+}
+
+static inline int topology_is_enabled(void)
+{
+ return topology_mode != TOPOLOGY_MODE_SINGLE;
+}
+
+static int __init topology_setup(char *str)
+{
+ bool enabled;
+ int rc;
+
+ rc = kstrtobool(str, &enabled);
+ if (rc)
+ return rc;
+ topology_mode = topology_get_mode(enabled);
+ return 0;
+}
+early_param("topology", topology_setup);
+
+static int topology_ctl_handler(struct ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ unsigned int len;
+ int new_mode;
+ char buf[2];
+
+ if (!*lenp || *ppos) {
+ *lenp = 0;
+ return 0;
+ }
+ if (!write) {
+ strncpy(buf, topology_is_enabled() ? "1\n" : "0\n",
+ ARRAY_SIZE(buf));
+ len = strnlen(buf, ARRAY_SIZE(buf));
+ if (len > *lenp)
+ len = *lenp;
+ if (copy_to_user(buffer, buf, len))
+ return -EFAULT;
+ goto out;
+ }
+ len = *lenp;
+ if (copy_from_user(buf, buffer, len > sizeof(buf) ? sizeof(buf) : len))
+ return -EFAULT;
+ if (buf[0] != '0' && buf[0] != '1')
+ return -EINVAL;
+ mutex_lock(&smp_cpu_state_mutex);
+ new_mode = topology_get_mode(buf[0] == '1');
+ if (topology_mode != new_mode) {
+ topology_mode = new_mode;
+ topology_schedule_update();
+ }
+ mutex_unlock(&smp_cpu_state_mutex);
+ topology_flush_work();
+out:
+ *lenp = len;
+ *ppos += len;
+ return 0;
+}
+
+static struct ctl_table topology_ctl_table[] = {
+ {
+ .procname = "topology",
+ .mode = 0644,
+ .proc_handler = topology_ctl_handler,
+ },
+ { },
+};
+
+static struct ctl_table topology_dir_table[] = {
+ {
+ .procname = "s390",
+ .maxlen = 0,
+ .mode = 0555,
+ .child = topology_ctl_table,
+ },
+ { },
+};
+
static int __init topology_init(void)
{
if (MACHINE_HAS_TOPOLOGY)
set_topology_timer();
else
topology_update_polarization_simple();
+ register_sysctl_table(topology_dir_table);
return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching);
}
device_initcall(topology_init);
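
Putting the pieces together: once topology_init() has registered topology_dir_table, the mode becomes runtime-switchable through /proc/sys/s390/topology in addition to the early "topology=" command-line parameter. A read reports topology_is_enabled() as "0" or "1"; a write of "0" or "1" maps to a mode via topology_get_mode() under smp_cpu_state_mutex, schedules an update, and returns only after topology_flush_work() has drained the work item, so the new CPU masks are in place by the time the write completes.
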
static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
unsigned long end, int write, struct page **pages, int *nr)
{
- unsigned long mask, result;
struct page *head, *page;
+ unsigned long mask;
int refs;
- result = write ? 0 : _SEGMENT_ENTRY_PROTECT;
- mask = result | _SEGMENT_ENTRY_INVALID;
- if ((pmd_val(pmd) & mask) != result)
+ mask = (write ? _SEGMENT_ENTRY_PROTECT : 0) | _SEGMENT_ENTRY_INVALID;
+ if ((pmd_val(pmd) & mask) != 0)
return 0;
VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT));
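
What actually changes in gup_huge_pmd() can be derived from the two mask computations: the old test only ever consulted _SEGMENT_ENTRY_INVALID on the write path, so get_user_pages_fast() could take a write reference on a write-protected huge pmd; the new code folds _SEGMENT_ENTRY_PROTECT into the mask for writes and rejects any pmd with a relevant bit set, since both bits are zero in every permitted case.
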
/* Free all resources held by a thread. */
extern void release_thread(struct task_struct *);
-/* Copy and release all segment info associated with a VM */
-#define copy_segments(p, mm) do { } while(0)
-#define release_segments(mm) do { } while(0)
-
/*
* FPU lazy state save handling.
*/
/* Free all resources held by a thread. */
extern void release_thread(struct task_struct *);
-/* Copy and release all segment info associated with a VM */
-#define copy_segments(p, mm) do { } while (0)
-#define release_segments(mm) do { } while (0)
-#define forget_segments() do { } while (0)
/*
* FPU lazy state save handling.
*/
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
-CONFIG_CRYPTO_ZLIB=m
CONFIG_CRYPTO_LZO=m
CONFIG_IP_NF_MATCH_TTL=m
CONFIG_IP_NF_FILTER=y
CONFIG_IP_NF_TARGET_REJECT=y
-CONFIG_IP_NF_TARGET_ULOG=m
CONFIG_IP_NF_MANGLE=m
CONFIG_IP_NF_TARGET_ECN=m
CONFIG_IP_NF_TARGET_TTL=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
-CONFIG_CRYPTO_ZLIB=m
CONFIG_CRYPTO_LZO=m
CONFIG_CRC_CCITT=m
CONFIG_CRC7=m
{
char *endp;
unsigned long long maxnodemem;
- long node;
+ unsigned long node;
node = str ? simple_strtoul(str, &endp, 0) : INT_MAX;
if (node >= MAX_NUMNODES || *endp != ':')
{
}
-static inline void mm_copy_segments(struct mm_struct *from_mm,
- struct mm_struct *new_mm)
-{
-}
-
#define init_stack (init_thread_union.stack)
/*
#define s3 ((16 + 2 + (3 * 256)) * 4)
/* register macros */
-#define CTX %rdi
+#define CTX %r12
#define RIO %rsi
#define RX0 %rax
#define RX2bh %ch
#define RX3bh %dh
-#define RT0 %rbp
+#define RT0 %rdi
#define RT1 %rsi
#define RT2 %r8
#define RT3 %r9
-#define RT0d %ebp
+#define RT0d %edi
#define RT1d %esi
#define RT2d %r8d
#define RT3d %r9d
ENTRY(__blowfish_enc_blk)
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* %rsi: dst
* %rdx: src
* %rcx: bool, if true: xor output
*/
- movq %rbp, %r11;
+ movq %r12, %r11;
+ movq %rdi, CTX;
movq %rsi, %r10;
movq %rdx, RIO;
round_enc(14);
add_roundkey_enc(16);
- movq %r11, %rbp;
+ movq %r11, %r12;
movq %r10, RIO;
test %cl, %cl;
ENTRY(blowfish_dec_blk)
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* %rsi: dst
* %rdx: src
*/
- movq %rbp, %r11;
+ movq %r12, %r11;
+ movq %rdi, CTX;
movq %rsi, %r10;
movq %rdx, RIO;
movq %r10, RIO;
write_block();
- movq %r11, %rbp;
+ movq %r11, %r12;
ret;
ENDPROC(blowfish_dec_blk)
ENTRY(__blowfish_enc_blk_4way)
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* %rsi: dst
* %rdx: src
* %rcx: bool, if true: xor output
*/
- pushq %rbp;
+ pushq %r12;
pushq %rbx;
pushq %rcx;
- preload_roundkey_enc(0);
-
+ movq %rdi, CTX
movq %rsi, %r11;
movq %rdx, RIO;
+ preload_roundkey_enc(0);
+
read_block4();
round_enc4(0);
round_enc4(14);
add_preloaded_roundkey4();
- popq %rbp;
+ popq %r12;
movq %r11, RIO;
- test %bpl, %bpl;
+ test %r12b, %r12b;
jnz .L__enc_xor4;
write_block4();
popq %rbx;
- popq %rbp;
+ popq %r12;
ret;
.L__enc_xor4:
xor_block4();
popq %rbx;
- popq %rbp;
+ popq %r12;
ret;
ENDPROC(__blowfish_enc_blk_4way)
ENTRY(blowfish_dec_blk_4way)
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* %rsi: dst
* %rdx: src
*/
- pushq %rbp;
+ pushq %r12;
pushq %rbx;
- preload_roundkey_dec(17);
- movq %rsi, %r11;
+ movq %rdi, CTX;
+ movq %rsi, %r11
movq %rdx, RIO;
+ preload_roundkey_dec(17);
read_block4();
round_dec4(17);
write_block4();
popq %rbx;
- popq %rbp;
+ popq %r12;
ret;
ENDPROC(blowfish_dec_blk_4way)
#define RCD1bh %dh
#define RT0 %rsi
-#define RT1 %rbp
+#define RT1 %r12
#define RT2 %r8
#define RT0d %esi
-#define RT1d %ebp
+#define RT1d %r12d
#define RT2d %r8d
#define RT2bl %r8b
#define RXOR %r9
-#define RRBP %r10
+#define RR12 %r10
#define RDST %r11
#define RXORd %r9d
* %rdx: src
* %rcx: bool xor
*/
- movq %rbp, RRBP;
+ movq %r12, RR12;
movq %rcx, RXOR;
movq %rsi, RDST;
enc_outunpack(mov, RT1);
- movq RRBP, %rbp;
+ movq RR12, %r12;
ret;
.L__enc_xor:
enc_outunpack(xor, RT1);
- movq RRBP, %rbp;
+ movq RR12, %r12;
ret;
ENDPROC(__camellia_enc_blk)
movl $24, RXORd;
cmovel RXORd, RT2d; /* max */
- movq %rbp, RRBP;
+ movq %r12, RR12;
movq %rsi, RDST;
movq %rdx, RIO;
dec_outunpack();
- movq RRBP, %rbp;
+ movq RR12, %r12;
ret;
ENDPROC(camellia_dec_blk)
*/
pushq %rbx;
- movq %rbp, RRBP;
+ movq %r12, RR12;
movq %rcx, RXOR;
movq %rsi, RDST;
movq %rdx, RIO;
enc_outunpack2(mov, RT2);
- movq RRBP, %rbp;
+ movq RR12, %r12;
popq %rbx;
ret;
.L__enc2_xor:
enc_outunpack2(xor, RT2);
- movq RRBP, %rbp;
+ movq RR12, %r12;
popq %rbx;
ret;
ENDPROC(__camellia_enc_blk_2way)
cmovel RXORd, RT2d; /* max */
movq %rbx, RXOR;
- movq %rbp, RRBP;
+ movq %r12, RR12;
movq %rsi, RDST;
movq %rdx, RIO;
dec_outunpack2();
- movq RRBP, %rbp;
+ movq RR12, %r12;
movq RXOR, %rbx;
ret;
ENDPROC(camellia_dec_blk_2way)
/**********************************************************************
16-way AVX cast5
**********************************************************************/
-#define CTX %rdi
+#define CTX %r15
#define RL1 %xmm0
#define RR1 %xmm1
#define RTMP %xmm15
-#define RID1 %rbp
-#define RID1d %ebp
+#define RID1 %rdi
+#define RID1d %edi
#define RID2 %rsi
#define RID2d %esi
.align 16
__cast5_enc_blk16:
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* RL1: blocks 1 and 2
* RR1: blocks 3 and 4
* RL2: blocks 5 and 6
* RR4: encrypted blocks 15 and 16
*/
- pushq %rbp;
+ pushq %r15;
pushq %rbx;
+ movq %rdi, CTX;
+
vmovdqa .Lbswap_mask, RKM;
vmovd .Lfirst_mask, R1ST;
vmovd .L32_mask, R32;
.L__skip_enc:
popq %rbx;
- popq %rbp;
+ popq %r15;
vmovdqa .Lbswap_mask, RKM;
.align 16
__cast5_dec_blk16:
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* RL1: encrypted blocks 1 and 2
* RR1: encrypted blocks 3 and 4
* RL2: encrypted blocks 5 and 6
* RR4: decrypted blocks 15 and 16
*/
- pushq %rbp;
+ pushq %r15;
pushq %rbx;
+ movq %rdi, CTX;
+
vmovdqa .Lbswap_mask, RKM;
vmovd .Lfirst_mask, R1ST;
vmovd .L32_mask, R32;
vmovdqa .Lbswap_mask, RKM;
popq %rbx;
- popq %rbp;
+ popq %r15;
outunpack_blocks(RR1, RL1, RTMP, RX, RKM);
outunpack_blocks(RR2, RL2, RTMP, RX, RKM);
ENTRY(cast5_ecb_enc_16way)
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* %rsi: dst
* %rdx: src
*/
FRAME_BEGIN
+ pushq %r15;
+ movq %rdi, CTX;
movq %rsi, %r11;
vmovdqu (0*4*4)(%rdx), RL1;
vmovdqu RR4, (6*4*4)(%r11);
vmovdqu RL4, (7*4*4)(%r11);
+ popq %r15;
FRAME_END
ret;
ENDPROC(cast5_ecb_enc_16way)
ENTRY(cast5_ecb_dec_16way)
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* %rsi: dst
* %rdx: src
*/
FRAME_BEGIN
+ pushq %r15;
+
+ movq %rdi, CTX;
movq %rsi, %r11;
vmovdqu (0*4*4)(%rdx), RL1;
vmovdqu RR4, (6*4*4)(%r11);
vmovdqu RL4, (7*4*4)(%r11);
+ popq %r15;
FRAME_END
ret;
ENDPROC(cast5_ecb_dec_16way)
ENTRY(cast5_cbc_dec_16way)
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* %rsi: dst
* %rdx: src
*/
FRAME_BEGIN
-
pushq %r12;
+ pushq %r15;
+ movq %rdi, CTX;
movq %rsi, %r11;
movq %rdx, %r12;
vmovdqu RR4, (6*16)(%r11);
vmovdqu RL4, (7*16)(%r11);
+ popq %r15;
popq %r12;
-
FRAME_END
ret;
ENDPROC(cast5_cbc_dec_16way)
ENTRY(cast5_ctr_16way)
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* %rsi: dst
* %rdx: src
* %rcx: iv (big endian, 64bit)
*/
FRAME_BEGIN
-
pushq %r12;
+ pushq %r15;
+ movq %rdi, CTX;
movq %rsi, %r11;
movq %rdx, %r12;
vmovdqu RR4, (6*16)(%r11);
vmovdqu RL4, (7*16)(%r11);
+ popq %r15;
popq %r12;
-
FRAME_END
ret;
ENDPROC(cast5_ctr_16way)
/**********************************************************************
8-way AVX cast6
**********************************************************************/
-#define CTX %rdi
+#define CTX %r15
#define RA1 %xmm0
#define RB1 %xmm1
#define RTMP %xmm15
-#define RID1 %rbp
-#define RID1d %ebp
+#define RID1 %rdi
+#define RID1d %edi
#define RID2 %rsi
#define RID2d %esi
.align 8
__cast6_enc_blk8:
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks
* output:
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
*/
- pushq %rbp;
+ pushq %r15;
pushq %rbx;
+ movq %rdi, CTX;
+
vmovdqa .Lbswap_mask, RKM;
vmovd .Lfirst_mask, R1ST;
vmovd .L32_mask, R32;
QBAR(11);
popq %rbx;
- popq %rbp;
+ popq %r15;
vmovdqa .Lbswap_mask, RKM;
.align 8
__cast6_dec_blk8:
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
* output:
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: decrypted blocks
*/
- pushq %rbp;
+ pushq %r15;
pushq %rbx;
+ movq %rdi, CTX;
+
vmovdqa .Lbswap_mask, RKM;
vmovd .Lfirst_mask, R1ST;
vmovd .L32_mask, R32;
QBAR(0);
popq %rbx;
- popq %rbp;
+ popq %r15;
vmovdqa .Lbswap_mask, RKM;
outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
ENTRY(cast6_ecb_enc_8way)
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* %rsi: dst
* %rdx: src
*/
FRAME_BEGIN
+ pushq %r15;
+ movq %rdi, CTX;
movq %rsi, %r11;
load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+ popq %r15;
FRAME_END
ret;
ENDPROC(cast6_ecb_enc_8way)
ENTRY(cast6_ecb_dec_8way)
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* %rsi: dst
* %rdx: src
*/
FRAME_BEGIN
+ pushq %r15;
+ movq %rdi, CTX;
movq %rsi, %r11;
load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+ popq %r15;
FRAME_END
ret;
ENDPROC(cast6_ecb_dec_8way)
ENTRY(cast6_cbc_dec_8way)
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* %rsi: dst
* %rdx: src
*/
FRAME_BEGIN
-
pushq %r12;
+ pushq %r15;
+ movq %rdi, CTX;
movq %rsi, %r11;
movq %rdx, %r12;
store_cbc_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+ popq %r15;
popq %r12;
-
FRAME_END
ret;
ENDPROC(cast6_cbc_dec_8way)
* %rcx: iv (little endian, 128bit)
*/
FRAME_BEGIN
-
pushq %r12;
+ pushq %r15
+ movq %rdi, CTX;
movq %rsi, %r11;
movq %rdx, %r12;
store_ctr_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+ popq %r15;
popq %r12;
-
FRAME_END
ret;
ENDPROC(cast6_ctr_8way)
* %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
*/
FRAME_BEGIN
+ pushq %r15;
+ movq %rdi, CTX
movq %rsi, %r11;
/* regs <= src, dst <= IVs, regs <= regs xor IVs */
/* dst <= regs xor IVs(in dst) */
store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+ popq %r15;
FRAME_END
ret;
ENDPROC(cast6_xts_enc_8way)
* %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
*/
FRAME_BEGIN
+ pushq %r15;
+ movq %rdi, CTX
movq %rsi, %r11;
/* regs <= src, dst <= IVs, regs <= regs xor IVs */
/* dst <= regs xor IVs(in dst) */
store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+ popq %r15;
FRAME_END
ret;
ENDPROC(cast6_xts_dec_8way)
#define RW2bh %ch
#define RT0 %r15
-#define RT1 %rbp
+#define RT1 %rsi
#define RT2 %r14
#define RT3 %rdx
#define RT0d %r15d
-#define RT1d %ebp
+#define RT1d %esi
#define RT2d %r14d
#define RT3d %edx
* %rsi: dst
* %rdx: src
*/
- pushq %rbp;
pushq %rbx;
pushq %r12;
pushq %r13;
pushq %r14;
pushq %r15;
+ pushq %rsi; /* dst */
+
read_block(%rdx, RL0, RR0);
initial_permutation(RL0, RR0);
round1(32+15, RL0, RR0, dummy2);
final_permutation(RR0, RL0);
+
+ popq %rsi /* dst */
write_block(%rsi, RR0, RL0);
popq %r15;
popq %r13;
popq %r12;
popq %rbx;
- popq %rbp;
ret;
ENDPROC(des3_ede_x86_64_crypt_blk)
* %rdx: src (3 blocks)
*/
- pushq %rbp;
pushq %rbx;
pushq %r12;
pushq %r13;
pushq %r14;
pushq %r15;
+ pushq %rsi /* dst */
+
/* load input */
movl 0 * 4(%rdx), RL0d;
movl 1 * 4(%rdx), RR0d;
bswapl RR2d;
bswapl RL2d;
+ popq %rsi /* dst */
movl RR0d, 0 * 4(%rsi);
movl RL0d, 1 * 4(%rsi);
movl RR1d, 2 * 4(%rsi);
popq %r13;
popq %r12;
popq %rbx;
- popq %rbp;
ret;
ENDPROC(des3_ede_x86_64_crypt_blk_3way)
#define REG_RE %rdx
#define REG_RTA %r12
#define REG_RTB %rbx
-#define REG_T1 %ebp
+#define REG_T1 %r11d
#define xmm_mov vmovups
#define avx2_zeroupper vzeroupper
#define RND_F1 1
ENTRY(\name)
push %rbx
- push %rbp
push %r12
push %r13
push %r14
pop %r14
pop %r13
pop %r12
- pop %rbp
pop %rbx
ret
#define REG_A %ecx
#define REG_B %esi
#define REG_C %edi
-#define REG_D %ebp
+#define REG_D %r12d
#define REG_E %edx
#define REG_T1 %eax
ENTRY(\name)
push %rbx
- push %rbp
push %r12
+ push %rbp
+ mov %rsp, %rbp
- mov %rsp, %r12
sub $64, %rsp # allocate workspace
and $~15, %rsp # align stack
xor %rax, %rax
rep stosq
- mov %r12, %rsp # deallocate workspace
-
- pop %r12
+ mov %rbp, %rsp # deallocate workspace
pop %rbp
+ pop %r12
pop %rbx
ret
c = %ecx
d = %r8d
e = %edx
-TBL = %rbp
+TBL = %r12
a = %eax
b = %ebx
ENTRY(sha256_transform_avx)
.align 32
pushq %rbx
- pushq %rbp
+ pushq %r12
pushq %r13
pushq %r14
pushq %r15
- pushq %r12
+ pushq %rbp
+ movq %rsp, %rbp
- mov %rsp, %r12
subq $STACK_SIZE, %rsp # allocate stack space
and $~15, %rsp # align stack pointer
done_hash:
- mov %r12, %rsp
-
- popq %r12
+ mov %rbp, %rsp
+ popq %rbp
popq %r15
popq %r14
popq %r13
- popq %rbp
+ popq %r12
popq %rbx
ret
ENDPROC(sha256_transform_avx)
e = %edx # clobbers NUM_BLKS
y3 = %esi # clobbers INP
-
-TBL = %rbp
SRND = CTX # SRND is same register as CTX
a = %eax
ENTRY(sha256_transform_rorx)
.align 32
pushq %rbx
- pushq %rbp
pushq %r12
pushq %r13
pushq %r14
mov CTX, _CTX(%rsp)
loop0:
- lea K256(%rip), TBL
-
## Load first 16 dwords from two blocks
VMOVDQ 0*32(INP),XTMP0
VMOVDQ 1*32(INP),XTMP1
.align 16
loop1:
- vpaddd 0*32(TBL, SRND), X0, XFER
+ vpaddd K256+0*32(SRND), X0, XFER
vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
FOUR_ROUNDS_AND_SCHED _XFER + 0*32
- vpaddd 1*32(TBL, SRND), X0, XFER
+ vpaddd K256+1*32(SRND), X0, XFER
vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
FOUR_ROUNDS_AND_SCHED _XFER + 1*32
- vpaddd 2*32(TBL, SRND), X0, XFER
+ vpaddd K256+2*32(SRND), X0, XFER
vmovdqa XFER, 2*32+_XFER(%rsp, SRND)
FOUR_ROUNDS_AND_SCHED _XFER + 2*32
- vpaddd 3*32(TBL, SRND), X0, XFER
+ vpaddd K256+3*32(SRND), X0, XFER
vmovdqa XFER, 3*32+_XFER(%rsp, SRND)
FOUR_ROUNDS_AND_SCHED _XFER + 3*32
loop2:
## Do last 16 rounds with no scheduling
- vpaddd 0*32(TBL, SRND), X0, XFER
+ vpaddd K256+0*32(SRND), X0, XFER
vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
DO_4ROUNDS _XFER + 0*32
- vpaddd 1*32(TBL, SRND), X1, XFER
+
+ vpaddd K256+1*32(SRND), X1, XFER
vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
DO_4ROUNDS _XFER + 1*32
add $2*32, SRND
ja done_hash
do_last_block:
- #### do last block
- lea K256(%rip), TBL
-
VMOVDQ 0*16(INP),XWORD0
VMOVDQ 1*16(INP),XWORD1
VMOVDQ 2*16(INP),XWORD2
popq %r14
popq %r13
popq %r12
- popq %rbp
popq %rbx
ret
ENDPROC(sha256_transform_rorx)
c = %ecx
d = %r8d
e = %edx
-TBL = %rbp
+TBL = %r12
a = %eax
b = %ebx
ENTRY(sha256_transform_ssse3)
.align 32
pushq %rbx
- pushq %rbp
+ pushq %r12
pushq %r13
pushq %r14
pushq %r15
- pushq %r12
+ pushq %rbp
+ mov %rsp, %rbp
- mov %rsp, %r12
subq $STACK_SIZE, %rsp
and $~15, %rsp
done_hash:
- mov %r12, %rsp
-
- popq %r12
+ mov %rbp, %rsp
+ popq %rbp
popq %r15
popq %r14
popq %r13
- popq %rbp
+ popq %r12
popq %rbx
ret
BYTE_FLIP_MASK = %ymm9
-# 1st arg
-CTX = %rdi
+# 1st arg is %rdi, which is saved to the stack and accessed later via %r12
+CTX1 = %rdi
+CTX2 = %r12
# 2nd arg
INP = %rsi
# 3rd arg
e = %rdx
y3 = %rsi
-TBL = %rbp
+TBL = %rdi # clobbers CTX1
a = %rax
b = %rbx
h = %r11
old_h = %r11
-T1 = %r12
+T1 = %r12 # clobbers CTX2
y0 = %r13
y1 = %r14
y2 = %r15
-y4 = %r12
-
# Local variables (stack frame)
XFER_SIZE = 4*8
SRND_SIZE = 1*8
INP_SIZE = 1*8
INPEND_SIZE = 1*8
+CTX_SIZE = 1*8
RSPSAVE_SIZE = 1*8
-GPRSAVE_SIZE = 6*8
+GPRSAVE_SIZE = 5*8
frame_XFER = 0
frame_SRND = frame_XFER + XFER_SIZE
frame_INP = frame_SRND + SRND_SIZE
frame_INPEND = frame_INP + INP_SIZE
-frame_RSPSAVE = frame_INPEND + INPEND_SIZE
+frame_CTX = frame_INPEND + INPEND_SIZE
+frame_RSPSAVE = frame_CTX + CTX_SIZE
frame_GPRSAVE = frame_RSPSAVE + RSPSAVE_SIZE
frame_size = frame_GPRSAVE + GPRSAVE_SIZE
mov %rax, frame_RSPSAVE(%rsp)
# Save GPRs
- mov %rbp, frame_GPRSAVE(%rsp)
- mov %rbx, 8*1+frame_GPRSAVE(%rsp)
- mov %r12, 8*2+frame_GPRSAVE(%rsp)
- mov %r13, 8*3+frame_GPRSAVE(%rsp)
- mov %r14, 8*4+frame_GPRSAVE(%rsp)
- mov %r15, 8*5+frame_GPRSAVE(%rsp)
+ mov %rbx, 8*0+frame_GPRSAVE(%rsp)
+ mov %r12, 8*1+frame_GPRSAVE(%rsp)
+ mov %r13, 8*2+frame_GPRSAVE(%rsp)
+ mov %r14, 8*3+frame_GPRSAVE(%rsp)
+ mov %r15, 8*4+frame_GPRSAVE(%rsp)
shl $7, NUM_BLKS # convert to bytes
jz done_hash
mov NUM_BLKS, frame_INPEND(%rsp)
## load initial digest
- mov 8*0(CTX),a
- mov 8*1(CTX),b
- mov 8*2(CTX),c
- mov 8*3(CTX),d
- mov 8*4(CTX),e
- mov 8*5(CTX),f
- mov 8*6(CTX),g
- mov 8*7(CTX),h
+ mov 8*0(CTX1), a
+ mov 8*1(CTX1), b
+ mov 8*2(CTX1), c
+ mov 8*3(CTX1), d
+ mov 8*4(CTX1), e
+ mov 8*5(CTX1), f
+ mov 8*6(CTX1), g
+ mov 8*7(CTX1), h
+
+ # save %rdi (CTX) before it gets clobbered
+ mov %rdi, frame_CTX(%rsp)
vmovdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), BYTE_FLIP_MASK
subq $1, frame_SRND(%rsp)
jne loop2
- addm 8*0(CTX),a
- addm 8*1(CTX),b
- addm 8*2(CTX),c
- addm 8*3(CTX),d
- addm 8*4(CTX),e
- addm 8*5(CTX),f
- addm 8*6(CTX),g
- addm 8*7(CTX),h
+ mov frame_CTX(%rsp), CTX2
+ addm 8*0(CTX2), a
+ addm 8*1(CTX2), b
+ addm 8*2(CTX2), c
+ addm 8*3(CTX2), d
+ addm 8*4(CTX2), e
+ addm 8*5(CTX2), f
+ addm 8*6(CTX2), g
+ addm 8*7(CTX2), h
mov frame_INP(%rsp), INP
add $128, INP
done_hash:
# Restore GPRs
- mov frame_GPRSAVE(%rsp) ,%rbp
- mov 8*1+frame_GPRSAVE(%rsp) ,%rbx
- mov 8*2+frame_GPRSAVE(%rsp) ,%r12
- mov 8*3+frame_GPRSAVE(%rsp) ,%r13
- mov 8*4+frame_GPRSAVE(%rsp) ,%r14
- mov 8*5+frame_GPRSAVE(%rsp) ,%r15
+ mov 8*0+frame_GPRSAVE(%rsp), %rbx
+ mov 8*1+frame_GPRSAVE(%rsp), %r12
+ mov 8*2+frame_GPRSAVE(%rsp), %r13
+ mov 8*3+frame_GPRSAVE(%rsp), %r14
+ mov 8*4+frame_GPRSAVE(%rsp), %r15
# Restore Stack Pointer
mov frame_RSPSAVE(%rsp), %rsp
#define RT %xmm14
#define RR %xmm15
-#define RID1 %rbp
-#define RID1d %ebp
+#define RID1 %r13
+#define RID1d %r13d
#define RID2 %rsi
#define RID2d %esi
vmovdqu w(CTX), RK1;
- pushq %rbp;
+ pushq %r13;
pushq %rbx;
pushq %rcx;
popq %rcx;
popq %rbx;
- popq %rbp;
+ popq %r13;
outunpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
vmovdqu (w+4*4)(CTX), RK1;
- pushq %rbp;
+ pushq %r13;
pushq %rbx;
inpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
vmovdqu (w)(CTX), RK1;
popq %rbx;
- popq %rbp;
+ popq %r13;
outunpack_blocks(RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2);
outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2);
ksig->ka.sa.sa_restorer)
sp = (unsigned long) ksig->ka.sa.sa_restorer;
- if (fpu->fpstate_active) {
+ if (fpu->initialized) {
unsigned long fx_aligned, math_size;
sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size);
#define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2, \
output, input...) \
{ \
- register void *__sp asm(_ASM_SP); \
asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\
"call %P[new2]", feature2) \
- : output, "+r" (__sp) \
+ : output, ASM_CALL_CONSTRAINT \
: [old] "i" (oldfunc), [new1] "i" (newfunc1), \
[new2] "i" (newfunc2), ## input); \
}
/* For C file, we already have NOKPROBE_SYMBOL macro */
#endif
+#ifndef __ASSEMBLY__
+/*
+ * This output constraint should be used for any inline asm which has a "call"
+ * instruction. Otherwise the asm may be inserted before the frame pointer
+ * gets set up by the containing function. If you forget to do this, objtool
+ * may print a "call without frame pointer save/setup" warning.
+ */
+register unsigned int __asm_call_sp asm("esp");
+#define ASM_CALL_CONSTRAINT "+r" (__asm_call_sp)
+#endif
+
#endif /* _ASM_X86_ASM_H */
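
The constraint is meant to appear in the output list of every inline asm statement that contains a call, which is exactly the mechanical change the paravirt, hypercall, rwsem, get_user and preempt hunks below make. A minimal sketch of the pattern, with a made-up helper symbol for illustration:

	static inline int do_helper(int arg)
	{
		int ret;

		/*
		 * The fake output dependency on the stack pointer keeps the
		 * compiler from scheduling this asm before the frame pointer
		 * is set up in the containing function.
		 */
		asm volatile("call fictional_helper"	/* hypothetical symbol */
			     : "=a" (ret), ASM_CALL_CONSTRAINT
			     : "D" (arg));
		return ret;
	}
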
/*
* High level FPU state handling functions:
*/
-extern void fpu__activate_curr(struct fpu *fpu);
-extern void fpu__activate_fpstate_read(struct fpu *fpu);
-extern void fpu__activate_fpstate_write(struct fpu *fpu);
-extern void fpu__current_fpstate_write_begin(void);
-extern void fpu__current_fpstate_write_end(void);
+extern void fpu__initialize(struct fpu *fpu);
+extern void fpu__prepare_read(struct fpu *fpu);
+extern void fpu__prepare_write(struct fpu *fpu);
extern void fpu__save(struct fpu *fpu);
extern void fpu__restore(struct fpu *fpu);
extern int fpu__restore_sig(void __user *buf, int ia32_frame);
err; \
})
-#define check_insn(insn, output, input...) \
-({ \
- int err; \
+#define kernel_insn(insn, output, input...) \
asm volatile("1:" #insn "\n\t" \
"2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: movl $-1,%[err]\n" \
- " jmp 2b\n" \
- ".previous\n" \
- _ASM_EXTABLE(1b, 3b) \
- : [err] "=r" (err), output \
- : "0"(0), input); \
- err; \
-})
+ _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_fprestore) \
+ : output : input)
static inline int copy_fregs_to_user(struct fregs_state __user *fx)
{
static inline void copy_kernel_to_fxregs(struct fxregs_state *fx)
{
- int err;
-
if (IS_ENABLED(CONFIG_X86_32)) {
- err = check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
+ kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
} else {
if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) {
- err = check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
+ kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
} else {
/* See comment in copy_fxregs_to_kernel() below. */
- err = check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx));
+ kernel_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx));
}
}
- /* Copying from a kernel buffer to FPU registers should never fail: */
- WARN_ON_FPU(err);
}
static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
static inline void copy_kernel_to_fregs(struct fregs_state *fx)
{
- int err = check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
-
- WARN_ON_FPU(err);
+ kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
}
static inline int copy_user_to_fregs(struct fregs_state __user *fx)
* Use XRSTORS to restore context if it is enabled. XRSTORS supports compact
* XSAVE area format.
*/
-#define XSTATE_XRESTORE(st, lmask, hmask, err) \
+#define XSTATE_XRESTORE(st, lmask, hmask) \
asm volatile(ALTERNATIVE(XRSTOR, \
XRSTORS, X86_FEATURE_XSAVES) \
"\n" \
- "xor %[err], %[err]\n" \
"3:\n" \
- ".pushsection .fixup,\"ax\"\n" \
- "4: movl $-2, %[err]\n" \
- "jmp 3b\n" \
- ".popsection\n" \
- _ASM_EXTABLE(661b, 4b) \
- : [err] "=r" (err) \
+ _ASM_EXTABLE_HANDLE(661b, 3b, ex_handler_fprestore)\
+ : \
: "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
: "memory")
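
Both the kernel_insn() macro above and the slimmed-down XSTATE_XRESTORE rely on the same shift in error handling: instead of branching to a local fixup that reports an error code the caller then WARNs about, the faulting instruction is registered with ex_handler_fprestore (a handler added elsewhere in this series, not shown in this excerpt), so the error plumbing through 'err' can be deleted wholesale.
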
else
XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
- /* We should never fault when copying from a kernel buffer: */
+ /*
+ * We should never fault when copying from a kernel buffer, and the FPU
+ * state we set at boot time should be valid.
+ */
WARN_ON_FPU(err);
}
u32 hmask = mask >> 32;
int err;
- WARN_ON(!alternatives_patched);
+ WARN_ON_FPU(!alternatives_patched);
XSTATE_XSAVE(xstate, lmask, hmask, err);
{
u32 lmask = mask;
u32 hmask = mask >> 32;
- int err;
-
- XSTATE_XRESTORE(xstate, lmask, hmask, err);
- /* We should never fault when copying from a kernel buffer: */
- WARN_ON_FPU(err);
+ XSTATE_XRESTORE(xstate, lmask, hmask);
}
/*
*/
static inline void fpregs_deactivate(struct fpu *fpu)
{
- WARN_ON_FPU(!fpu->fpregs_active);
-
- fpu->fpregs_active = 0;
this_cpu_write(fpu_fpregs_owner_ctx, NULL);
trace_x86_fpu_regs_deactivated(fpu);
}
static inline void fpregs_activate(struct fpu *fpu)
{
- WARN_ON_FPU(fpu->fpregs_active);
-
- fpu->fpregs_active = 1;
this_cpu_write(fpu_fpregs_owner_ctx, fpu);
trace_x86_fpu_regs_activated(fpu);
}
-/*
- * The question "does this thread have fpu access?"
- * is slightly racy, since preemption could come in
- * and revoke it immediately after the test.
- *
- * However, even in that very unlikely scenario,
- * we can just assume we have FPU access - typically
- * to save the FP state - we'll just take a #NM
- * fault and get the FPU access back.
- */
-static inline int fpregs_active(void)
-{
- return current->thread.fpu.fpregs_active;
-}
-
/*
* FPU state switching for scheduling.
*
static inline void
switch_fpu_prepare(struct fpu *old_fpu, int cpu)
{
- if (old_fpu->fpregs_active) {
+ if (old_fpu->initialized) {
if (!copy_fpregs_to_fpstate(old_fpu))
old_fpu->last_cpu = -1;
else
old_fpu->last_cpu = cpu;
/* But leave fpu_fpregs_owner_ctx! */
- old_fpu->fpregs_active = 0;
trace_x86_fpu_regs_deactivated(old_fpu);
} else
old_fpu->last_cpu = -1;
static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu)
{
bool preload = static_cpu_has(X86_FEATURE_FPU) &&
- new_fpu->fpstate_active;
+ new_fpu->initialized;
if (preload) {
if (!fpregs_state_valid(new_fpu, cpu))
	struct fpu *fpu = &current->thread.fpu;
preempt_disable();
- if (!fpregs_active())
- fpregs_activate(fpu);
+ fpregs_activate(fpu);
preempt_enable();
}
/* Default value for fxregs_state.mxcsr: */
#define MXCSR_DEFAULT 0x1f80
+/* Copy both mxcsr & mxcsr_flags with a single u64 memcpy: */
+#define MXCSR_AND_FLAGS_SIZE sizeof(u64)
+
/*
* Software based FPU emulation state. This is arbitrary really,
* it matches the x87 format to make it easier to understand:
unsigned int last_cpu;
/*
- * @fpstate_active:
+ * @initialized:
*
- * This flag indicates whether this context is active: if the task
+ * This flag indicates whether this context is initialized: if the task
* is not running then we can restore from this context, if the task
* is running then we should save into this context.
*/
- unsigned char fpstate_active;
-
- /*
- * @fpregs_active:
- *
- * This flag determines whether a given context is actively
- * loaded into the FPU's registers and that those registers
- * represent the task's current FPU state.
- *
- * Note the interaction with fpstate_active:
- *
- * # task does not use the FPU:
- * fpstate_active == 0
- *
- * # task uses the FPU and regs are active:
- * fpstate_active == 1 && fpregs_active == 1
- *
- * # the regs are inactive but still match fpstate:
- * fpstate_active == 1 && fpregs_active == 0 && fpregs_owner == fpu
- *
- * The third state is what we use for the lazy restore optimization
- * on lazy-switching CPUs.
- */
- unsigned char fpregs_active;
+ unsigned char initialized;
/*
* @state:
void *get_xsave_addr(struct xregs_state *xsave, int xstate);
const void *get_xsave_field_ptr(int xstate_field);
int using_compacted_format(void);
-int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
- void __user *ubuf, struct xregs_state *xsave);
-int copyin_to_xsaves(const void *kbuf, const void __user *ubuf,
- struct xregs_state *xsave);
+int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
+int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
+int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf);
+int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf);
+
+/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
+extern int validate_xstate_header(const struct xstate_header *hdr);
+
#endif
return __pkru_allows_pkey(vma_pkey(vma), write);
}
+/*
+ * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID
+ * bits. This serves two purposes. It prevents a nasty situation in
+ * which PCID-unaware code saves CR3, loads some other value (with PCID
+ * == 0), and then restores CR3, thus corrupting the TLB for ASID 0 if
+ * the saved ASID was nonzero. It also means that any bugs involving
+ * loading a PCID-enabled CR3 with CR4.PCIDE off will trigger
+ * deterministically.
+ */
+
+static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
+{
+ if (static_cpu_has(X86_FEATURE_PCID)) {
+ VM_WARN_ON_ONCE(asid > 4094);
+ return __sme_pa(mm->pgd) | (asid + 1);
+ } else {
+ VM_WARN_ON_ONCE(asid != 0);
+ return __sme_pa(mm->pgd);
+ }
+}
+
+static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
+{
+ VM_WARN_ON_ONCE(asid > 4094);
+ return __sme_pa(mm->pgd) | (asid + 1) | CR3_NOFLUSH;
+}
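
A quick worked example of the encoding, with illustrative numbers: given PCID support, a pgd at physical address 0x1000 and ASID 5, build_cr3() yields 0x1000 | (5 + 1) = 0x1006, i.e. hardware PCID 6, and build_cr3_noflush() additionally sets CR3_NOFLUSH (bit 63) so the TLB entries tagged with that PCID survive the CR3 write. The +1 is what gives the two properties the comment above describes: hardware PCID 0 stays reserved for PCID-unaware code, and a PCID-enabled CR3 loaded while CR4.PCIDE is off trips over it deterministically.
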
/*
* This can be used from process context to figure out what the value of
*/
static inline unsigned long __get_current_cr3_fast(void)
{
- unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd);
-
- if (static_cpu_has(X86_FEATURE_PCID))
- cr3 |= this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+ unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm),
+ this_cpu_read(cpu_tlbstate.loaded_mm_asid));
/* For now, be very restrictive about when this can be called. */
VM_WARN_ON(in_nmi() || preemptible());
u64 input_address = input ? virt_to_phys(input) : 0;
u64 output_address = output ? virt_to_phys(output) : 0;
u64 hv_status;
- register void *__sp asm(_ASM_SP);
#ifdef CONFIG_X86_64
if (!hv_hypercall_pg)
__asm__ __volatile__("mov %4, %%r8\n"
"call *%5"
- : "=a" (hv_status), "+r" (__sp),
+ : "=a" (hv_status), ASM_CALL_CONSTRAINT,
"+c" (control), "+d" (input_address)
: "r" (output_address), "m" (hv_hypercall_pg)
: "cc", "memory", "r8", "r9", "r10", "r11");
__asm__ __volatile__("call *%7"
: "=A" (hv_status),
- "+c" (input_address_lo), "+r" (__sp)
+ "+c" (input_address_lo), ASM_CALL_CONSTRAINT
: "A" (control),
"b" (input_address_hi),
"D"(output_address_hi), "S"(output_address_lo),
static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
{
u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
- register void *__sp asm(_ASM_SP);
#ifdef CONFIG_X86_64
{
__asm__ __volatile__("call *%4"
- : "=a" (hv_status), "+r" (__sp),
+ : "=a" (hv_status), ASM_CALL_CONSTRAINT,
"+c" (control), "+d" (input1)
: "m" (hv_hypercall_pg)
: "cc", "r8", "r9", "r10", "r11");
__asm__ __volatile__ ("call *%5"
: "=A"(hv_status),
"+c"(input1_lo),
- "+r"(__sp)
+ ASM_CALL_CONSTRAINT
: "A" (control),
"b" (input1_hi),
"m" (hv_hypercall_pg)
*/
#ifdef CONFIG_X86_32
#define PVOP_VCALL_ARGS \
- unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx; \
- register void *__sp asm("esp")
+ unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx;
+
#define PVOP_CALL_ARGS PVOP_VCALL_ARGS
#define PVOP_CALL_ARG1(x) "a" ((unsigned long)(x))
/* [re]ax isn't an arg, but the return val */
#define PVOP_VCALL_ARGS \
unsigned long __edi = __edi, __esi = __esi, \
- __edx = __edx, __ecx = __ecx, __eax = __eax; \
- register void *__sp asm("rsp")
+ __edx = __edx, __ecx = __ecx, __eax = __eax;
+
#define PVOP_CALL_ARGS PVOP_VCALL_ARGS
#define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x))
asm volatile(pre \
paravirt_alt(PARAVIRT_CALL) \
post \
- : call_clbr, "+r" (__sp) \
+ : call_clbr, ASM_CALL_CONSTRAINT \
: paravirt_type(op), \
paravirt_clobber(clbr), \
##__VA_ARGS__ \
asm volatile(pre \
paravirt_alt(PARAVIRT_CALL) \
post \
- : call_clbr, "+r" (__sp) \
+ : call_clbr, ASM_CALL_CONSTRAINT \
: paravirt_type(op), \
paravirt_clobber(clbr), \
##__VA_ARGS__ \
asm volatile(pre \
paravirt_alt(PARAVIRT_CALL) \
post \
- : call_clbr, "+r" (__sp) \
+ : call_clbr, ASM_CALL_CONSTRAINT \
: paravirt_type(op), \
paravirt_clobber(clbr), \
##__VA_ARGS__ \
#ifdef CONFIG_PREEMPT
extern asmlinkage void ___preempt_schedule(void);
-# define __preempt_schedule() \
-({ \
- register void *__sp asm(_ASM_SP); \
- asm volatile ("call ___preempt_schedule" : "+r"(__sp)); \
-})
+# define __preempt_schedule() \
+ asm volatile ("call ___preempt_schedule" : ASM_CALL_CONSTRAINT)
extern asmlinkage void preempt_schedule(void);
extern asmlinkage void ___preempt_schedule_notrace(void);
-# define __preempt_schedule_notrace() \
-({ \
- register void *__sp asm(_ASM_SP); \
- asm volatile ("call ___preempt_schedule_notrace" : "+r"(__sp)); \
-})
+# define __preempt_schedule_notrace() \
+ asm volatile ("call ___preempt_schedule_notrace" : ASM_CALL_CONSTRAINT)
+
extern asmlinkage void preempt_schedule_notrace(void);
#endif
* Like all of Linux's memory ordering operations, this is a
* compiler barrier as well.
*/
- register void *__sp asm(_ASM_SP);
-
#ifdef CONFIG_X86_32
asm volatile (
"pushfl\n\t"
"pushl $1f\n\t"
"iret\n\t"
"1:"
- : "+r" (__sp) : : "memory");
+ : ASM_CALL_CONSTRAINT : : "memory");
#else
unsigned int tmp;
"iretq\n\t"
UNWIND_HINT_RESTORE
"1:"
- : "=&r" (tmp), "+r" (__sp) : : "cc", "memory");
+ : "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory");
#endif
}
({ \
long tmp; \
struct rw_semaphore* ret; \
- register void *__sp asm(_ASM_SP); \
\
asm volatile("# beginning down_write\n\t" \
LOCK_PREFIX " xadd %1,(%4)\n\t" \
" call " slow_path "\n" \
"1:\n" \
"# ending down_write" \
- : "+m" (sem->count), "=d" (tmp), "=a" (ret), "+r" (__sp) \
+ : "+m" (sem->count), "=d" (tmp), \
+ "=a" (ret), ASM_CALL_CONSTRAINT \
: "a" (sem), "1" (RWSEM_ACTIVE_WRITE_BIAS) \
: "memory", "cc"); \
ret; \
TP_STRUCT__entry(
__field(struct fpu *, fpu)
- __field(bool, fpregs_active)
- __field(bool, fpstate_active)
+ __field(bool, initialized)
__field(u64, xfeatures)
__field(u64, xcomp_bv)
),
TP_fast_assign(
__entry->fpu = fpu;
- __entry->fpregs_active = fpu->fpregs_active;
- __entry->fpstate_active = fpu->fpstate_active;
+ __entry->initialized = fpu->initialized;
if (boot_cpu_has(X86_FEATURE_OSXSAVE)) {
__entry->xfeatures = fpu->state.xsave.header.xfeatures;
__entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv;
}
),
- TP_printk("x86/fpu: %p fpregs_active: %d fpstate_active: %d xfeatures: %llx xcomp_bv: %llx",
+ TP_printk("x86/fpu: %p initialized: %d xfeatures: %llx xcomp_bv: %llx",
__entry->fpu,
- __entry->fpregs_active,
- __entry->fpstate_active,
+ __entry->initialized,
__entry->xfeatures,
__entry->xcomp_bv
)
({ \
int __ret_gu; \
register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX); \
- register void *__sp asm(_ASM_SP); \
__chk_user_ptr(ptr); \
might_fault(); \
asm volatile("call __get_user_%P4" \
- : "=a" (__ret_gu), "=r" (__val_gu), "+r" (__sp) \
+ : "=a" (__ret_gu), "=r" (__val_gu), \
+ ASM_CALL_CONSTRAINT \
: "0" (ptr), "i" (sizeof(*(ptr)))); \
(x) = (__force __typeof__(*(ptr))) __val_gu; \
__builtin_expect(__ret_gu, 0); \
register unsigned long __arg2 asm(__HYPERCALL_ARG2REG) = __arg2; \
register unsigned long __arg3 asm(__HYPERCALL_ARG3REG) = __arg3; \
register unsigned long __arg4 asm(__HYPERCALL_ARG4REG) = __arg4; \
- register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5; \
- register void *__sp asm(_ASM_SP);
+ register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5;
-#define __HYPERCALL_0PARAM "=r" (__res), "+r" (__sp)
+#define __HYPERCALL_0PARAM "=r" (__res), ASM_CALL_CONSTRAINT
#define __HYPERCALL_1PARAM __HYPERCALL_0PARAM, "+r" (__arg1)
#define __HYPERCALL_2PARAM __HYPERCALL_1PARAM, "+r" (__arg2)
#define __HYPERCALL_3PARAM __HYPERCALL_2PARAM, "+r" (__arg3)
}
}
+static void init_amd_zn(struct cpuinfo_x86 *c)
+{
+ /*
+ * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects
+ * all up to and including B1.
+ */
+ if (c->x86_model <= 1 && c->x86_mask <= 1)
+ set_cpu_cap(c, X86_FEATURE_CPB);
+}
+
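
(For orientation: in kernels of this vintage c->x86_mask holds the CPU stepping, so the condition reads as family 17h, models 0-1, steppings 0-1, i.e. everything up to and including B1 as the comment says.)
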
static void init_amd(struct cpuinfo_x86 *c)
{
early_init_amd(c);
case 0x10: init_amd_gh(c); break;
case 0x12: init_amd_ln(c); break;
case 0x15: init_amd_bd(c); break;
+ case 0x17: init_amd_zn(c); break;
}
/* Enable workaround for FXSAVE leak */
void __init check_bugs(void)
{
-#ifdef CONFIG_X86_32
- /*
- * Regardless of whether PCID is enumerated, the SDM says
- * that it can't be enabled in 32-bit mode.
- */
- setup_clear_cpu_cap(X86_FEATURE_PCID);
-#endif
-
identify_boot_cpu();
if (!IS_ENABLED(CONFIG_SMP)) {
setup_force_cpu_cap(X86_FEATURE_ALWAYS);
fpu__init_system(c);
+
+#ifdef CONFIG_X86_32
+ /*
+ * Regardless of whether PCID is enumerated, the SDM says
+ * that it can't be enabled in 32-bit mode.
+ */
+ setup_clear_cpu_cap(X86_FEATURE_PCID);
+#endif
}
void __init early_cpu_init(void)
kernel_fpu_disable();
- if (fpu->fpregs_active) {
+ if (fpu->initialized) {
/*
* Ignore return value -- we don't care if reg state
* is clobbered.
{
	struct fpu *fpu = &current->thread.fpu;
- if (fpu->fpregs_active)
+ if (fpu->initialized)
copy_kernel_to_fpregs(&fpu->state);
kernel_fpu_enable();
preempt_disable();
trace_x86_fpu_before_save(fpu);
- if (fpu->fpregs_active) {
+ if (fpu->initialized) {
if (!copy_fpregs_to_fpstate(fpu)) {
copy_kernel_to_fpregs(&fpu->state);
}
int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
{
- dst_fpu->fpregs_active = 0;
dst_fpu->last_cpu = -1;
- if (!src_fpu->fpstate_active || !static_cpu_has(X86_FEATURE_FPU))
+ if (!src_fpu->initialized || !static_cpu_has(X86_FEATURE_FPU))
return 0;
WARN_ON_FPU(src_fpu != ¤t->thread.fpu);
/*
* Save current FPU registers directly into the child
* FPU context, without any memory-to-memory copying.
- * In lazy mode, if the FPU context isn't loaded into
- * fpregs, CR0.TS will be set and do_device_not_available
- * will load the FPU context.
*
- * We have to do all this with preemption disabled,
- * mostly because of the FNSAVE case, because in that
- * case we must not allow preemption in the window
- * between the FNSAVE and us marking the context lazy.
- *
- * It shouldn't be an issue as even FNSAVE is plenty
- * fast in terms of critical section length.
+ * ( The function 'fails' in the FNSAVE case, which destroys
+ * register contents so we have to copy them back. )
*/
- preempt_disable();
if (!copy_fpregs_to_fpstate(dst_fpu)) {
- memcpy(&src_fpu->state, &dst_fpu->state,
- fpu_kernel_xstate_size);
-
+ memcpy(&src_fpu->state, &dst_fpu->state, fpu_kernel_xstate_size);
copy_kernel_to_fpregs(&src_fpu->state);
}
- preempt_enable();
trace_x86_fpu_copy_src(src_fpu);
trace_x86_fpu_copy_dst(dst_fpu);
* Activate the current task's in-memory FPU context,
* if it has not been used before:
*/
-void fpu__activate_curr(struct fpu *fpu)
+void fpu__initialize(struct fpu *fpu)
{
	WARN_ON_FPU(fpu != &current->thread.fpu);
- if (!fpu->fpstate_active) {
+ if (!fpu->initialized) {
fpstate_init(&fpu->state);
trace_x86_fpu_init_state(fpu);
trace_x86_fpu_activate_state(fpu);
/* Safe to do for the current task: */
- fpu->fpstate_active = 1;
+ fpu->initialized = 1;
}
}
-EXPORT_SYMBOL_GPL(fpu__activate_curr);
+EXPORT_SYMBOL_GPL(fpu__initialize);
/*
* This function must be called before we read a task's fpstate.
*
- * If the task has not used the FPU before then initialize its
- * fpstate.
+ * There are two cases where this gets called:
+ *
+ * - for the current task (when coredumping), in which case we have
+ * to save the latest FPU registers into the fpstate,
+ *
+ * - or it's called for stopped tasks (ptrace), in which case the
+ * registers were already saved by the context-switch code when
+ * the task scheduled out - we only have to initialize the registers
+ * if they've never been initialized.
*
* If the task has used the FPU before then save it.
*/
-void fpu__activate_fpstate_read(struct fpu *fpu)
+void fpu__prepare_read(struct fpu *fpu)
{
- /*
- * If fpregs are active (in the current CPU), then
- * copy them to the fpstate:
- */
- if (fpu->fpregs_active) {
+	if (fpu == &current->thread.fpu) {
fpu__save(fpu);
} else {
- if (!fpu->fpstate_active) {
+ if (!fpu->initialized) {
fpstate_init(&fpu->state);
trace_x86_fpu_init_state(fpu);
trace_x86_fpu_activate_state(fpu);
/* Safe to do for current and for stopped child tasks: */
- fpu->fpstate_active = 1;
+ fpu->initialized = 1;
}
}
}
/*
* This function must be called before we write a task's fpstate.
*
- * If the task has used the FPU before then unlazy it.
+ * If the task has used the FPU before then invalidate any cached FPU registers.
* If the task has not used the FPU before then initialize its fpstate.
*
* After this function call, after registers in the fpstate are
* modified and the child task has woken up, the child task will
* restore the modified FPU state from the modified context. If we
- * didn't clear its lazy status here then the lazy in-registers
+ * didn't clear its cached status here then the cached in-registers
* state pending on its former CPU could be restored, corrupting
* the modifications.
*/
-void fpu__activate_fpstate_write(struct fpu *fpu)
+void fpu__prepare_write(struct fpu *fpu)
{
/*
* Only stopped child tasks can be used to modify the FPU
*/
	WARN_ON_FPU(fpu == &current->thread.fpu);
- if (fpu->fpstate_active) {
- /* Invalidate any lazy state: */
+ if (fpu->initialized) {
+ /* Invalidate any cached state: */
__fpu_invalidate_fpregs_state(fpu);
} else {
fpstate_init(&fpu->state);
trace_x86_fpu_activate_state(fpu);
/* Safe to do for stopped child tasks: */
- fpu->fpstate_active = 1;
+ fpu->initialized = 1;
}
}
-/*
- * This function must be called before we write the current
- * task's fpstate.
- *
- * This call gets the current FPU register state and moves
- * it in to the 'fpstate'. Preemption is disabled so that
- * no writes to the 'fpstate' can occur from context
- * swiches.
- *
- * Must be followed by a fpu__current_fpstate_write_end().
- */
-void fpu__current_fpstate_write_begin(void)
-{
-	struct fpu *fpu = &current->thread.fpu;
-
- /*
- * Ensure that the context-switching code does not write
- * over the fpstate while we are doing our update.
- */
- preempt_disable();
-
- /*
- * Move the fpregs in to the fpu's 'fpstate'.
- */
- fpu__activate_fpstate_read(fpu);
-
- /*
- * The caller is about to write to 'fpu'. Ensure that no
- * CPU thinks that its fpregs match the fpstate. This
- * ensures we will not be lazy and skip a XRSTOR in the
- * future.
- */
- __fpu_invalidate_fpregs_state(fpu);
-}
-
-/*
- * This function must be paired with fpu__current_fpstate_write_begin()
- *
- * This will ensure that the modified fpstate gets placed back in
- * the fpregs if necessary.
- *
- * Note: This function may be called whether or not an _actual_
- * write to the fpstate occurred.
- */
-void fpu__current_fpstate_write_end(void)
-{
-	struct fpu *fpu = &current->thread.fpu;
-
- /*
- * 'fpu' now has an updated copy of the state, but the
- * registers may still be out of date. Update them with
- * an XRSTOR if they are active.
- */
- if (fpregs_active())
- copy_kernel_to_fpregs(&fpu->state);
-
- /*
- * Our update is done and the fpregs/fpstate are in sync
- * if necessary. Context switches can happen again.
- */
- preempt_enable();
-}
-
/*
* 'fpu__restore()' is called to copy FPU registers from
* the FPU fpstate to the live hw registers and to activate
*/
void fpu__restore(struct fpu *fpu)
{
- fpu__activate_curr(fpu);
+ fpu__initialize(fpu);
/* Avoid __kernel_fpu_begin() right after fpregs_activate() */
kernel_fpu_disable();
{
preempt_disable();
- if (fpu->fpregs_active) {
- /* Ignore delayed exceptions from user space */
- asm volatile("1: fwait\n"
- "2:\n"
- _ASM_EXTABLE(1b, 2b));
- fpregs_deactivate(fpu);
+	if (fpu == &current->thread.fpu) {
+ if (fpu->initialized) {
+ /* Ignore delayed exceptions from user space */
+ asm volatile("1: fwait\n"
+ "2:\n"
+ _ASM_EXTABLE(1b, 2b));
+ fpregs_deactivate(fpu);
+ }
}
- fpu->fpstate_active = 0;
+ fpu->initialized = 0;
trace_x86_fpu_dropped(fpu);
* Make sure fpstate is cleared and initialized.
*/
if (static_cpu_has(X86_FEATURE_FPU)) {
- fpu__activate_curr(fpu);
+ preempt_disable();
+ fpu__initialize(fpu);
user_fpu_begin();
copy_init_fpstate_to_fpregs();
+ preempt_enable();
}
}
WARN_ON_FPU(!on_boot_cpu);
on_boot_cpu = 0;
- WARN_ON_FPU(current->thread.fpu.fpstate_active);
+ WARN_ON_FPU(current->thread.fpu.initialized);
}
/*
{
struct fpu *target_fpu = &target->thread.fpu;
- return target_fpu->fpstate_active ? regset->n : 0;
+ return target_fpu->initialized ? regset->n : 0;
}
int regset_xregset_fpregs_active(struct task_struct *target, const struct user_regset *regset)
{
struct fpu *target_fpu = &target->thread.fpu;
- if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->fpstate_active)
+ if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->initialized)
return regset->n;
else
return 0;
if (!boot_cpu_has(X86_FEATURE_FXSR))
return -ENODEV;
- fpu__activate_fpstate_read(fpu);
+ fpu__prepare_read(fpu);
fpstate_sanitize_xstate(fpu);
return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
if (!boot_cpu_has(X86_FEATURE_FXSR))
return -ENODEV;
- fpu__activate_fpstate_write(fpu);
+ fpu__prepare_write(fpu);
fpstate_sanitize_xstate(fpu);
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
xsave = &fpu->state.xsave;
- fpu__activate_fpstate_read(fpu);
+ fpu__prepare_read(fpu);
if (using_compacted_format()) {
- ret = copyout_from_xsaves(pos, count, kbuf, ubuf, xsave);
+ if (kbuf)
+ ret = copy_xstate_to_kernel(kbuf, xsave, pos, count);
+ else
+ ret = copy_xstate_to_user(ubuf, xsave, pos, count);
} else {
fpstate_sanitize_xstate(fpu);
/*
xsave = &fpu->state.xsave;
- fpu__activate_fpstate_write(fpu);
+ fpu__prepare_write(fpu);
- if (boot_cpu_has(X86_FEATURE_XSAVES))
- ret = copyin_to_xsaves(kbuf, ubuf, xsave);
- else
+ if (using_compacted_format()) {
+ if (kbuf)
+ ret = copy_kernel_to_xstate(xsave, kbuf);
+ else
+ ret = copy_user_to_xstate(xsave, ubuf);
+ } else {
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
-
- /*
- * In case of failure, mark all states as init:
- */
- if (ret)
- fpstate_init(&fpu->state);
+ if (!ret)
+ ret = validate_xstate_header(&xsave->header);
+ }
/*
* mxcsr reserved bits must be masked to zero for security reasons.
*/
xsave->i387.mxcsr &= mxcsr_feature_mask;
- xsave->header.xfeatures &= xfeatures_mask;
+
/*
- * These bits must be zero.
+ * In case of failure, mark all states as init:
*/
- memset(&xsave->header.reserved, 0, 48);
+ if (ret)
+ fpstate_init(&fpu->state);
return ret;
}
struct fpu *fpu = &target->thread.fpu;
struct user_i387_ia32_struct env;
- fpu__activate_fpstate_read(fpu);
+ fpu__prepare_read(fpu);
if (!boot_cpu_has(X86_FEATURE_FPU))
return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
struct user_i387_ia32_struct env;
int ret;
- fpu__activate_fpstate_write(fpu);
+ fpu__prepare_write(fpu);
fpstate_sanitize_xstate(fpu);
if (!boot_cpu_has(X86_FEATURE_FPU))
struct fpu *fpu = &tsk->thread.fpu;
int fpvalid;
- fpvalid = fpu->fpstate_active;
+ fpvalid = fpu->initialized;
if (fpvalid)
fpvalid = !fpregs_get(tsk, NULL,
0, sizeof(struct user_i387_ia32_struct),
*/
int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
{
-	struct xregs_state *xsave = &current->thread.fpu.state.xsave;
+	struct fpu *fpu = &current->thread.fpu;
+ struct xregs_state *xsave = &fpu->state.xsave;
struct task_struct *tsk = current;
int ia32_fxstate = (buf != buf_fx);
sizeof(struct user_i387_ia32_struct), NULL,
(struct _fpstate_32 __user *) buf) ? -1 : 1;
- if (fpregs_active() || using_compacted_format()) {
+ if (fpu->initialized || using_compacted_format()) {
/* Save the live register state to the user directly. */
if (copy_fpregs_to_sigframe(buf_fx))
return -1;
/* Update the thread's fxstate to save the fsave header. */
if (ia32_fxstate)
- copy_fxregs_to_kernel(&tsk->thread.fpu);
+ copy_fxregs_to_kernel(fpu);
} else {
/*
* It is a *bug* if kernel uses compacted-format for xsave
return -1;
}
- fpstate_sanitize_xstate(&tsk->thread.fpu);
+ fpstate_sanitize_xstate(fpu);
if (__copy_to_user(buf_fx, xsave, fpu_user_xstate_size))
return -1;
}
struct xstate_header *header = &xsave->header;
if (use_xsave()) {
- /* These bits must be zero. */
- memset(header->reserved, 0, 48);
+ /*
+ * Note: we don't need to zero the reserved bits in the
+ * xstate_header here because we either didn't copy them at all,
+ * or we checked earlier that they aren't set.
+ */
/*
* Init the state that is not present in the memory
if (fx_only)
header->xfeatures = XFEATURE_MASK_FPSSE;
else
- header->xfeatures &= (xfeatures_mask & xfeatures);
+ header->xfeatures &= xfeatures;
}
if (use_fxsr()) {
if (!access_ok(VERIFY_READ, buf, size))
return -EACCES;
- fpu__activate_curr(fpu);
+ fpu__initialize(fpu);
if (!static_cpu_has(X86_FEATURE_FPU))
return fpregs_soft_set(current, NULL,
/*
* For 32-bit frames with fxstate, copy the user state to the
* thread's fpu state, reconstruct fxstate from the fsave
- * header. Sanitize the copied state etc.
+ * header. Validate and sanitize the copied state.
*/
struct fpu *fpu = &tsk->thread.fpu;
struct user_i387_ia32_struct env;
int err = 0;
/*
- * Drop the current fpu which clears fpu->fpstate_active. This ensures
+ * Drop the current fpu which clears fpu->initialized. This ensures
* that any context-switch during the copy of the new state,
* avoids the intermediate state from getting restored/saved.
* Thus avoiding the new restored state from getting corrupted.
* We will be ready to restore/save the state only after
- * fpu->fpstate_active is again set.
+ * fpu->initialized is again set.
*/
fpu__drop(fpu);
if (using_compacted_format()) {
- err = copyin_to_xsaves(NULL, buf_fx,
- &fpu->state.xsave);
+ err = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
} else {
- err = __copy_from_user(&fpu->state.xsave,
- buf_fx, state_size);
+ err = __copy_from_user(&fpu->state.xsave, buf_fx, state_size);
+
+ if (!err && state_size > offsetof(struct xregs_state, header))
+ err = validate_xstate_header(&fpu->state.xsave.header);
}
if (err || __copy_from_user(&env, buf, sizeof(env))) {
sanitize_restored_xstate(tsk, &env, xfeatures, fx_only);
}
- fpu->fpstate_active = 1;
+ fpu->initialized = 1;
preempt_disable();
fpu__restore(fpu);
preempt_enable();
return boot_cpu_has(X86_FEATURE_XSAVES);
}
+/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
+int validate_xstate_header(const struct xstate_header *hdr)
+{
+ /* No unknown or supervisor features may be set */
+ if (hdr->xfeatures & (~xfeatures_mask | XFEATURE_MASK_SUPERVISOR))
+ return -EINVAL;
+
+ /* Userspace must use the uncompacted format */
+ if (hdr->xcomp_bv)
+ return -EINVAL;
+
+ /*
+ * If 'reserved' is shrunken to add a new field, make sure to validate
+ * that new field here!
+ */
+ BUILD_BUG_ON(sizeof(hdr->reserved) != 48);
+
+ /* No reserved bits may be set */
+ if (memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved)))
+ return -EINVAL;
+
+ return 0;
+}
+
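For illustration, a minimal sketch of how the three checks above behave
(hypothetical values, assuming only the definitions already in this file):

	struct xstate_header hdr = {};

	hdr.xfeatures = XFEATURE_MASK_FPSSE;	/* user FP+SSE only: passes check 1 */
	hdr.xcomp_bv  = 0;			/* standard (uncompacted) format: passes check 2 */
	/* reserved[] still all zero: passes check 3, header validates (returns 0) */

	hdr.xcomp_bv = 1;			/* now claims compacted format */
	/* validate_xstate_header(&hdr) returns -EINVAL */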
static void __xstate_dump_leaves(void)
{
int i;
{
	struct fpu *fpu = &current->thread.fpu;
- if (!fpu->fpstate_active)
+ if (!fpu->initialized)
return NULL;
/*
* fpu__save() takes the CPU's xstate registers
}
#endif /* ! CONFIG_ARCH_HAS_PKEYS */
+/*
+ * Weird legacy quirk: SSE and YMM states store information in the
+ * MXCSR and MXCSR_FLAGS fields of the FP area. That means if the FP
+ * area is marked as unused in the xfeatures header, we need to copy
+ * MXCSR and MXCSR_FLAGS if either SSE or YMM are in use.
+ */
+static inline bool xfeatures_mxcsr_quirk(u64 xfeatures)
+{
+ if (!(xfeatures & (XFEATURE_MASK_SSE|XFEATURE_MASK_YMM)))
+ return false;
+
+ if (xfeatures & XFEATURE_MASK_FP)
+ return false;
+
+ return true;
+}
+
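A worked example of the quirk (illustrative feature combinations, not taken
from the patch itself):

	/*
	 * xfeatures = SSE       -> true  (MXCSR is live but the FP area is unused)
	 * xfeatures = YMM       -> true  (same reasoning for the AVX state)
	 * xfeatures = FP | SSE  -> false (FP area is copied, MXCSR travels with it)
	 * xfeatures = 0         -> false (neither SSE nor YMM in use)
	 */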
/*
* This is similar to user_regset_copyout(), but will not add offset to
* the source data pointer or increment pos, count, kbuf, and ubuf.
*/
-static inline int xstate_copyout(unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf,
- const void *data, const int start_pos,
- const int end_pos)
+static inline void
+__copy_xstate_to_kernel(void *kbuf, const void *data,
+ unsigned int offset, unsigned int size, unsigned int size_total)
{
- if ((count == 0) || (pos < start_pos))
- return 0;
+ if (offset < size_total) {
+ unsigned int copy = min(size, size_total - offset);
- if (end_pos < 0 || pos < end_pos) {
- unsigned int copy = (end_pos < 0 ? count : min(count, end_pos - pos));
+ memcpy(kbuf + offset, data, copy);
+ }
+}
- if (kbuf) {
- memcpy(kbuf + pos, data, copy);
- } else {
- if (__copy_to_user(ubuf + pos, data, copy))
- return -EFAULT;
+/*
+ * Convert from kernel XSAVES compacted format to standard format and copy
+ * to a kernel-space ptrace buffer.
+ *
+ * It supports partial copy but pos always starts from zero. This is called
+ * from xstateregs_get() and there we check the CPU has XSAVES.
+ */
+int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total)
+{
+ unsigned int offset, size;
+ struct xstate_header header;
+ int i;
+
+ /*
+ * Currently copy_regset_to_user() starts from pos 0:
+ */
+ if (unlikely(offset_start != 0))
+ return -EFAULT;
+
+ /*
+ * The destination is a ptrace buffer; we put in only user xstates:
+ */
+ memset(&header, 0, sizeof(header));
+ header.xfeatures = xsave->header.xfeatures;
+ header.xfeatures &= ~XFEATURE_MASK_SUPERVISOR;
+
+ /*
+ * Copy xregs_state->header:
+ */
+ offset = offsetof(struct xregs_state, header);
+ size = sizeof(header);
+
+ __copy_xstate_to_kernel(kbuf, &header, offset, size, size_total);
+
+ for (i = 0; i < XFEATURE_MAX; i++) {
+ /*
+ * Copy only in-use xstates:
+ */
+ if ((header.xfeatures >> i) & 1) {
+ void *src = __raw_xsave_addr(xsave, 1 << i);
+
+ offset = xstate_offsets[i];
+ size = xstate_sizes[i];
+
+ /* The next component has to fit fully into the output buffer: */
+ if (offset + size > size_total)
+ break;
+
+ __copy_xstate_to_kernel(kbuf, src, offset, size, size_total);
}
+
+ }
+
+ if (xfeatures_mxcsr_quirk(header.xfeatures)) {
+ offset = offsetof(struct fxregs_state, mxcsr);
+ size = MXCSR_AND_FLAGS_SIZE;
+ __copy_xstate_to_kernel(kbuf, &xsave->i387.mxcsr, offset, size, size_total);
+ }
+
+ /*
+ * Fill xsave->i387.sw_reserved value for ptrace frame:
+ */
+ offset = offsetof(struct fxregs_state, sw_reserved);
+ size = sizeof(xstate_fx_sw_bytes);
+
+ __copy_xstate_to_kernel(kbuf, xstate_fx_sw_bytes, offset, size, size_total);
+
+ return 0;
+}
+
+static inline int
+__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int offset, unsigned int size, unsigned int size_total)
+{
+ if (!size)
+ return 0;
+
+ if (offset < size_total) {
+ unsigned int copy = min(size, size_total - offset);
+
+ if (__copy_to_user(ubuf + offset, data, copy))
+ return -EFAULT;
}
return 0;
}
/*
* Convert from kernel XSAVES compacted format to standard format and copy
- * to a ptrace buffer. It supports partial copy but pos always starts from
+ * to a user-space buffer. It supports partial copy but pos always starts from
* zero. This is called from xstateregs_get() and there we check the CPU
* has XSAVES.
*/
-int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
- void __user *ubuf, struct xregs_state *xsave)
+int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total)
{
unsigned int offset, size;
int ret, i;
/*
* Currently copy_regset_to_user() starts from pos 0:
*/
- if (unlikely(pos != 0))
+ if (unlikely(offset_start != 0))
return -EFAULT;
/*
offset = offsetof(struct xregs_state, header);
size = sizeof(header);
- ret = xstate_copyout(offset, size, kbuf, ubuf, &header, 0, count);
-
+ ret = __copy_xstate_to_user(ubuf, &header, offset, size, size_total);
if (ret)
return ret;
offset = xstate_offsets[i];
size = xstate_sizes[i];
- ret = xstate_copyout(offset, size, kbuf, ubuf, src, 0, count);
+ /* The next component has to fit fully into the output buffer: */
+ if (offset + size > size_total)
+ break;
+ ret = __copy_xstate_to_user(ubuf, src, offset, size, size_total);
if (ret)
return ret;
-
- if (offset + size >= count)
- break;
}
}
+ if (xfeatures_mxcsr_quirk(header.xfeatures)) {
+ offset = offsetof(struct fxregs_state, mxcsr);
+ size = MXCSR_AND_FLAGS_SIZE;
+ __copy_xstate_to_user(ubuf, &xsave->i387.mxcsr, offset, size, size_total);
+ }
+
/*
* Fill xsave->i387.sw_reserved value for ptrace frame:
*/
offset = offsetof(struct fxregs_state, sw_reserved);
size = sizeof(xstate_fx_sw_bytes);
- ret = xstate_copyout(offset, size, kbuf, ubuf, xstate_fx_sw_bytes, 0, count);
-
+ ret = __copy_xstate_to_user(ubuf, xstate_fx_sw_bytes, offset, size, size_total);
if (ret)
return ret;
}
/*
- * Convert from a ptrace standard-format buffer to kernel XSAVES format
- * and copy to the target thread. This is called from xstateregs_set() and
- * there we check the CPU has XSAVES and a whole standard-sized buffer
- * exists.
+ * Convert from a ptrace standard-format kernel buffer to kernel XSAVES format
+ * and copy to the target thread. This is called from xstateregs_set().
*/
-int copyin_to_xsaves(const void *kbuf, const void __user *ubuf,
- struct xregs_state *xsave)
+int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
{
unsigned int offset, size;
int i;
- u64 xfeatures;
- u64 allowed_features;
+ struct xstate_header hdr;
offset = offsetof(struct xregs_state, header);
- size = sizeof(xfeatures);
+ size = sizeof(hdr);
- if (kbuf) {
- memcpy(&xfeatures, kbuf + offset, size);
- } else {
- if (__copy_from_user(&xfeatures, ubuf + offset, size))
- return -EFAULT;
+ memcpy(&hdr, kbuf + offset, size);
+
+ if (validate_xstate_header(&hdr))
+ return -EINVAL;
+
+ for (i = 0; i < XFEATURE_MAX; i++) {
+ u64 mask = ((u64)1 << i);
+
+ if (hdr.xfeatures & mask) {
+ void *dst = __raw_xsave_addr(xsave, 1 << i);
+
+ offset = xstate_offsets[i];
+ size = xstate_sizes[i];
+
+ memcpy(dst, kbuf + offset, size);
+ }
+ }
+
+ if (xfeatures_mxcsr_quirk(hdr.xfeatures)) {
+ offset = offsetof(struct fxregs_state, mxcsr);
+ size = MXCSR_AND_FLAGS_SIZE;
+ memcpy(&xsave->i387.mxcsr, kbuf + offset, size);
}
/*
- * Reject if the user sets any disabled or supervisor features:
+ * The state that came in from userspace was user-state only.
+ * Mask all the user states out of 'xfeatures':
+ */
+ xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR;
+
+ /*
+ * Add back in the features that came in from userspace:
*/
- allowed_features = xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR;
+ xsave->header.xfeatures |= hdr.xfeatures;
- if (xfeatures & ~allowed_features)
+ return 0;
+}
+
+/*
+ * Convert from a ptrace or sigreturn standard-format user-space buffer to
+ * kernel XSAVES format and copy to the target thread. This is called from
+ * xstateregs_set(), as well as potentially from the sigreturn() and
+ * rt_sigreturn() system calls.
+ */
+int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
+{
+ unsigned int offset, size;
+ int i;
+ struct xstate_header hdr;
+
+ offset = offsetof(struct xregs_state, header);
+ size = sizeof(hdr);
+
+ if (__copy_from_user(&hdr, ubuf + offset, size))
+ return -EFAULT;
+
+ if (validate_xstate_header(&hdr))
return -EINVAL;
for (i = 0; i < XFEATURE_MAX; i++) {
u64 mask = ((u64)1 << i);
- if (xfeatures & mask) {
+ if (hdr.xfeatures & mask) {
void *dst = __raw_xsave_addr(xsave, 1 << i);
offset = xstate_offsets[i];
size = xstate_sizes[i];
- if (kbuf) {
- memcpy(dst, kbuf + offset, size);
- } else {
- if (__copy_from_user(dst, ubuf + offset, size))
- return -EFAULT;
- }
+ if (__copy_from_user(dst, ubuf + offset, size))
+ return -EFAULT;
}
}
+ if (xfeatures_mxcsr_quirk(hdr.xfeatures)) {
+ offset = offsetof(struct fxregs_state, mxcsr);
+ size = MXCSR_AND_FLAGS_SIZE;
+ if (__copy_from_user(&xsave->i387.mxcsr, ubuf + offset, size))
+ return -EFAULT;
+ }
+
/*
* The state that came in from userspace was user-state only.
* Mask all the user states out of 'xfeatures':
/*
* Add back in the features that came in from userspace:
*/
- xsave->header.xfeatures |= xfeatures;
+ xsave->header.xfeatures |= hdr.xfeatures;
return 0;
}
n.token = token;
n.cpu = smp_processor_id();
- n.halted = is_idle_task(current) || preempt_count() > 1;
+ n.halted = is_idle_task(current) || preempt_count() > 1 ||
+ rcu_preempt_depth();
init_swait_queue_head(&n.wq);
hlist_add_head(&n.link, &b->list);
raw_spin_unlock(&b->lock);
sp = (unsigned long) ka->sa.sa_restorer;
}
- if (fpu->fpstate_active) {
+ if (fpu->initialized) {
sp = fpu__alloc_mathframe(sp, IS_ENABLED(CONFIG_X86_32),
&buf_fx, &math_size);
*fpstate = (void __user *)sp;
return (void __user *)-1L;
/* save i387 and extended state */
- if (fpu->fpstate_active &&
+ if (fpu->initialized &&
copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size) < 0)
return (void __user *)-1L;
/*
* Ensure the signal handler starts with the new fpu state.
*/
- if (fpu->fpstate_active)
+ if (fpu->initialized)
fpu__clear(fpu);
}
signal_setup_done(failed, ksig, stepping);
*/
if (boot_cpu_has(X86_FEATURE_PCID))
__write_cr4(__read_cr4() | X86_CR4_PCIDE);
- cpu_init();
- x86_cpuinit.early_percpu_clock_init();
- preempt_disable();
- smp_callin();
-
- enable_start_cpu0 = 0;
#ifdef CONFIG_X86_32
/* switch away from the initial page table */
__flush_tlb_all();
#endif
+ cpu_init();
+ x86_cpuinit.early_percpu_clock_init();
+ preempt_disable();
+ smp_callin();
+
+ enable_start_cpu0 = 0;
+
/* otherwise gcc will move up smp_processor_id before the cpu_init */
barrier();
/*
ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
if (efer & EFER_LMA) {
u64 maxphyaddr;
- u32 eax = 0x80000008;
+ u32 eax, ebx, ecx, edx;
- if (ctxt->ops->get_cpuid(ctxt, &eax, NULL, NULL,
- NULL, false))
+ eax = 0x80000008;
+ ecx = 0;
+ if (ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx,
+ &edx, false))
maxphyaddr = eax & 0xff;
else
maxphyaddr = 36;
static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
{
- register void *__sp asm(_ASM_SP);
ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
if (!(ctxt->d & ByteOp))
asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
: "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
- [fastop]"+S"(fop), "+r"(__sp)
+ [fastop]"+S"(fop), ASM_CALL_CONSTRAINT
: "c"(ctxt->src2.val));
ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
int cpu;
bool launched;
bool nmi_known_unmasked;
+ unsigned long vmcs_host_cr3; /* May not match real cr3 */
+ unsigned long vmcs_host_cr4; /* May not match real cr4 */
struct list_head loaded_vmcss_on_cpu_link;
};
int gs_ldt_reload_needed;
int fs_reload_needed;
u64 msr_host_bndcfgs;
- unsigned long vmcs_host_cr3; /* May not match real cr3 */
- unsigned long vmcs_host_cr4; /* May not match real cr4 */
} host_state;
struct {
int vm86_active;
struct pi_desc old, new;
unsigned int dest;
- if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
- !irq_remapping_cap(IRQ_POSTING_CAP) ||
- !kvm_vcpu_apicv_active(vcpu))
+ /*
+ * In case of hot-plug or hot-unplug, we may have to undo
+ * vmx_vcpu_pi_put even if there is no assigned device. And we
+ * always keep PI.NDST up to date for simplicity: it makes the
+ * code easier, and CPU migration is not a fast path.
+ */
+ if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
+ return;
+
+ /*
+ * First handle the simple case where no cmpxchg is necessary; just
+ * allow posting non-urgent interrupts.
+ *
+ * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
+ * PI.NDST: pi_post_block will do it for us and the wakeup_handler
+ * expects the VCPU to be on the blocked_vcpu_list that matches
+ * PI.NDST.
+ */
+ if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR ||
+ vcpu->cpu == cpu) {
+ pi_clear_sn(pi_desc);
return;
+ }
+ /* The full case. */
do {
old.control = new.control = pi_desc->control;
- /*
- * If 'nv' field is POSTED_INTR_WAKEUP_VECTOR, there
- * are two possible cases:
- * 1. After running 'pre_block', context switch
- * happened. For this case, 'sn' was set in
- * vmx_vcpu_put(), so we need to clear it here.
- * 2. After running 'pre_block', we were blocked,
- * and woken up by some other guy. For this case,
- * we don't need to do anything, 'pi_post_block'
- * will do everything for us. However, we cannot
- * check whether it is case #1 or case #2 here
- * (maybe, not needed), so we also clear sn here,
- * I think it is not a big deal.
- */
- if (pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR) {
- if (vcpu->cpu != cpu) {
- dest = cpu_physical_id(cpu);
-
- if (x2apic_enabled())
- new.ndst = dest;
- else
- new.ndst = (dest << 8) & 0xFF00;
- }
+ dest = cpu_physical_id(cpu);
- /* set 'NV' to 'notification vector' */
- new.nv = POSTED_INTR_VECTOR;
- }
+ if (x2apic_enabled())
+ new.ndst = dest;
+ else
+ new.ndst = (dest << 8) & 0xFF00;
- /* Allow posting non-urgent interrupts */
new.sn = 0;
- } while (cmpxchg(&pi_desc->control, old.control,
- new.control) != old.control);
+ } while (cmpxchg64(&pi_desc->control, old.control,
+ new.control) != old.control);
}
static void decache_tsc_multiplier(struct vcpu_vmx *vmx)
int pi_vec = nested ? POSTED_INTR_NESTED_VECTOR : POSTED_INTR_VECTOR;
if (vcpu->mode == IN_GUEST_MODE) {
- struct vcpu_vmx *vmx = to_vmx(vcpu);
-
/*
- * Currently, we don't support urgent interrupt,
- * all interrupts are recognized as non-urgent
- * interrupt, so we cannot post interrupts when
- * 'SN' is set.
+ * The vector of interrupt to be delivered to vcpu had
+ * been set in PIR before this function.
+ *
+ * Following cases will be reached in this block, and
+ * we always send a notification event in all cases as
+ * explained below.
+ *
+ * Case 1: vcpu keeps in non-root mode. Sending a
+ * notification event posts the interrupt to vcpu.
*
- * If the vcpu is in guest mode, it means it is
- * running instead of being scheduled out and
- * waiting in the run queue, and that's the only
- * case when 'SN' is set currently, warning if
- * 'SN' is set.
+ * Case 2: vcpu exits to root mode and is still
+ * runnable. PIR will be synced to vIRR before the
+ * next vcpu entry. Sending a notification event in
+ * this case has no effect, as vcpu is not in root
+ * mode.
+ *
+ * Case 3: vcpu exits to root mode and is blocked.
+ * vcpu_block() has already synced PIR to vIRR and
+ * never blocks vcpu if vIRR is not cleared. Therefore,
+ * a blocked vcpu here does not wait for any requested
+ * interrupts in PIR, and sending a notification event
+ * which has no effect is safe here.
*/
- WARN_ON_ONCE(pi_test_sn(&vmx->pi_desc));
apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec);
return true;
*/
cr3 = __read_cr3();
vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */
- vmx->host_state.vmcs_host_cr3 = cr3;
+ vmx->loaded_vmcs->vmcs_host_cr3 = cr3;
/* Save the most likely value for this task's CR4 in the VMCS. */
cr4 = cr4_read_shadow();
vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */
- vmx->host_state.vmcs_host_cr4 = cr4;
+ vmx->loaded_vmcs->vmcs_host_cr4 = cr4;
vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */
#ifdef CONFIG_X86_64
static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
{
u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
- register void *__sp asm(_ASM_SP);
if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
== (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) {
#ifdef CONFIG_X86_64
[sp]"=&r"(tmp),
#endif
- "+r"(__sp)
+ ASM_CALL_CONSTRAINT
:
[entry]"r"(entry),
[ss]"i"(__KERNEL_DS),
vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
cr3 = __get_current_cr3_fast();
- if (unlikely(cr3 != vmx->host_state.vmcs_host_cr3)) {
+ if (unlikely(cr3 != vmx->loaded_vmcs->vmcs_host_cr3)) {
vmcs_writel(HOST_CR3, cr3);
- vmx->host_state.vmcs_host_cr3 = cr3;
+ vmx->loaded_vmcs->vmcs_host_cr3 = cr3;
}
cr4 = cr4_read_shadow();
- if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) {
+ if (unlikely(cr4 != vmx->loaded_vmcs->vmcs_host_cr4)) {
vmcs_writel(HOST_CR4, cr4);
- vmx->host_state.vmcs_host_cr4 = cr4;
+ vmx->loaded_vmcs->vmcs_host_cr4 = cr4;
}
/* When single-stepping over STI and MOV SS, we must clear the
vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
+ /*
+ * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR
+ * or POSTED_INTR_WAKEUP_VECTOR.
+ */
+ vmx->pi_desc.nv = POSTED_INTR_VECTOR;
+ vmx->pi_desc.sn = 1;
+
return &vmx->vcpu;
free_vmcs:
WARN_ON(!is_guest_mode(vcpu));
- if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code)) {
+ if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code) &&
+ !to_vmx(vcpu)->nested.nested_run_pending) {
vmcs12->vm_exit_intr_error_code = fault->error_code;
nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
}
+static void __pi_post_block(struct kvm_vcpu *vcpu)
+{
+ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+ struct pi_desc old, new;
+ unsigned int dest;
+
+ do {
+ old.control = new.control = pi_desc->control;
+ WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR,
+ "Wakeup handler not enabled while the VCPU is blocked\n");
+
+ dest = cpu_physical_id(vcpu->cpu);
+
+ if (x2apic_enabled())
+ new.ndst = dest;
+ else
+ new.ndst = (dest << 8) & 0xFF00;
+
+ /* set 'NV' to 'notification vector' */
+ new.nv = POSTED_INTR_VECTOR;
+ } while (cmpxchg64(&pi_desc->control, old.control,
+ new.control) != old.control);
+
+ if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) {
+ spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+ list_del(&vcpu->blocked_vcpu_list);
+ spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+ vcpu->pre_pcpu = -1;
+ }
+}
+
/*
* This routine does the following things for vCPU which is going
* to be blocked if VT-d PI is enabled.
*/
static int pi_pre_block(struct kvm_vcpu *vcpu)
{
- unsigned long flags;
unsigned int dest;
struct pi_desc old, new;
struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
!kvm_vcpu_apicv_active(vcpu))
return 0;
- vcpu->pre_pcpu = vcpu->cpu;
- spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
- vcpu->pre_pcpu), flags);
- list_add_tail(&vcpu->blocked_vcpu_list,
- &per_cpu(blocked_vcpu_on_cpu,
- vcpu->pre_pcpu));
- spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock,
- vcpu->pre_pcpu), flags);
+ WARN_ON(irqs_disabled());
+ local_irq_disable();
+ if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) {
+ vcpu->pre_pcpu = vcpu->cpu;
+ spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+ list_add_tail(&vcpu->blocked_vcpu_list,
+ &per_cpu(blocked_vcpu_on_cpu,
+ vcpu->pre_pcpu));
+ spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+ }
do {
old.control = new.control = pi_desc->control;
- /*
- * We should not block the vCPU if
- * an interrupt is posted for it.
- */
- if (pi_test_on(pi_desc) == 1) {
- spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
- vcpu->pre_pcpu), flags);
- list_del(&vcpu->blocked_vcpu_list);
- spin_unlock_irqrestore(
- &per_cpu(blocked_vcpu_on_cpu_lock,
- vcpu->pre_pcpu), flags);
- vcpu->pre_pcpu = -1;
-
- return 1;
- }
-
WARN((pi_desc->sn == 1),
"Warning: SN field of posted-interrupts "
"is set before blocking\n");
/* set 'NV' to 'wakeup vector' */
new.nv = POSTED_INTR_WAKEUP_VECTOR;
- } while (cmpxchg(&pi_desc->control, old.control,
- new.control) != old.control);
+ } while (cmpxchg64(&pi_desc->control, old.control,
+ new.control) != old.control);
- return 0;
+ /* We should not block the vCPU if an interrupt is posted for it. */
+ if (pi_test_on(pi_desc) == 1)
+ __pi_post_block(vcpu);
+
+ local_irq_enable();
+ return (vcpu->pre_pcpu == -1);
}
static int vmx_pre_block(struct kvm_vcpu *vcpu)
static void pi_post_block(struct kvm_vcpu *vcpu)
{
- struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
- struct pi_desc old, new;
- unsigned int dest;
- unsigned long flags;
-
- if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
- !irq_remapping_cap(IRQ_POSTING_CAP) ||
- !kvm_vcpu_apicv_active(vcpu))
+ if (vcpu->pre_pcpu == -1)
return;
- do {
- old.control = new.control = pi_desc->control;
-
- dest = cpu_physical_id(vcpu->cpu);
-
- if (x2apic_enabled())
- new.ndst = dest;
- else
- new.ndst = (dest << 8) & 0xFF00;
-
- /* Allow posting non-urgent interrupts */
- new.sn = 0;
-
- /* set 'NV' to 'notification vector' */
- new.nv = POSTED_INTR_VECTOR;
- } while (cmpxchg(&pi_desc->control, old.control,
- new.control) != old.control);
-
- if(vcpu->pre_pcpu != -1) {
- spin_lock_irqsave(
- &per_cpu(blocked_vcpu_on_cpu_lock,
- vcpu->pre_pcpu), flags);
- list_del(&vcpu->blocked_vcpu_list);
- spin_unlock_irqrestore(
- &per_cpu(blocked_vcpu_on_cpu_lock,
- vcpu->pre_pcpu), flags);
- vcpu->pre_pcpu = -1;
- }
+ WARN_ON(irqs_disabled());
+ local_irq_disable();
+ __pi_post_block(vcpu);
+ local_irq_enable();
}
static void vmx_post_block(struct kvm_vcpu *vcpu)
if (set)
ret = irq_set_vcpu_affinity(host_irq, &vcpu_info);
- else {
- /* suppress notification event before unposting */
- pi_set_sn(vcpu_to_pi_desc(vcpu));
+ else
ret = irq_set_vcpu_affinity(host_irq, NULL);
- pi_clear_sn(vcpu_to_pi_desc(vcpu));
- }
if (ret < 0) {
printk(KERN_INFO "%s: failed to update PI IRTE\n",
int r;
sigset_t sigsaved;
- fpu__activate_curr(fpu);
+ fpu__initialize(fpu);
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
struct desc_struct code_descriptor;
	struct fpu *fpu = &current->thread.fpu;
- fpu__activate_curr(fpu);
+ fpu__initialize(fpu);
#ifdef RE_ENTRANT_CHECKING
if (emulating) {
#include <linux/uaccess.h>
#include <linux/sched/debug.h>
+#include <asm/fpu/internal.h>
#include <asm/traps.h>
#include <asm/kdebug.h>
}
EXPORT_SYMBOL_GPL(ex_handler_refcount);
+/*
+ * Handler for when we fail to restore a task's FPU state. We should never get
+ * here because the FPU state of a task using the FPU (task->thread.fpu.state)
+ * should always be valid. However, past bugs have allowed userspace to set
+ * reserved bits in the XSAVE area using PTRACE_SETREGSET or sys_rt_sigreturn().
+ * These caused XRSTOR to fail when switching to the task, leaking the FPU
+ * registers of the task previously executing on the CPU. Mitigate this class
+ * of vulnerability by restoring from the initial state (essentially, zeroing
+ * out all the FPU registers) if we can't restore from the task's FPU state.
+ */
+bool ex_handler_fprestore(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr)
+{
+ regs->ip = ex_fixup_addr(fixup);
+
+ WARN_ONCE(1, "Bad FPU state detected at %pB, reinitializing FPU registers.",
+ (void *)instruction_pointer(regs));
+
+ __copy_kernel_to_fpregs(&init_fpstate, -1);
+ return true;
+}
+EXPORT_SYMBOL_GPL(ex_handler_fprestore);
+
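For context, this handler only fires when an FP-restore instruction is tagged
with it in the exception table. A hypothetical sketch of that wiring (the
_ASM_EXTABLE_HANDLE usage and the operand constraints here are assumptions for
illustration, not copied from the patch):

	asm volatile("1: xrstor %[fx]\n"
		     "2:\n"
		     _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_fprestore)
		     : : [fx] "m" (*fpstate), "a" (-1), "d" (-1));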
bool ex_handler_ext(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr)
{
if (is_vmalloc_addr((void *)address) &&
(((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) ||
address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) {
- register void *__sp asm("rsp");
unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *);
/*
* We're likely to be running with very little stack space
asm volatile ("movq %[stack], %%rsp\n\t"
"call handle_stack_overflow\n\t"
"1: jmp 1b"
- : "+r" (__sp)
+ : ASM_CALL_CONSTRAINT
: "D" ("kernel stack overflow (page fault)"),
"S" (regs), "d" (address),
[stack] "rm" (stack));
#include <asm/cpufeature.h> /* boot_cpu_has, ... */
#include <asm/mmu_context.h> /* vma_pkey() */
-#include <asm/fpu/internal.h> /* fpregs_active() */
int __execute_only_pkey(struct mm_struct *mm)
{
*/
preempt_disable();
if (!need_to_set_mm_pkey &&
- fpregs_active() &&
+ current->thread.fpu.initialized &&
!__pkru_allows_read(read_pkru(), execute_only_pkey)) {
preempt_enable();
return execute_only_pkey;
* isn't free.
*/
#ifdef CONFIG_DEBUG_VM
- if (WARN_ON_ONCE(__read_cr3() !=
- (__sme_pa(real_prev->pgd) | prev_asid))) {
+ if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) {
/*
* If we were to BUG here, we'd be very likely to kill
* the system so hard that we don't see the call trace.
*/
this_cpu_write(cpu_tlbstate.ctxs[prev_asid].tlb_gen,
next_tlb_gen);
- write_cr3(__sme_pa(next->pgd) | prev_asid);
+ write_cr3(build_cr3(next, prev_asid));
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
TLB_FLUSH_ALL);
}
if (need_flush) {
this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
- write_cr3(__sme_pa(next->pgd) | new_asid);
+ write_cr3(build_cr3(next, new_asid));
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
TLB_FLUSH_ALL);
} else {
/* The new ASID is already up to date. */
- write_cr3(__sme_pa(next->pgd) | new_asid | CR3_NOFLUSH);
+ write_cr3(build_cr3_noflush(next, new_asid));
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
}
!(cr4_read_shadow() & X86_CR4_PCIDE));
/* Force ASID 0 and force a TLB flush. */
- write_cr3(cr3 & ~CR3_PCID_MASK);
+ write_cr3(build_cr3(mm, 0));
/* Reinitialize tlbstate. */
this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
* not the first page table in the page table pool.
* Iterate through the initial page tables to find the real page table base.
*/
-static phys_addr_t xen_find_pt_base(pmd_t *pmd)
+static phys_addr_t __init xen_find_pt_base(pmd_t *pmd)
{
phys_addr_t pt_base, paddr;
unsigned pmdidx;
/* Free all resources held by a thread. */
#define release_thread(thread) do { } while(0)
-/* Copy and release all segment info associated with a VM */
-#define copy_segments(p, mm) do { } while(0)
-#define release_segments(mm) do { } while(0)
-#define forget_segments() do { } while (0)
-
extern unsigned long get_wchan(struct task_struct *p);
#define KSTK_EIP(tsk) (task_pt_regs(tsk)->pc)
kobject_init(&q->kobj, &blk_queue_ktype);
+#ifdef CONFIG_BLK_DEV_IO_TRACE
+ mutex_init(&q->blk_trace_mutex);
+#endif
mutex_init(&q->sysfs_lock);
spin_lock_init(&q->__queue_lock);
failjob_rls_rqst_payload:
kfree(job->request_payload.sg_list);
failjob_rls_job:
- kfree(job);
return -ENOMEM;
}
struct device_attribute *attr, char *buf)
{
struct hd_struct *p = dev_to_part(dev);
- struct request_queue *q = dev_to_disk(dev)->queue;
+ struct request_queue *q = part_to_disk(p)->queue;
unsigned int inflight[2];
int cpu;
struct af_alg_ctx *ctx = ask->private;
struct af_alg_tsgl *sgl;
struct scatterlist *sg;
- unsigned int i, j;
+ unsigned int i, j = 0;
while (!list_empty(&ctx->tsgl_list)) {
sgl = list_first_entry(&ctx->tsgl_list, struct af_alg_tsgl,
list);
sg = sgl->sg;
- for (i = 0, j = 0; i < sgl->cur; i++) {
+ for (i = 0; i < sgl->cur; i++) {
size_t plen = min_t(size_t, used, sg[i].length);
struct page *page = sg_page(sg + i);
{
if (!drbg)
return;
- kzfree(drbg->V);
- drbg->Vbuf = NULL;
- kzfree(drbg->C);
- drbg->Cbuf = NULL;
+ kzfree(drbg->Vbuf);
+ drbg->V = NULL;
+ kzfree(drbg->Cbuf);
+ drbg->C = NULL;
kzfree(drbg->scratchpadbuf);
drbg->scratchpadbuf = NULL;
drbg->reseed_ctr = 0;
for (i = 0; i < wdat->entries; i++) {
const struct acpi_generic_address *gas;
struct resource_entry *rentry;
- struct resource res;
+ struct resource res = {};
bool found;
gas = &entries[i].register_region;
}
ghes_do_proc(ghes, ghes->estatus);
+out:
+ ghes_clear_estatus(ghes);
+
+ if (rc == -ENOENT)
+ return rc;
+
/*
* GHESv2 type HEST entries introduce support for error acknowledgment,
* so only acknowledge the error if this support is present.
*/
- if (is_hest_type_generic_v2(ghes)) {
- rc = ghes_ack_error(ghes->generic_v2);
- if (rc)
- return rc;
- }
-out:
- ghes_clear_estatus(ghes);
+ if (is_hest_type_generic_v2(ghes))
+ return ghes_ack_error(ghes->generic_v2);
+
return rc;
}
struct fwnode_handle *child)
{
const struct acpi_device *adev = to_acpi_device_node(fwnode);
- struct acpi_device *child_adev = NULL;
const struct list_head *head;
struct list_head *next;
if (!child || is_acpi_device_node(child)) {
+ struct acpi_device *child_adev;
+
if (adev)
head = &adev->children;
else
goto nondev;
if (child) {
- child_adev = to_acpi_device_node(child);
- next = child_adev->node.next;
+ adev = to_acpi_device_node(child);
+ next = adev->node.next;
if (next == head) {
child = NULL;
goto nondev;
const struct acpi_data_node *data = to_acpi_data_node(fwnode);
struct acpi_data_node *dn;
- if (child_adev)
- head = &child_adev->data.subnodes;
+ if (adev)
+ head = &adev->data.subnodes;
else if (data)
head = &data->data.subnodes;
else
DECLARE_ACPI_FWNODE_OPS(acpi_device_fwnode_ops);
DECLARE_ACPI_FWNODE_OPS(acpi_data_fwnode_ops);
const struct fwnode_operations acpi_static_fwnode_ops;
+
+bool is_acpi_device_node(const struct fwnode_handle *fwnode)
+{
+ return !IS_ERR_OR_NULL(fwnode) &&
+ fwnode->ops == &acpi_device_fwnode_ops;
+}
+EXPORT_SYMBOL(is_acpi_device_node);
+
+bool is_acpi_data_node(const struct fwnode_handle *fwnode)
+{
+ return !IS_ERR_OR_NULL(fwnode) && fwnode->ops == &acpi_data_fwnode_ops;
+}
+EXPORT_SYMBOL(is_acpi_data_node);
struct dma_coherent_mem *mem = rmem->priv;
int ret;
- if (!mem)
- return -ENODEV;
-
- ret = dma_init_coherent_memory(rmem->base, rmem->base, rmem->size,
- DMA_MEMORY_EXCLUSIVE, &mem);
-
- if (ret) {
- pr_err("Reserved memory: failed to init DMA memory pool at %pa, size %ld MiB\n",
- &rmem->base, (unsigned long)rmem->size / SZ_1M);
- return ret;
+ if (!mem) {
+ ret = dma_init_coherent_memory(rmem->base, rmem->base,
+ rmem->size,
+ DMA_MEMORY_EXCLUSIVE, &mem);
+ if (ret) {
+ pr_err("Reserved memory: failed to init DMA memory pool at %pa, size %ld MiB\n",
+ &rmem->base, (unsigned long)rmem->size / SZ_1M);
+ return ret;
+ }
}
mem->use_dev_dma_pfn_offset = true;
rmem->priv = mem;
{
spin_lock_irq(&dev->power.lock);
dev->power.no_pm_callbacks =
- (!dev->bus || pm_ops_is_empty(dev->bus->pm)) &&
- (!dev->class || pm_ops_is_empty(dev->class->pm)) &&
+ (!dev->bus || (pm_ops_is_empty(dev->bus->pm) &&
+ !dev->bus->suspend && !dev->bus->resume)) &&
+ (!dev->class || (pm_ops_is_empty(dev->class->pm) &&
+ !dev->class->suspend && !dev->class->resume)) &&
(!dev->type || pm_ops_is_empty(dev->type->pm)) &&
(!dev->pm_domain || pm_ops_is_empty(&dev->pm_domain->ops)) &&
- (!dev->driver || pm_ops_is_empty(dev->driver->pm));
+ (!dev->driver || (pm_ops_is_empty(dev->driver->pm) &&
+ !dev->driver->suspend && !dev->driver->resume));
spin_unlock_irq(&dev->power.lock);
}
opp->available = availability_req;
+ dev_pm_opp_get(opp);
+ mutex_unlock(&opp_table->lock);
+
/* Notify the change of the OPP availability */
if (availability_req)
blocking_notifier_call_chain(&opp_table->head, OPP_EVENT_ENABLE,
blocking_notifier_call_chain(&opp_table->head,
OPP_EVENT_DISABLE, opp);
+ dev_pm_opp_put(opp);
+ goto put_table;
+
unlock:
mutex_unlock(&opp_table->lock);
+put_table:
dev_pm_opp_put_opp_table(opp_table);
return r;
}
mutex_unlock(&dev_pm_qos_sysfs_mtx);
}
-static bool dev_pm_qos_invalid_request(struct device *dev,
- struct dev_pm_qos_request *req)
+static bool dev_pm_qos_invalid_req_type(struct device *dev,
+ enum dev_pm_qos_req_type type)
{
- return !req || (req->type == DEV_PM_QOS_LATENCY_TOLERANCE
- && !dev->power.set_latency_tolerance);
+ return type == DEV_PM_QOS_LATENCY_TOLERANCE &&
+ !dev->power.set_latency_tolerance;
}
static int __dev_pm_qos_add_request(struct device *dev,
{
int ret = 0;
- if (!dev || dev_pm_qos_invalid_request(dev, req))
+ if (!dev || !req || dev_pm_qos_invalid_req_type(dev, type))
return -EINVAL;
if (WARN(dev_pm_qos_request_active(req),
if (!brd)
return -ENODEV;
- page = brd_insert_page(brd, PFN_PHYS(pgoff) / 512);
+ page = brd_insert_page(brd, (sector_t)pgoff << PAGE_SECTORS_SHIFT);
if (!page)
return -ENOSPC;
*kaddr = page_address(page);
struct loop_cmd {
struct kthread_work work;
struct request *rq;
- union {
- bool use_aio; /* use AIO interface to handle I/O */
- atomic_t ref; /* only for aio */
- };
+ bool use_aio; /* use AIO interface to handle I/O */
+ atomic_t ref; /* only for aio */
long ret;
struct kiocb iocb;
struct bio_vec *bvec;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
+ /* The block layer will pass back some non-nbd ioctls in case we have
+	 * special handling for them, but we don't, so just return an error.
+ */
+ if (_IOC_TYPE(cmd) != 0xab)
+ return -EINVAL;
+
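For reference, nbd's own ioctls are defined with type 0xab in linux/nbd.h
(NBD_SET_SOCK is _IO(0xab, 0)), so the filter above keeps them while rejecting
generic block ioctls:

	/* Illustrative: */
	_IOC_TYPE(NBD_SET_SOCK);	/* == 0xab, accepted */
	_IOC_TYPE(BLKROSET);		/* == 0x12, rejected with -EINVAL */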
mutex_lock(&nbd->config_lock);
/* Don't allow ioctl operations on a nbd device that was created with
goto out;
}
- msleep(TPM_TIMEOUT); /* CHECK */
+ tpm_msleep(TPM_TIMEOUT);
rmb();
} while (time_before(jiffies, stop));
dev_info(
&chip->dev, HW_ERR
"TPM command timed out during continue self test");
- msleep(delay_msec);
+ tpm_msleep(delay_msec);
continue;
}
}
if (rc != TPM_WARN_DOING_SELFTEST)
return rc;
- msleep(delay_msec);
+ tpm_msleep(delay_msec);
} while (--loops > 0);
return rc;
}
} else {
do {
- msleep(TPM_TIMEOUT);
+ tpm_msleep(TPM_TIMEOUT);
status = chip->ops->status(chip);
if ((status & mask) == mask)
return 0;
*/
if (rc != TPM_WARN_RETRY)
break;
- msleep(TPM_TIMEOUT_RETRY);
+ tpm_msleep(TPM_TIMEOUT_RETRY);
}
if (rc)
enum tpm_timeout {
TPM_TIMEOUT = 5, /* msecs */
- TPM_TIMEOUT_RETRY = 100 /* msecs */
+ TPM_TIMEOUT_RETRY = 100, /* msecs */
+ TPM_TIMEOUT_RANGE_US = 300 /* usecs */
};
/* TPM addresses */
int wait_for_tpm_stat(struct tpm_chip *chip, u8 mask, unsigned long timeout,
wait_queue_head_t *queue, bool check_cancel);
+static inline void tpm_msleep(unsigned int delay_msec)
+{
+ usleep_range(delay_msec * 1000,
+ (delay_msec * 1000) + TPM_TIMEOUT_RANGE_US);
+};
+
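With the constants above, a former msleep(TPM_TIMEOUT) becomes a tightly
bounded sleep; for example:

	tpm_msleep(TPM_TIMEOUT);	/* usleep_range(5000, 5300): 5..5.3 ms,
					 * instead of msleep()'s jiffy-granular
					 * (often much longer) wait */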
struct tpm_chip *tpm_chip_find_get(int chip_num);
__must_check int tpm_try_get_ops(struct tpm_chip *chip);
void tpm_put_ops(struct tpm_chip *chip);
if (rc != TPM2_RC_TESTING)
break;
- msleep(delay_msec);
+ tpm_msleep(delay_msec);
}
return rc;
SET_RUNTIME_PM_OPS(crb_pm_runtime_suspend, crb_pm_runtime_resume, NULL)
};
-static struct acpi_device_id crb_device_ids[] = {
+static const struct acpi_device_id crb_device_ids[] = {
{"MSFT0101", 0},
{"", 0},
};
static const char tpm_ibmvtpm_driver_name[] = "tpm_ibmvtpm";
-static struct vio_device_id tpm_ibmvtpm_device_table[] = {
+static const struct vio_device_id tpm_ibmvtpm_device_table[] = {
{ "IBM,vtpm", "IBM,vtpm"},
{ "", "" }
};
MODULE_DEVICE_TABLE(vio, tpm_ibmvtpm_device_table);
/**
+ *
+ * ibmvtpm_send_crq_word - Send a CRQ request
+ * @vdev: vio device struct
+ * @w1: pre-constructed first word of tpm crq (second word is reserved)
+ *
+ * Return:
+ * 0 - Success
+ * Non-zero - Failure
+ */
+static int ibmvtpm_send_crq_word(struct vio_dev *vdev, u64 w1)
+{
+ return plpar_hcall_norets(H_SEND_CRQ, vdev->unit_address, w1, 0);
+}
+
+/**
+ *
* ibmvtpm_send_crq - Send a CRQ request
*
* @vdev: vio device struct
- * @w1: first word
- * @w2: second word
+ * @valid: Valid field
+ * @msg: Type field
+ * @len: Length field
+ * @data: Data field
+ *
+ * The ibmvtpm crq is defined as follows:
+ *
+ * Byte | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7
+ * -----------------------------------------------------------------------
+ * Word0 | Valid | Type | Length | Data
+ * -----------------------------------------------------------------------
+ * Word1 | Reserved
+ * -----------------------------------------------------------------------
+ *
+ * Which matches the following structure (on bigendian host):
+ *
+ * struct ibmvtpm_crq {
+ * u8 valid;
+ * u8 msg;
+ * __be16 len;
+ * __be32 data;
+ * __be64 reserved;
+ * } __attribute__((packed, aligned(8)));
+ *
+ * However, the value is passed in a register so just compute the numeric value
+ * to load into the register avoiding byteswap altogether. Endian only affects
+ * memory loads and stores - registers are internally represented the same.
*
* Return:
- * 0 -Sucess
+ * 0 (H_SUCCESS) - Success
* Non-zero - Failure
*/
-static int ibmvtpm_send_crq(struct vio_dev *vdev, u64 w1, u64 w2)
+static int ibmvtpm_send_crq(struct vio_dev *vdev,
+ u8 valid, u8 msg, u16 len, u32 data)
{
- return plpar_hcall_norets(H_SEND_CRQ, vdev->unit_address, w1, w2);
+ u64 w1 = ((u64)valid << 56) | ((u64)msg << 48) | ((u64)len << 32) |
+ (u64)data;
+ return ibmvtpm_send_crq_word(vdev, w1);
}
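A worked packing example (hypothetical field values): valid = 0x80,
msg = 0x01, len = 0x0020 and data = 0x12345678 produce

	w1 = (0x80ULL << 56) | (0x01ULL << 48) | (0x0020ULL << 32) | 0x12345678
	   = 0x8001002012345678;

which is handed to H_SEND_CRQ as a plain register value, so no byteswapping is
needed on either endianness.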
/**
static int tpm_ibmvtpm_send(struct tpm_chip *chip, u8 *buf, size_t count)
{
struct ibmvtpm_dev *ibmvtpm = dev_get_drvdata(&chip->dev);
- struct ibmvtpm_crq crq;
- __be64 *word = (__be64 *)&crq;
int rc, sig;
if (!ibmvtpm->rtce_buf) {
spin_lock(&ibmvtpm->rtce_lock);
ibmvtpm->res_len = 0;
memcpy((void *)ibmvtpm->rtce_buf, (void *)buf, count);
- crq.valid = (u8)IBMVTPM_VALID_CMD;
- crq.msg = (u8)VTPM_TPM_COMMAND;
- crq.len = cpu_to_be16(count);
- crq.data = cpu_to_be32(ibmvtpm->rtce_dma_handle);
/*
* set the processing flag before the Hcall, since we may get the
*/
ibmvtpm->tpm_processing_cmd = true;
- rc = ibmvtpm_send_crq(ibmvtpm->vdev, be64_to_cpu(word[0]),
- be64_to_cpu(word[1]));
+ rc = ibmvtpm_send_crq(ibmvtpm->vdev,
+ IBMVTPM_VALID_CMD, VTPM_TPM_COMMAND,
+ count, ibmvtpm->rtce_dma_handle);
if (rc != H_SUCCESS) {
dev_err(ibmvtpm->dev, "tpm_ibmvtpm_send failed rc=%d\n", rc);
rc = 0;
*/
static int ibmvtpm_crq_get_rtce_size(struct ibmvtpm_dev *ibmvtpm)
{
- struct ibmvtpm_crq crq;
- u64 *buf = (u64 *) &crq;
int rc;
- crq.valid = (u8)IBMVTPM_VALID_CMD;
- crq.msg = (u8)VTPM_GET_RTCE_BUFFER_SIZE;
-
- rc = ibmvtpm_send_crq(ibmvtpm->vdev, cpu_to_be64(buf[0]),
- cpu_to_be64(buf[1]));
+ rc = ibmvtpm_send_crq(ibmvtpm->vdev,
+ IBMVTPM_VALID_CMD, VTPM_GET_RTCE_BUFFER_SIZE, 0, 0);
if (rc != H_SUCCESS)
dev_err(ibmvtpm->dev,
"ibmvtpm_crq_get_rtce_size failed rc=%d\n", rc);
*/
static int ibmvtpm_crq_get_version(struct ibmvtpm_dev *ibmvtpm)
{
- struct ibmvtpm_crq crq;
- u64 *buf = (u64 *) &crq;
int rc;
- crq.valid = (u8)IBMVTPM_VALID_CMD;
- crq.msg = (u8)VTPM_GET_VERSION;
-
- rc = ibmvtpm_send_crq(ibmvtpm->vdev, cpu_to_be64(buf[0]),
- cpu_to_be64(buf[1]));
+ rc = ibmvtpm_send_crq(ibmvtpm->vdev,
+ IBMVTPM_VALID_CMD, VTPM_GET_VERSION, 0, 0);
if (rc != H_SUCCESS)
dev_err(ibmvtpm->dev,
"ibmvtpm_crq_get_version failed rc=%d\n", rc);
{
int rc;
- rc = ibmvtpm_send_crq(ibmvtpm->vdev, INIT_CRQ_COMP_CMD, 0);
+ rc = ibmvtpm_send_crq_word(ibmvtpm->vdev, INIT_CRQ_COMP_CMD);
if (rc != H_SUCCESS)
dev_err(ibmvtpm->dev,
"ibmvtpm_crq_send_init_complete failed rc=%d\n", rc);
{
int rc;
- rc = ibmvtpm_send_crq(ibmvtpm->vdev, INIT_CRQ_CMD, 0);
+ rc = ibmvtpm_send_crq_word(ibmvtpm->vdev, INIT_CRQ_CMD);
if (rc != H_SUCCESS)
dev_err(ibmvtpm->dev,
"ibmvtpm_crq_send_init failed rc=%d\n", rc);
{
struct tpm_chip *chip = dev_get_drvdata(dev);
struct ibmvtpm_dev *ibmvtpm = dev_get_drvdata(&chip->dev);
- struct ibmvtpm_crq crq;
- u64 *buf = (u64 *) &crq;
int rc = 0;
- crq.valid = (u8)IBMVTPM_VALID_CMD;
- crq.msg = (u8)VTPM_PREPARE_TO_SUSPEND;
-
- rc = ibmvtpm_send_crq(ibmvtpm->vdev, cpu_to_be64(buf[0]),
- cpu_to_be64(buf[1]));
+ rc = ibmvtpm_send_crq(ibmvtpm->vdev,
+ IBMVTPM_VALID_CMD, VTPM_PREPARE_TO_SUSPEND, 0, 0);
if (rc != H_SUCCESS)
dev_err(ibmvtpm->dev,
"tpm_ibmvtpm_suspend failed rc=%d\n", rc);
/* check the status-register if wait_for_bit is set */
if (status & 1 << wait_for_bit)
break;
- msleep(TPM_MSLEEP_TIME);
+ tpm_msleep(TPM_MSLEEP_TIME);
}
if (i == TPM_MAX_TRIES) { /* timeout occurs */
if (wait_for_bit == STAT_XFE)
wait_and_send(chip, TPM_CTRL_WTX);
wait_and_send(chip, 0x00);
wait_and_send(chip, 0x00);
- msleep(TPM_WTX_MSLEEP_TIME);
+ tpm_msleep(TPM_WTX_MSLEEP_TIME);
}
static void tpm_wtx_abort(struct tpm_chip *chip)
wait_and_send(chip, 0x00);
wait_and_send(chip, 0x00);
number_of_wtx = 0;
- msleep(TPM_WTX_MSLEEP_TIME);
+ tpm_msleep(TPM_WTX_MSLEEP_TIME);
}
static int tpm_inf_recv(struct tpm_chip *chip, u8 * buf, size_t count)
if (access & TPM_ACCESS_VALID)
return 0;
- msleep(TPM_TIMEOUT);
+ tpm_msleep(TPM_TIMEOUT);
} while (time_before(jiffies, stop));
return -1;
}
do {
if (check_locality(chip, l))
return l;
- msleep(TPM_TIMEOUT);
+ tpm_msleep(TPM_TIMEOUT);
} while (time_before(jiffies, stop));
}
return -1;
burstcnt = (value >> 8) & 0xFFFF;
if (burstcnt)
return burstcnt;
- msleep(TPM_TIMEOUT);
+ tpm_msleep(TPM_TIMEOUT);
} while (time_before(jiffies, stop));
return -EBUSY;
}
priv->irq = irq;
chip->flags |= TPM_CHIP_FLAG_IRQ;
if (!priv->irq_tested)
- msleep(1);
+ tpm_msleep(1);
if (!priv->irq_tested)
disable_interrupts(chip);
priv->irq_tested = true;
return -readl(sched_clk_base + TIMER_VALUE);
}
-static int integrator_clocksource_init(unsigned long inrate,
- void __iomem *base)
+static int __init integrator_clocksource_init(unsigned long inrate,
+ void __iomem *base)
{
u32 ctrl = TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC;
unsigned long rate = inrate;
* platforms using "operating-points-v2" property.
*/
static const struct of_device_id blacklist[] __initconst = {
+ { .compatible = "calxeda,highbank", },
+ { .compatible = "calxeda,ecx-2000", },
+
+ { .compatible = "marvell,armadaxp", },
+
+ { .compatible = "nvidia,tegra124", },
+
+ { .compatible = "st,stih407", },
+ { .compatible = "st,stih410", },
+
+ { .compatible = "sigma,tango4", },
+
+ { .compatible = "ti,am33xx", },
+ { .compatible = "ti,am43", },
+ { .compatible = "ti,dra7", },
+
{ }
};
static const struct of_device_id ti_cpufreq_of_match[] = {
{ .compatible = "ti,am33xx", .data = &am3x_soc_data, },
- { .compatible = "ti,am4372", .data = &am4x_soc_data, },
+ { .compatible = "ti,am43", .data = &am4x_soc_data, },
{ .compatible = "ti,dra7", .data = &dra7_soc_data },
{},
};
ret = dt_init_idle_driver(drv, arm_idle_state_match, 1);
if (ret <= 0) {
ret = ret ? : -ENODEV;
- goto out_fail;
+ goto init_fail;
}
ret = cpuidle_register_driver(drv);
if (ret) {
pr_err("Failed to register cpuidle driver\n");
- goto out_fail;
+ goto init_fail;
}
/*
}
return 0;
+init_fail:
+ kfree(drv);
out_fail:
while (--cpu >= 0) {
dev = per_cpu(cpuidle_devices, cpu);
config CRYPTO_DEV_FSL_CAAM
tristate "Freescale CAAM-Multicore driver backend"
depends on FSL_SOC || ARCH_MXC || ARCH_LAYERSCAPE
+ select SOC_BUS
help
Enables the driver module for Freescale's Cryptographic Accelerator
and Assurance Module (CAAM), also known as the SEC version 4 (SEC4).
To compile this as a module, choose M here: the module
will be called caamrng.
-config CRYPTO_DEV_FSL_CAAM_IMX
- def_bool SOC_IMX6 || SOC_IMX7D
- depends on CRYPTO_DEV_FSL_CAAM
-
config CRYPTO_DEV_FSL_CAAM_DEBUG
bool "Enable debug output in CAAM driver"
depends on CRYPTO_DEV_FSL_CAAM
#include <linux/device.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
+#include <linux/sys_soc.h>
#include "compat.h"
#include "regs.h"
EXPORT_SYMBOL(caam_little_end);
bool caam_dpaa2;
EXPORT_SYMBOL(caam_dpaa2);
+bool caam_imx;
+EXPORT_SYMBOL(caam_imx);
#ifdef CONFIG_CAAM_QI
#include "qi.h"
* i.MX targets tend to have clock control subsystems that can
* enable/disable clocking to our device.
*/
-#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX
static inline struct clk *caam_drv_identify_clk(struct device *dev,
char *clk_name)
{
- return devm_clk_get(dev, clk_name);
+ return caam_imx ? devm_clk_get(dev, clk_name) : NULL;
}
-#else
-static inline struct clk *caam_drv_identify_clk(struct device *dev,
- char *clk_name)
-{
- return NULL;
-}
-#endif
/*
* Descriptor to instantiate RNG State Handle 0 in normal mode and
{
int ret, ring, gen_sk, ent_delay = RTSDCTL_ENT_DLY_MIN;
u64 caam_id;
+ static const struct soc_device_attribute imx_soc[] = {
+ {.family = "Freescale i.MX"},
+ {},
+ };
struct device *dev;
struct device_node *nprop, *np;
struct caam_ctrl __iomem *ctrl;
dev_set_drvdata(dev, ctrlpriv);
nprop = pdev->dev.of_node;
+ caam_imx = (bool)soc_device_match(imx_soc);
+
/* Enable clocking */
clk = caam_drv_identify_clk(&pdev->dev, "ipg");
if (IS_ERR(clk)) {
*/
extern bool caam_little_end;
+extern bool caam_imx;
#define caam_to_cpu(len) \
static inline u##len caam##len ## _to_cpu(u##len val) \
#else /* CONFIG_64BIT */
static inline void wr_reg64(void __iomem *reg, u64 data)
{
-#ifndef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX
- if (caam_little_end) {
+ if (!caam_imx && caam_little_end) {
wr_reg32((u32 __iomem *)(reg) + 1, data >> 32);
wr_reg32((u32 __iomem *)(reg), data);
- } else
-#endif
- {
+ } else {
wr_reg32((u32 __iomem *)(reg), data >> 32);
wr_reg32((u32 __iomem *)(reg) + 1, data);
}
static inline u64 rd_reg64(void __iomem *reg)
{
-#ifndef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX
- if (caam_little_end)
+ if (!caam_imx && caam_little_end)
return ((u64)rd_reg32((u32 __iomem *)(reg) + 1) << 32 |
(u64)rd_reg32((u32 __iomem *)(reg)));
- else
-#endif
- return ((u64)rd_reg32((u32 __iomem *)(reg)) << 32 |
- (u64)rd_reg32((u32 __iomem *)(reg) + 1));
+
+ return ((u64)rd_reg32((u32 __iomem *)(reg)) << 32 |
+ (u64)rd_reg32((u32 __iomem *)(reg) + 1));
}
#endif /* CONFIG_64BIT */
+static inline u64 cpu_to_caam_dma64(dma_addr_t value)
+{
+ if (caam_imx)
+ return (((u64)cpu_to_caam32(lower_32_bits(value)) << 32) |
+ (u64)cpu_to_caam32(upper_32_bits(value)));
+
+ return cpu_to_caam64(value);
+}
+
+static inline u64 caam_dma64_to_cpu(u64 value)
+{
+ if (caam_imx)
+ return (((u64)caam32_to_cpu(lower_32_bits(value)) << 32) |
+ (u64)caam32_to_cpu(upper_32_bits(value)));
+
+ return caam64_to_cpu(value);
+}
+
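A worked example of the i.MX path (illustrative value, assuming a
little-endian host talking to the big-endian i.MX CAAM, i.e. caam_imx set and
caam_little_end clear):

	/*
	 * value         = 0x0000000112345678 (dma_addr_t)
	 * lower 32 bits = 0x12345678 -> cpu_to_caam32 -> 0x78563412
	 * upper 32 bits = 0x00000001 -> cpu_to_caam32 -> 0x01000000
	 * result        = 0x7856341201000000
	 *
	 * The two 32-bit halves end up word-swapped and each half is
	 * byte-swapped, matching how the i.MX CAAM expects 64-bit pointers.
	 */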
#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-#ifdef CONFIG_SOC_IMX7D
-#define cpu_to_caam_dma(value) \
- (((u64)cpu_to_caam32(lower_32_bits(value)) << 32) | \
- (u64)cpu_to_caam32(upper_32_bits(value)))
-#define caam_dma_to_cpu(value) \
- (((u64)caam32_to_cpu(lower_32_bits(value)) << 32) | \
- (u64)caam32_to_cpu(upper_32_bits(value)))
-#else
-#define cpu_to_caam_dma(value) cpu_to_caam64(value)
-#define caam_dma_to_cpu(value) caam64_to_cpu(value)
-#endif /* CONFIG_SOC_IMX7D */
+#define cpu_to_caam_dma(value) cpu_to_caam_dma64(value)
+#define caam_dma_to_cpu(value) caam_dma64_to_cpu(value)
#else
#define cpu_to_caam_dma(value) cpu_to_caam32(value)
#define caam_dma_to_cpu(value) caam32_to_cpu(value)
-#endif /* CONFIG_ARCH_DMA_ADDR_T_64BIT */
-
-#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX
-#define cpu_to_caam_dma64(value) \
- (((u64)cpu_to_caam32(lower_32_bits(value)) << 32) | \
- (u64)cpu_to_caam32(upper_32_bits(value)))
-#else
-#define cpu_to_caam_dma64(value) cpu_to_caam64(value)
-#endif
+#endif /* CONFIG_ARCH_DMA_ADDR_T_64BIT */
/*
* jr_outentry
struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
struct safexcel_crypto_priv *priv = ctx->priv;
struct skcipher_request req;
- struct safexcel_inv_result result = { 0 };
+ struct safexcel_inv_result result = {};
int ring = ctx->base.ring;
memset(&req, 0, sizeof(struct skcipher_request));
struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(tfm);
struct safexcel_crypto_priv *priv = ctx->priv;
struct ahash_request req;
- struct safexcel_inv_result result = { 0 };
+ struct safexcel_inv_result result = {};
int ring = ctx->base.ring;
memset(&req, 0, sizeof(struct ahash_request));
req_ctx->swinit = 0;
} else {
desc->ptr[1] = zero_entry;
- /* Indicate next op is not the first. */
- req_ctx->first = 0;
}
+ /* Indicate next op is not the first. */
+ req_ctx->first = 0;
/* HMAC key */
if (ctx->keylen)
sg_count = edesc->src_nents ?: 1;
if (is_sec1 && sg_count > 1)
- sg_copy_to_buffer(areq->src, sg_count, edesc->buf, length);
+ sg_copy_to_buffer(req_ctx->psrc, sg_count, edesc->buf, length);
else
sg_count = dma_map_sg(dev, req_ctx->psrc, sg_count,
DMA_TO_DEVICE);
t_alg->algt.alg.hash.final = ahash_final;
t_alg->algt.alg.hash.finup = ahash_finup;
t_alg->algt.alg.hash.digest = ahash_digest;
- t_alg->algt.alg.hash.setkey = ahash_setkey;
+ if (!strncmp(alg->cra_name, "hmac", 4))
+ t_alg->algt.alg.hash.setkey = ahash_setkey;
t_alg->algt.alg.hash.import = ahash_import;
t_alg->algt.alg.hash.export = ahash_export;
{
if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ)
kq->mqd->destroy_mqd(kq->mqd,
- NULL,
+ kq->queue->mqd,
false,
QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS,
kq->queue->pipe,
int num_queues = 0;
struct queue *cur;
- memset(&q_properties, 0, sizeof(struct queue_properties));
memcpy(&q_properties, properties, sizeof(struct queue_properties));
q = NULL;
kq = NULL;
#include <linux/clk.h>
#include <linux/component.h>
#include <linux/iopoll.h>
+#include <linux/irq.h>
#include <linux/mfd/syscon.h>
#include <linux/of_device.h>
#include <linux/of_gpio.h>
static int exynos_drm_suspend(struct device *dev)
{
struct drm_device *drm_dev = dev_get_drvdata(dev);
- struct drm_connector *connector;
- struct drm_connector_list_iter conn_iter;
+ struct exynos_drm_private *private = drm_dev->dev_private;
if (pm_runtime_suspended(dev) || !drm_dev)
return 0;
- drm_connector_list_iter_begin(drm_dev, &conn_iter);
- drm_for_each_connector_iter(connector, &conn_iter) {
- int old_dpms = connector->dpms;
-
- if (connector->funcs->dpms)
- connector->funcs->dpms(connector, DRM_MODE_DPMS_OFF);
-
- /* Set the old mode back to the connector for resume */
- connector->dpms = old_dpms;
+ drm_kms_helper_poll_disable(drm_dev);
+ exynos_drm_fbdev_suspend(drm_dev);
+ private->suspend_state = drm_atomic_helper_suspend(drm_dev);
+ if (IS_ERR(private->suspend_state)) {
+ exynos_drm_fbdev_resume(drm_dev);
+ drm_kms_helper_poll_enable(drm_dev);
+ return PTR_ERR(private->suspend_state);
}
- drm_connector_list_iter_end(&conn_iter);
return 0;
}
static int exynos_drm_resume(struct device *dev)
{
struct drm_device *drm_dev = dev_get_drvdata(dev);
- struct drm_connector *connector;
- struct drm_connector_list_iter conn_iter;
+ struct exynos_drm_private *private = drm_dev->dev_private;
if (pm_runtime_suspended(dev) || !drm_dev)
return 0;
- drm_connector_list_iter_begin(drm_dev, &conn_iter);
- drm_for_each_connector_iter(connector, &conn_iter) {
- if (connector->funcs->dpms) {
- int dpms = connector->dpms;
-
- connector->dpms = DRM_MODE_DPMS_OFF;
- connector->funcs->dpms(connector, dpms);
- }
- }
- drm_connector_list_iter_end(&conn_iter);
+ drm_atomic_helper_resume(drm_dev, private->suspend_state);
+ exynos_drm_fbdev_resume(drm_dev);
+ drm_kms_helper_poll_enable(drm_dev);
return 0;
}
*/
struct exynos_drm_private {
struct drm_fb_helper *fb_helper;
+ struct drm_atomic_state *suspend_state;
struct device *dma_dev;
void *mapping;
#include <drm/drm_crtc_helper.h>
#include <drm/exynos_drm.h>
+#include <linux/console.h>
+
#include "exynos_drm_drv.h"
#include "exynos_drm_fb.h"
#include "exynos_drm_fbdev.h"
drm_fb_helper_hotplug_event(fb_helper);
}
+
+void exynos_drm_fbdev_suspend(struct drm_device *dev)
+{
+ struct exynos_drm_private *private = dev->dev_private;
+
+ console_lock();
+ drm_fb_helper_set_suspend(private->fb_helper, 1);
+ console_unlock();
+}
+
+void exynos_drm_fbdev_resume(struct drm_device *dev)
+{
+ struct exynos_drm_private *private = dev->dev_private;
+
+ console_lock();
+ drm_fb_helper_set_suspend(private->fb_helper, 0);
+ console_unlock();
+}
void exynos_drm_fbdev_fini(struct drm_device *dev);
void exynos_drm_fbdev_restore_mode(struct drm_device *dev);
void exynos_drm_output_poll_changed(struct drm_device *dev);
+void exynos_drm_fbdev_suspend(struct drm_device *drm);
+void exynos_drm_fbdev_resume(struct drm_device *drm);
#else
#define exynos_drm_output_poll_changed (NULL)
+static inline void exynos_drm_fbdev_suspend(struct drm_device *drm)
+{
+}
+
+static inline void exynos_drm_fbdev_resume(struct drm_device *drm)
+{
+}
+
#endif
#endif
struct drm_device *dev = encoder->dev;
struct drm_connector *connector;
struct drm_display_mode *m;
+ struct drm_connector_list_iter conn_iter;
int mode_ok;
drm_mode_set_crtcinfo(adjusted_mode, 0);
- list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &conn_iter);
+ drm_for_each_connector_iter(connector, &conn_iter) {
if (connector->encoder == encoder)
break;
}
+ if (connector)
+ drm_connector_get(connector);
+ drm_connector_list_iter_end(&conn_iter);
- if (connector->encoder != encoder)
+ if (!connector)
return true;
mode_ok = hdmi_mode_valid(connector, adjusted_mode);
if (mode_ok == MODE_OK)
- return true;
+ goto cleanup;
/*
* Find the most suitable mode and copy it to adjusted_mode.
}
}
+cleanup:
+ drm_connector_put(connector);
+
return true;
}
static int emulate_pci_bar_write(struct intel_vgpu *vgpu, unsigned int offset,
void *p_data, unsigned int bytes)
{
- unsigned int bar_index =
- (rounddown(offset, 8) % PCI_BASE_ADDRESS_0) / 8;
u32 new = *(u32 *)(p_data);
bool lo = IS_ALIGNED(offset, 8);
u64 size;
int ret = 0;
bool mmio_enabled =
vgpu_cfg_space(vgpu)[PCI_COMMAND] & PCI_COMMAND_MEMORY;
+ struct intel_vgpu_pci_bar *bars = vgpu->cfg_space.bar;
- if (WARN_ON(bar_index >= INTEL_GVT_PCI_BAR_MAX))
- return -EINVAL;
-
+ /*
+ * Power-up software can determine how much address
+ * space the device requires by writing a value of
+ * all 1's to the register and then reading the value
+ * back. The device will return 0's in all don't-care
+ * address bits.
+ */
if (new == 0xffffffff) {
- /*
- * Power-up software can determine how much address
- * space the device requires by writing a value of
- * all 1's to the register and then reading the value
- * back. The device will return 0's in all don't-care
- * address bits.
- */
- size = vgpu->cfg_space.bar[bar_index].size;
- if (lo) {
- new = rounddown(new, size);
- } else {
- u32 val = vgpu_cfg_space(vgpu)[rounddown(offset, 8)];
- /* for 32bit mode bar it returns all-0 in upper 32
- * bit, for 64bit mode bar it will calculate the
- * size with lower 32bit and return the corresponding
- * value
+ switch (offset) {
+ case PCI_BASE_ADDRESS_0:
+ case PCI_BASE_ADDRESS_1:
+ size = ~(bars[INTEL_GVT_PCI_BAR_GTTMMIO].size -1);
+ intel_vgpu_write_pci_bar(vgpu, offset,
+ size >> (lo ? 0 : 32), lo);
+ /*
+ * Untrap the BAR, since guest hasn't configured a
+ * valid GPA
*/
- if (val & PCI_BASE_ADDRESS_MEM_TYPE_64)
- new &= (~(size-1)) >> 32;
- else
- new = 0;
- }
- /*
- * Unmapp & untrap the BAR, since guest hasn't configured a
- * valid GPA
- */
- switch (bar_index) {
- case INTEL_GVT_PCI_BAR_GTTMMIO:
ret = trap_gttmmio(vgpu, false);
break;
- case INTEL_GVT_PCI_BAR_APERTURE:
+ case PCI_BASE_ADDRESS_2:
+ case PCI_BASE_ADDRESS_3:
+ size = ~(bars[INTEL_GVT_PCI_BAR_APERTURE].size -1);
+ intel_vgpu_write_pci_bar(vgpu, offset,
+ size >> (lo ? 0 : 32), lo);
ret = map_aperture(vgpu, false);
break;
+ default:
+ /* Unimplemented BARs */
+ intel_vgpu_write_pci_bar(vgpu, offset, 0x0, false);
}
- intel_vgpu_write_pci_bar(vgpu, offset, new, lo);
} else {
- /*
- * Unmapp & untrap the old BAR first, since guest has
- * re-configured the BAR
- */
- switch (bar_index) {
- case INTEL_GVT_PCI_BAR_GTTMMIO:
- ret = trap_gttmmio(vgpu, false);
+ switch (offset) {
+ case PCI_BASE_ADDRESS_0:
+ case PCI_BASE_ADDRESS_1:
+ /*
+ * Untrap the old BAR first, since guest has
+ * re-configured the BAR
+ */
+ trap_gttmmio(vgpu, false);
+ intel_vgpu_write_pci_bar(vgpu, offset, new, lo);
+ ret = trap_gttmmio(vgpu, mmio_enabled);
break;
- case INTEL_GVT_PCI_BAR_APERTURE:
- ret = map_aperture(vgpu, false);
+ case PCI_BASE_ADDRESS_2:
+ case PCI_BASE_ADDRESS_3:
+ map_aperture(vgpu, false);
+ intel_vgpu_write_pci_bar(vgpu, offset, new, lo);
+ ret = map_aperture(vgpu, mmio_enabled);
break;
- }
- intel_vgpu_write_pci_bar(vgpu, offset, new, lo);
- /* Track the new BAR */
- if (mmio_enabled) {
- switch (bar_index) {
- case INTEL_GVT_PCI_BAR_GTTMMIO:
- ret = trap_gttmmio(vgpu, true);
- break;
- case INTEL_GVT_PCI_BAR_APERTURE:
- ret = map_aperture(vgpu, true);
- break;
- }
+ default:
+ intel_vgpu_write_pci_bar(vgpu, offset, new, lo);
}
}
return ret;
}
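
The comment in the hunk above restates the PCI spec's sizing handshake. A hedged sketch of how system software sizes a 32-bit memory BAR with it; probe_bar_size(), pci_read() and pci_write() are hypothetical config-space accessors, while PCI_BASE_ADDRESS_MEM_MASK is the usual pci_regs.h mask:

/* Not part of the patch: the BAR sizing handshake from the host side. */
static u64 probe_bar_size(u32 bar_offset)
{
	u32 orig, sized;

	orig = pci_read(bar_offset);		/* save the programmed base */
	pci_write(bar_offset, 0xffffffff);	/* write all 1's */
	sized = pci_read(bar_offset);		/* 0's come back in don't-care bits */
	pci_write(bar_offset, orig);		/* restore the original base */

	sized &= PCI_BASE_ADDRESS_MEM_MASK;	/* drop the attribute bits */
	return sized ? (u64)(~sized + 1) : 0;	/* two's complement yields the size */
}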
switch (rounddown(offset, 4)) {
- case PCI_BASE_ADDRESS_0:
- case PCI_BASE_ADDRESS_1:
- case PCI_BASE_ADDRESS_2:
- case PCI_BASE_ADDRESS_3:
+ case PCI_BASE_ADDRESS_0 ... PCI_BASE_ADDRESS_5:
if (WARN_ON(!IS_ALIGNED(offset, 4)))
return -EINVAL;
return emulate_pci_bar_write(vgpu, offset, p_data, bytes);
struct intel_gvt *gvt = vgpu->gvt;
const struct intel_gvt_device_info *info = &gvt->device_info;
u16 *gmch_ctl;
- int i;
memcpy(vgpu_cfg_space(vgpu), gvt->firmware.cfg_space,
info->cfg_space_size);
*/
memset(vgpu_cfg_space(vgpu) + PCI_BASE_ADDRESS_1, 0, 4);
memset(vgpu_cfg_space(vgpu) + PCI_BASE_ADDRESS_3, 0, 4);
+ memset(vgpu_cfg_space(vgpu) + PCI_BASE_ADDRESS_4, 0, 8);
memset(vgpu_cfg_space(vgpu) + INTEL_GVT_PCI_OPREGION, 0, 4);
- for (i = 0; i < INTEL_GVT_MAX_BAR_NUM; i++) {
- vgpu->cfg_space.bar[i].size = pci_resource_len(
- gvt->dev_priv->drm.pdev, i * 2);
- vgpu->cfg_space.bar[i].tracked = false;
- }
+ vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_GTTMMIO].size =
+ pci_resource_len(gvt->dev_priv->drm.pdev, 0);
+ vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_APERTURE].size =
+ pci_resource_len(gvt->dev_priv->drm.pdev, 2);
}
/**
pipe);
int position;
int vbl_start, vbl_end, hsync_start, htotal, vtotal;
- bool in_vbl = true;
unsigned long irqflags;
if (WARN_ON(!mode->crtc_clock)) {
spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
- in_vbl = position >= vbl_start && position < vbl_end;
-
/*
* While in vblank, position will be negative
* counting up towards 0 at vbl_end. And outside
if (mode_cmd->handles[i] != mode_cmd->handles[0]) {
DRM_DEBUG_KMS("bad plane %d handle\n", i);
- return -EINVAL;
+ goto err;
}
stride_alignment = intel_fb_stride_alignment(fb, i);
struct intel_crtc_state *old_crtc_state,
struct drm_connector_state *old_conn_state)
{
- struct drm_device *dev = encoder->base.dev;
- struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
enum port port;
intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_OFF);
intel_panel_disable_backlight(old_conn_state);
- /*
- * Disable Device ready before the port shutdown in order
- * to avoid split screen
- */
- if (IS_BROXTON(dev_priv)) {
- for_each_dsi_port(port, intel_dsi->ports)
- I915_WRITE(MIPI_DEVICE_READY(port), 0);
- }
-
/*
* According to the spec we should send SHUTDOWN before
* MIPI_SEQ_DISPLAY_OFF only for v3+ VBTs, but field testing
if (!panel->backlight.max)
return -ENODEV;
+ panel->backlight.min = get_backlight_min_vbt(connector);
+
val = bxt_get_backlight(connector);
val = intel_panel_compute_brightness(connector, val);
panel->backlight.level = clamp(val, panel->backlight.min,
if (!panel->backlight.max)
return -ENODEV;
+ panel->backlight.min = get_backlight_min_vbt(connector);
+
val = bxt_get_backlight(connector);
val = intel_panel_compute_brightness(connector, val);
panel->backlight.level = clamp(val, panel->backlight.min,
atomic_set(&qp->qp_sec->error_list_count, 0);
init_completion(&qp->qp_sec->error_complete);
ret = security_ib_alloc_security(&qp->qp_sec->security);
- if (ret)
+ if (ret) {
kfree(qp->qp_sec);
+ qp->qp_sec = NULL;
+ }
return ret;
}
resp.raw_packet_caps = attr.raw_packet_caps;
resp.response_length += sizeof(resp.raw_packet_caps);
- if (ucore->outlen < resp.response_length + sizeof(resp.xrq_caps))
+ if (ucore->outlen < resp.response_length + sizeof(resp.tm_caps))
goto end;
- resp.xrq_caps.max_rndv_hdr_size = attr.xrq_caps.max_rndv_hdr_size;
- resp.xrq_caps.max_num_tags = attr.xrq_caps.max_num_tags;
- resp.xrq_caps.max_ops = attr.xrq_caps.max_ops;
- resp.xrq_caps.max_sge = attr.xrq_caps.max_sge;
- resp.xrq_caps.flags = attr.xrq_caps.flags;
- resp.response_length += sizeof(resp.xrq_caps);
+ resp.tm_caps.max_rndv_hdr_size = attr.tm_caps.max_rndv_hdr_size;
+ resp.tm_caps.max_num_tags = attr.tm_caps.max_num_tags;
+ resp.tm_caps.max_ops = attr.tm_caps.max_ops;
+ resp.tm_caps.max_sge = attr.tm_caps.max_sge;
+ resp.tm_caps.flags = attr.tm_caps.flags;
+ resp.response_length += sizeof(resp.tm_caps);
end:
err = ib_copy_to_udata(ucore, &resp, resp.response_length);
return err;
*/
if (!ib_query_qp(qp, &attr, IB_QP_STATE | IB_QP_PORT, &init_attr)) {
if (attr.qp_state >= IB_QPS_INIT) {
- if (qp->device->get_link_layer(qp->device, attr.port_num) !=
+ if (rdma_port_get_link_layer(qp->device, attr.port_num) !=
IB_LINK_LAYER_INFINIBAND)
return true;
goto lid_check;
/* Can't get a quick answer, iterate over all ports */
for (port = 0; port < qp->device->phys_port_cnt; port++)
- if (qp->device->get_link_layer(qp->device, port) !=
+ if (rdma_port_get_link_layer(qp->device, port) !=
IB_LINK_LAYER_INFINIBAND)
num_eth_ports++;
struct ib_device ibdev;
struct list_head list;
unsigned long flags;
-#define BNXT_RE_FLAG_NETDEV_REGISTERED 0
-#define BNXT_RE_FLAG_IBDEV_REGISTERED 1
-#define BNXT_RE_FLAG_GOT_MSIX 2
-#define BNXT_RE_FLAG_RCFW_CHANNEL_EN 8
-#define BNXT_RE_FLAG_QOS_WORK_REG 16
+#define BNXT_RE_FLAG_NETDEV_REGISTERED 0
+#define BNXT_RE_FLAG_IBDEV_REGISTERED 1
+#define BNXT_RE_FLAG_GOT_MSIX 2
+#define BNXT_RE_FLAG_HAVE_L2_REF 3
+#define BNXT_RE_FLAG_RCFW_CHANNEL_EN 4
+#define BNXT_RE_FLAG_QOS_WORK_REG 5
+#define BNXT_RE_FLAG_TASK_IN_PROG 6
struct net_device *netdev;
unsigned int version, major, minor;
struct bnxt_en_dev *en_dev;
struct delayed_work worker;
u8 cur_prio_map;
+ u8 active_speed;
+ u8 active_width;
/* FP Notification Queue (CQ & SRQ) */
struct tasklet_struct nq_task;
port_attr->sm_sl = 0;
port_attr->subnet_timeout = 0;
port_attr->init_type_reply = 0;
- /* call the underlying netdev's ethtool hooks to query speed settings
- * for which we acquire rtnl_lock _only_ if it's registered with
- * IB stack to avoid race in the NETDEV_UNREG path
- */
- if (test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags))
- if (ib_get_eth_speed(ibdev, port_num, &port_attr->active_speed,
- &port_attr->active_width))
- return -EINVAL;
+ port_attr->active_speed = rdev->active_speed;
+ port_attr->active_width = rdev->active_width;
+
return 0;
}
struct bnxt_re_gid_ctx *ctx, **ctx_tbl;
struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl;
+ struct bnxt_qplib_gid *gid_to_del;
/* Delete the entry from the hardware */
ctx = *context;
if (sgid_tbl && sgid_tbl->active) {
if (ctx->idx >= sgid_tbl->max)
return -EINVAL;
+ gid_to_del = &sgid_tbl->tbl[ctx->idx];
+		/* DEL_GID is called in WQ context (netdevice_event_work_handler)
+		 * or via the ib_unregister_device path. In the former case QP1
+		 * may not be destroyed yet, in which case just return, as FW
+		 * needs that entry to be present and will fail its deletion.
+		 * We could get invoked again after QP1 is destroyed, OR get an
+		 * ADD_GID call with a different GID value for the same index,
+		 * where we would issue a MODIFY_GID cmd to update the GID entry -- TBD
+		 */
+ if (ctx->idx == 0 &&
+ rdma_link_local_addr((struct in6_addr *)gid_to_del) &&
+ ctx->refcnt == 1 && rdev->qp1_sqp) {
+ dev_dbg(rdev_to_dev(rdev),
+ "Trying to delete GID0 while QP1 is alive\n");
+ return -EFAULT;
+ }
ctx->refcnt--;
if (!ctx->refcnt) {
- rc = bnxt_qplib_del_sgid(sgid_tbl,
- &sgid_tbl->tbl[ctx->idx],
- true);
+ rc = bnxt_qplib_del_sgid(sgid_tbl, gid_to_del, true);
if (rc) {
dev_err(rdev_to_dev(rdev),
"Failed to remove GID: %#x", rc);
kfree(rdev->sqp_ah);
kfree(rdev->qp1_sqp);
+ rdev->qp1_sqp = NULL;
+ rdev->sqp_ah = NULL;
}
if (!IS_ERR_OR_NULL(qp->rumem))
qp->qplib_qp.modify_flags |=
CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU;
qp->qplib_qp.path_mtu = __from_ib_mtu(qp_attr->path_mtu);
+ qp->qplib_qp.mtu = ib_mtu_enum_to_int(qp_attr->path_mtu);
} else if (qp_attr->qp_state == IB_QPS_RTR) {
qp->qplib_qp.modify_flags |=
CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU;
qp->qplib_qp.path_mtu =
__from_ib_mtu(iboe_get_mtu(rdev->netdev->mtu));
+ qp->qplib_qp.mtu =
+ ib_mtu_enum_to_int(iboe_get_mtu(rdev->netdev->mtu));
}
if (qp_attr_mask & IB_QP_TIMEOUT) {
{
struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
struct bnxt_re_dev *rdev = qp->rdev;
- struct bnxt_qplib_qp qplib_qp;
+ struct bnxt_qplib_qp *qplib_qp;
int rc;
- memset(&qplib_qp, 0, sizeof(struct bnxt_qplib_qp));
- qplib_qp.id = qp->qplib_qp.id;
- qplib_qp.ah.host_sgid_index = qp->qplib_qp.ah.host_sgid_index;
+ qplib_qp = kzalloc(sizeof(*qplib_qp), GFP_KERNEL);
+ if (!qplib_qp)
+ return -ENOMEM;
+
+ qplib_qp->id = qp->qplib_qp.id;
+ qplib_qp->ah.host_sgid_index = qp->qplib_qp.ah.host_sgid_index;
- rc = bnxt_qplib_query_qp(&rdev->qplib_res, &qplib_qp);
+ rc = bnxt_qplib_query_qp(&rdev->qplib_res, qplib_qp);
if (rc) {
dev_err(rdev_to_dev(rdev), "Failed to query HW QP");
- return rc;
+ goto out;
}
- qp_attr->qp_state = __to_ib_qp_state(qplib_qp.state);
- qp_attr->en_sqd_async_notify = qplib_qp.en_sqd_async_notify ? 1 : 0;
- qp_attr->qp_access_flags = __to_ib_access_flags(qplib_qp.access);
- qp_attr->pkey_index = qplib_qp.pkey_index;
- qp_attr->qkey = qplib_qp.qkey;
+ qp_attr->qp_state = __to_ib_qp_state(qplib_qp->state);
+ qp_attr->en_sqd_async_notify = qplib_qp->en_sqd_async_notify ? 1 : 0;
+ qp_attr->qp_access_flags = __to_ib_access_flags(qplib_qp->access);
+ qp_attr->pkey_index = qplib_qp->pkey_index;
+ qp_attr->qkey = qplib_qp->qkey;
qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
- rdma_ah_set_grh(&qp_attr->ah_attr, NULL, qplib_qp.ah.flow_label,
- qplib_qp.ah.host_sgid_index,
- qplib_qp.ah.hop_limit,
- qplib_qp.ah.traffic_class);
- rdma_ah_set_dgid_raw(&qp_attr->ah_attr, qplib_qp.ah.dgid.data);
- rdma_ah_set_sl(&qp_attr->ah_attr, qplib_qp.ah.sl);
- ether_addr_copy(qp_attr->ah_attr.roce.dmac, qplib_qp.ah.dmac);
- qp_attr->path_mtu = __to_ib_mtu(qplib_qp.path_mtu);
- qp_attr->timeout = qplib_qp.timeout;
- qp_attr->retry_cnt = qplib_qp.retry_cnt;
- qp_attr->rnr_retry = qplib_qp.rnr_retry;
- qp_attr->min_rnr_timer = qplib_qp.min_rnr_timer;
- qp_attr->rq_psn = qplib_qp.rq.psn;
- qp_attr->max_rd_atomic = qplib_qp.max_rd_atomic;
- qp_attr->sq_psn = qplib_qp.sq.psn;
- qp_attr->max_dest_rd_atomic = qplib_qp.max_dest_rd_atomic;
- qp_init_attr->sq_sig_type = qplib_qp.sig_type ? IB_SIGNAL_ALL_WR :
- IB_SIGNAL_REQ_WR;
- qp_attr->dest_qp_num = qplib_qp.dest_qpn;
+ rdma_ah_set_grh(&qp_attr->ah_attr, NULL, qplib_qp->ah.flow_label,
+ qplib_qp->ah.host_sgid_index,
+ qplib_qp->ah.hop_limit,
+ qplib_qp->ah.traffic_class);
+ rdma_ah_set_dgid_raw(&qp_attr->ah_attr, qplib_qp->ah.dgid.data);
+ rdma_ah_set_sl(&qp_attr->ah_attr, qplib_qp->ah.sl);
+ ether_addr_copy(qp_attr->ah_attr.roce.dmac, qplib_qp->ah.dmac);
+ qp_attr->path_mtu = __to_ib_mtu(qplib_qp->path_mtu);
+ qp_attr->timeout = qplib_qp->timeout;
+ qp_attr->retry_cnt = qplib_qp->retry_cnt;
+ qp_attr->rnr_retry = qplib_qp->rnr_retry;
+ qp_attr->min_rnr_timer = qplib_qp->min_rnr_timer;
+ qp_attr->rq_psn = qplib_qp->rq.psn;
+ qp_attr->max_rd_atomic = qplib_qp->max_rd_atomic;
+ qp_attr->sq_psn = qplib_qp->sq.psn;
+ qp_attr->max_dest_rd_atomic = qplib_qp->max_dest_rd_atomic;
+ qp_init_attr->sq_sig_type = qplib_qp->sig_type ? IB_SIGNAL_ALL_WR :
+ IB_SIGNAL_REQ_WR;
+ qp_attr->dest_qp_num = qplib_qp->dest_qpn;
qp_attr->cap.max_send_wr = qp->qplib_qp.sq.max_wqe;
qp_attr->cap.max_send_sge = qp->qplib_qp.sq.max_sge;
qp_attr->cap.max_inline_data = qp->qplib_qp.max_inline_data;
qp_init_attr->cap = qp_attr->cap;
- return 0;
+out:
+ kfree(qplib_qp);
+ return rc;
}
/* Routine for sending QP1 packets for RoCE V1 and V2
switch (wr->opcode) {
case IB_WR_ATOMIC_CMP_AND_SWP:
wqe->type = BNXT_QPLIB_SWQE_TYPE_ATOMIC_CMP_AND_SWP;
+ wqe->atomic.cmp_data = atomic_wr(wr)->compare_add;
wqe->atomic.swap_data = atomic_wr(wr)->swap;
break;
case IB_WR_ATOMIC_FETCH_AND_ADD:
return rc;
}
- if (mr->npages && mr->pages) {
+ if (mr->pages) {
rc = bnxt_qplib_free_fast_reg_page_list(&rdev->qplib_res,
&mr->qplib_frpl);
kfree(mr->pages);
}
}
set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags);
+ ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed,
+ &rdev->active_width);
bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_PORT_ACTIVE);
bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_GID_CHANGE);
else if (netif_carrier_ok(rdev->netdev))
bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1,
IB_EVENT_PORT_ACTIVE);
+ ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed,
+ &rdev->active_width);
break;
default:
break;
}
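+	/*
+	 * Order the work done above before clearing the flag that the
+	 * NETDEV_UNREGISTER path checks.
+	 */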
+ smp_mb__before_atomic();
+ clear_bit(BNXT_RE_FLAG_TASK_IN_PROG, &rdev->flags);
kfree(re_work);
}
break;
case NETDEV_UNREGISTER:
+ /* netdev notifier will call NETDEV_UNREGISTER again later since
+ * we are still holding the reference to the netdev
+ */
+ if (test_bit(BNXT_RE_FLAG_TASK_IN_PROG, &rdev->flags))
+ goto exit;
bnxt_re_ib_unreg(rdev, false);
bnxt_re_remove_one(rdev);
bnxt_re_dev_unreg(rdev);
re_work->vlan_dev = (real_dev == netdev ?
NULL : netdev);
INIT_WORK(&re_work->work, bnxt_re_task);
+ set_bit(BNXT_RE_FLAG_TASK_IN_PROG, &rdev->flags);
queue_work(bnxt_re_wq, &re_work->work);
}
}
static void __exit bnxt_re_mod_exit(void)
{
+ struct bnxt_re_dev *rdev;
+ LIST_HEAD(to_be_deleted);
+
+ mutex_lock(&bnxt_re_dev_lock);
+ /* Free all adapter allocated resources */
+ if (!list_empty(&bnxt_re_dev_list))
+ list_splice_init(&bnxt_re_dev_list, &to_be_deleted);
+ mutex_unlock(&bnxt_re_dev_lock);
+
+ list_for_each_entry(rdev, &to_be_deleted, list) {
+ dev_info(rdev_to_dev(rdev), "Unregistering Device");
+ bnxt_re_dev_stop(rdev);
+ bnxt_re_ib_unreg(rdev, true);
+ bnxt_re_remove_one(rdev);
+ bnxt_re_dev_unreg(rdev);
+ }
unregister_netdevice_notifier(&bnxt_re_netdev_notifier);
if (bnxt_re_wq)
destroy_workqueue(bnxt_re_wq);
return -EINVAL;
}
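+	/* Fail fast once a previous command has timed out on the firmware */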
+ if (test_bit(FIRMWARE_TIMED_OUT, &rcfw->flags))
+ return -ETIMEDOUT;
+
/* Cmdq are in 16-byte units, each request can consume 1 or more
* cmdqe
*/
/* timed out */
dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x timedout (%d)msec",
cookie, opcode, RCFW_CMD_WAIT_TIME_MS);
+ set_bit(FIRMWARE_TIMED_OUT, &rcfw->flags);
return rc;
}
unsigned long *cmdq_bitmap;
u32 bmap_size;
unsigned long flags;
-#define FIRMWARE_INITIALIZED_FLAG 1
+#define FIRMWARE_INITIALIZED_FLAG BIT(0)
#define FIRMWARE_FIRST_FLAG BIT(31)
+#define FIRMWARE_TIMED_OUT BIT(3)
wait_queue_head_t waitq;
int (*aeq_handler)(struct bnxt_qplib_rcfw *,
struct creq_func_event *);
unsigned int stid = GET_TID(rpl);
struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid);
+ if (!ep) {
+ pr_debug("%s stid %d lookup failure!\n", __func__, stid);
+ goto out;
+ }
pr_debug("%s ep %p\n", __func__, ep);
c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status));
c4iw_put_ep(&ep->com);
+out:
return 0;
}
c4iw_put_ep(&child_ep->com);
reject:
reject_cr(dev, hwtid, skb);
+out:
if (parent_ep)
c4iw_put_ep(&parent_ep->com);
-out:
return 0;
}
cm_id->provider_data = ep;
goto out;
}
-
+ remove_handle(ep->com.dev, &ep->com.dev->stid_idr, ep->stid);
cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
ep->com.local_addr.ss_family);
fail2:
static int thermal_init(struct hfi1_devdata *dd);
static void update_statusp(struct hfi1_pportdata *ppd, u32 state);
+static int wait_phys_link_offline_substates(struct hfi1_pportdata *ppd,
+ int msecs);
static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
int msecs);
static void log_state_transition(struct hfi1_pportdata *ppd, u32 state);
u64 regs[CCE_NUM_INT_CSRS];
u32 bit;
int i;
+ irqreturn_t handled = IRQ_NONE;
this_cpu_inc(*dd->int_counter);
	for_each_set_bit(bit, (unsigned long *)&regs[0],
CCE_NUM_INT_CSRS * 64) {
is_interrupt(dd, bit);
+ handled = IRQ_HANDLED;
}
- return IRQ_HANDLED;
+ return handled;
}
static irqreturn_t sdma_interrupt(int irq, void *data)
write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK, mask);
}
-void reset_qsfp(struct hfi1_pportdata *ppd)
+int reset_qsfp(struct hfi1_pportdata *ppd)
{
struct hfi1_devdata *dd = ppd->dd;
u64 mask, qsfp_mask;
* for alarms and warnings
*/
set_qsfp_int_n(ppd, 1);
+
+ /*
+ * After the reset, AOC transmitters are enabled by default. They need
+ * to be turned off to complete the QSFP setup before they can be
+ * enabled again.
+ */
+ return set_qsfp_tx(ppd, 0);
}
static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
{
struct hfi1_devdata *dd = ppd->dd;
u32 previous_state;
+ int offline_state_ret;
int ret;
update_lcb_cache(dd);
ppd->offline_disabled_reason =
HFI1_ODR_MASK(OPA_LINKDOWN_REASON_TRANSIENT);
- /*
- * Wait for offline transition. It can take a while for
- * the link to go down.
- */
- ret = wait_physical_linkstate(ppd, PLS_OFFLINE, 10000);
- if (ret < 0)
- return ret;
-
- /*
- * Now in charge of LCB - must be after the physical state is
- * offline.quiet and before host_link_state is changed.
- */
- set_host_lcb_access(dd);
- write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
-
- /* make sure the logical state is also down */
- ret = wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);
- if (ret)
- force_logical_link_state_down(ppd);
-
- ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */
+ offline_state_ret = wait_phys_link_offline_substates(ppd, 10000);
+ if (offline_state_ret < 0)
+ return offline_state_ret;
+ /* Disabling AOC transmitters */
if (ppd->port_type == PORT_TYPE_QSFP &&
ppd->qsfp_info.limiting_active &&
qsfp_mod_present(ppd)) {
}
}
+ /*
+ * Wait for the offline.Quiet transition if it hasn't happened yet. It
+ * can take a while for the link to go down.
+ */
+ if (offline_state_ret != PLS_OFFLINE_QUIET) {
+ ret = wait_physical_linkstate(ppd, PLS_OFFLINE, 30000);
+ if (ret < 0)
+ return ret;
+ }
+
+ /*
+ * Now in charge of LCB - must be after the physical state is
+ * offline.quiet and before host_link_state is changed.
+ */
+ set_host_lcb_access(dd);
+ write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
+
+ /* make sure the logical state is also down */
+ ret = wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);
+ if (ret)
+ force_logical_link_state_down(ppd);
+
+ ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */
+
/*
* The LNI has a mandatory wait time after the physical state
* moves to Offline.Quiet. The wait time may be different
& (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) {
/* went down while attempting link up */
check_lni_states(ppd);
+
+ /* The QSFP doesn't need to be reset on LNI failure */
+ ppd->qsfp_info.reset_needed = 0;
}
/* the active link width (downgrade) is 0 on link down */
return 0;
}
+/*
+ * wait_phys_link_offline_substates - wait for any offline substate
+ * @ppd: port device
+ * @msecs: the number of milliseconds to wait
+ *
+ * Wait up to msecs milliseconds for any offline physical link
+ * state change to occur.
+ * Returns the read physical state if an offline substate is reached,
+ * otherwise -ETIMEDOUT.
+ */
+static int wait_phys_link_offline_substates(struct hfi1_pportdata *ppd,
+ int msecs)
+{
+ u32 read_state;
+ unsigned long timeout;
+
+ timeout = jiffies + msecs_to_jiffies(msecs);
+ while (1) {
+ read_state = read_physical_state(ppd->dd);
+ if ((read_state & 0xF0) == PLS_OFFLINE)
+ break;
+ if (time_after(jiffies, timeout)) {
+ dd_dev_err(ppd->dd,
+ "timeout waiting for phy link offline.quiet substates. Read state 0x%x, %dms\n",
+ read_state, msecs);
+ return -ETIMEDOUT;
+ }
+ usleep_range(1950, 2050); /* sleep 2ms-ish */
+ }
+
+ log_state_transition(ppd, read_state);
+ return read_state;
+}
+
#define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \
(r &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
#define PLS_OFFLINE_READY_TO_QUIET_LT 0x92
#define PLS_OFFLINE_REPORT_FAILURE 0x93
#define PLS_OFFLINE_READY_TO_QUIET_BCC 0x94
+#define PLS_OFFLINE_QUIET_DURATION 0x95
#define PLS_POLLING 0x20
#define PLS_POLLING_QUIET 0x20
#define PLS_POLLING_ACTIVE 0x21
void handle_link_bounce(struct work_struct *work);
void handle_start_link(struct work_struct *work);
void handle_sma_message(struct work_struct *work);
-void reset_qsfp(struct hfi1_pportdata *ppd);
+int reset_qsfp(struct hfi1_pportdata *ppd);
void qsfp_event(struct work_struct *work);
void start_freeze_handling(struct hfi1_pportdata *ppd, int flags);
int send_idle_sma(struct hfi1_devdata *dd, u64 message);
return ret;
}
-/* magic character sequence that trails an image */
+/* magic character sequence that begins an image */
+#define IMAGE_START_MAGIC "APO="
+
+/* magic character sequence that might trail an image */
#define IMAGE_TRAIL_MAGIC "egamiAPO"
/* EPROM file types */
{
void *buffer;
void *p;
+ u32 length;
int ret;
buffer = kmalloc(P1_SIZE, GFP_KERNEL);
return ret;
}
- /* scan for image magic that may trail the actual data */
- p = strnstr(buffer, IMAGE_TRAIL_MAGIC, P1_SIZE);
- if (!p) {
+ /* config partition is valid only if it starts with IMAGE_START_MAGIC */
+ if (memcmp(buffer, IMAGE_START_MAGIC, strlen(IMAGE_START_MAGIC))) {
kfree(buffer);
return -ENOENT;
}
+ /* scan for image magic that may trail the actual data */
+ p = strnstr(buffer, IMAGE_TRAIL_MAGIC, P1_SIZE);
+ if (p)
+ length = p - buffer;
+ else
+ length = P1_SIZE;
+
*data = buffer;
- *size = p - buffer;
+ *size = length;
return 0;
}
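
The two changes above reduce to a simple validity rule for the config partition: it must begin with IMAGE_START_MAGIC, and its usable length runs to the optional IMAGE_TRAIL_MAGIC or else the whole partition. Sketched in isolation (image_length() is an illustrative name; the magics and strnstr() are the ones the driver uses):

static long image_length(const char *buf, size_t part_size)
{
	const char *p;

	if (memcmp(buf, IMAGE_START_MAGIC, strlen(IMAGE_START_MAGIC)))
		return -ENOENT;			/* must begin with "APO=" */

	p = strnstr(buf, IMAGE_TRAIL_MAGIC, part_size);
	return p ? p - buf : (long)part_size;	/* the trailer is optional */
}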
switch (ret) {
case 0:
ret = setup_base_ctxt(fd, uctxt);
- if (uctxt->subctxt_cnt) {
- /*
- * Base context is done (successfully or not), notify
- * anybody using a sub-context that is waiting for
- * this completion.
- */
- clear_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags);
- wake_up(&uctxt->wait);
- }
+ if (ret)
+ deallocate_ctxt(uctxt);
break;
case 1:
ret = complete_subctxt(fd);
/* Now allocate the RcvHdr queue and eager buffers. */
ret = hfi1_create_rcvhdrq(dd, uctxt);
if (ret)
- return ret;
+ goto done;
ret = hfi1_setup_eagerbufs(uctxt);
if (ret)
- goto setup_failed;
+ goto done;
/* If sub-contexts are enabled, do the appropriate setup */
if (uctxt->subctxt_cnt)
ret = setup_subctxt(uctxt);
if (ret)
- goto setup_failed;
+ goto done;
ret = hfi1_alloc_ctxt_rcv_groups(uctxt);
if (ret)
- goto setup_failed;
+ goto done;
ret = init_user_ctxt(fd, uctxt);
if (ret)
- goto setup_failed;
+ goto done;
user_init(uctxt);
fd->uctxt = uctxt;
hfi1_rcd_get(uctxt);
- return 0;
+done:
+ if (uctxt->subctxt_cnt) {
+ /*
+ * On error, set the failed bit so sub-contexts will clean up
+ * correctly.
+ */
+ if (ret)
+ set_bit(HFI1_CTXT_BASE_FAILED, &uctxt->event_flags);
-setup_failed:
- /* Set the failed bit so sub-context init can do the right thing */
- set_bit(HFI1_CTXT_BASE_FAILED, &uctxt->event_flags);
- deallocate_ctxt(uctxt);
+ /*
+ * Base context is done (successfully or not), notify anybody
+ * using a sub-context that is waiting for this completion.
+ */
+ clear_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags);
+ wake_up(&uctxt->wait);
+ }
return ret;
}
/*
* Code to adjust PCIe capabilities.
*/
-static int tune_pcie_caps(struct hfi1_devdata *);
+static void tune_pcie_caps(struct hfi1_devdata *);
/*
* Do all the common PCIe setup and initialization.
*/
int request_msix(struct hfi1_devdata *dd, u32 msireq)
{
- int nvec, ret;
+ int nvec;
nvec = pci_alloc_irq_vectors(dd->pcidev, 1, msireq,
PCI_IRQ_MSIX | PCI_IRQ_LEGACY);
return nvec;
}
- ret = tune_pcie_caps(dd);
- if (ret) {
- dd_dev_err(dd, "tune_pcie_caps() failed: %d\n", ret);
- pci_free_irq_vectors(dd->pcidev);
- return ret;
- }
+ tune_pcie_caps(dd);
/* check for legacy IRQ */
if (nvec == 1 && !dd->pcidev->msix_enabled)
module_param_named(aspm, aspm_mode, uint, S_IRUGO);
MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic");
-static int tune_pcie_caps(struct hfi1_devdata *dd)
+static void tune_pcie_caps(struct hfi1_devdata *dd)
{
struct pci_dev *parent;
u16 rc_mpss, rc_mps, ep_mpss, ep_mps;
* Turn on extended tags in DevCtl in case the BIOS has turned it off
* to improve WFR SDMA bandwidth
*/
- ret = pcie_capability_read_word(dd->pcidev,
- PCI_EXP_DEVCTL, &ectl);
- if (ret) {
- dd_dev_err(dd, "Unable to read from PCI config\n");
- return ret;
- }
-
- if (!(ectl & PCI_EXP_DEVCTL_EXT_TAG)) {
+ ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, &ectl);
+ if ((!ret) && !(ectl & PCI_EXP_DEVCTL_EXT_TAG)) {
dd_dev_info(dd, "Enabling PCIe extended tags\n");
ectl |= PCI_EXP_DEVCTL_EXT_TAG;
ret = pcie_capability_write_word(dd->pcidev,
PCI_EXP_DEVCTL, ectl);
- if (ret) {
- dd_dev_err(dd, "Unable to write to PCI config\n");
- return ret;
- }
+ if (ret)
+ dd_dev_info(dd, "Unable to write to PCI config\n");
}
/* Find out supported and configured values for parent (root) */
parent = dd->pcidev->bus->self;
* The driver cannot perform the tuning if it does not have
* access to the upstream component.
*/
- if (!parent)
- return -EINVAL;
+ if (!parent) {
+ dd_dev_info(dd, "Parent not found\n");
+ return;
+ }
if (!pci_is_root_bus(parent->bus)) {
dd_dev_info(dd, "Parent not root\n");
- return -EINVAL;
+ return;
+ }
+ if (!pci_is_pcie(parent)) {
+ dd_dev_info(dd, "Parent is not PCI Express capable\n");
+ return;
+ }
+ if (!pci_is_pcie(dd->pcidev)) {
+ dd_dev_info(dd, "PCI device is not PCI Express capable\n");
+ return;
}
-
- if (!pci_is_pcie(parent) || !pci_is_pcie(dd->pcidev))
- return -EINVAL;
rc_mpss = parent->pcie_mpss;
rc_mps = ffs(pcie_get_mps(parent)) - 8;
/* Find out supported and configured values for endpoint (us) */
ep_mrrs = max_mrrs;
pcie_set_readrq(dd->pcidev, ep_mrrs);
}
-
- return 0;
}
/* End of PCIe capability tuning */
* reuse of stale settings established in our previous pass through.
*/
if (ppd->qsfp_info.reset_needed) {
- reset_qsfp(ppd);
+ ret = reset_qsfp(ppd);
+ if (ret)
+ return ret;
refresh_qsfp_cache(ppd, &ppd->qsfp_info);
} else {
ppd->qsfp_info.reset_needed = 1;
CEQ_CREATED,
ILQ_CREATED,
IEQ_CREATED,
- INET_NOTIFIER,
IP_ADDR_REGISTERED,
RDMA_DEV_REGISTERED
};
}
/**
- * listen_port_in_use - determine if port is in use
- * @port: Listen port number
+ * i40iw_port_in_use - determine if port is in use
+ * @port: port number
+ * @active_side: true to check the active (connected) side, false for listeners
*/
-static bool i40iw_listen_port_in_use(struct i40iw_cm_core *cm_core, u16 port)
+static bool i40iw_port_in_use(struct i40iw_cm_core *cm_core, u16 port, bool active_side)
{
struct i40iw_cm_listener *listen_node;
+ struct i40iw_cm_node *cm_node;
unsigned long flags;
bool ret = false;
- spin_lock_irqsave(&cm_core->listen_list_lock, flags);
- list_for_each_entry(listen_node, &cm_core->listen_nodes, list) {
- if (listen_node->loc_port == port) {
- ret = true;
- break;
+ if (active_side) {
+ /* search connected node list */
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+ list_for_each_entry(cm_node, &cm_core->connected_nodes, list) {
+ if (cm_node->loc_port == port) {
+ ret = true;
+ break;
+ }
+ }
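+		/* no connected node left on this port: release its bit */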
+ if (!ret)
+ clear_bit(port, cm_core->active_side_ports);
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+ } else {
+ spin_lock_irqsave(&cm_core->listen_list_lock, flags);
+ list_for_each_entry(listen_node, &cm_core->listen_nodes, list) {
+ if (listen_node->loc_port == port) {
+ ret = true;
+ break;
+ }
}
+ spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
}
- spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+
return ret;
}
spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
if (listener->iwdev) {
- if (apbvt_del && !i40iw_listen_port_in_use(cm_core, listener->loc_port))
+ if (apbvt_del && !i40iw_port_in_use(cm_core, listener->loc_port, false))
i40iw_manage_apbvt(listener->iwdev,
listener->loc_port,
I40IW_MANAGE_APBVT_DEL);
if (cm_node->listener) {
i40iw_dec_refcnt_listen(cm_core, cm_node->listener, 0, true);
} else {
- if (!i40iw_listen_port_in_use(cm_core, cm_node->loc_port) &&
- cm_node->apbvt_set) {
+ if (!i40iw_port_in_use(cm_core, cm_node->loc_port, true) && cm_node->apbvt_set) {
i40iw_manage_apbvt(cm_node->iwdev,
cm_node->loc_port,
I40IW_MANAGE_APBVT_DEL);
- i40iw_get_addr_info(cm_node, &nfo);
- if (cm_node->qhash_set) {
- i40iw_manage_qhash(cm_node->iwdev,
- &nfo,
- I40IW_QHASH_TYPE_TCP_ESTABLISHED,
- I40IW_QHASH_MANAGE_TYPE_DELETE,
- NULL,
- false);
- cm_node->qhash_set = 0;
- }
+ cm_node->apbvt_set = 0;
+ }
+ i40iw_get_addr_info(cm_node, &nfo);
+ if (cm_node->qhash_set) {
+ i40iw_manage_qhash(cm_node->iwdev,
+ &nfo,
+ I40IW_QHASH_TYPE_TCP_ESTABLISHED,
+ I40IW_QHASH_MANAGE_TYPE_DELETE,
+ NULL,
+ false);
+ cm_node->qhash_set = 0;
}
}
tcp_info->snd_mss = cpu_to_le32(((u32)cm_node->tcp_cntxt.mss));
if (cm_node->vlan_id < VLAN_TAG_PRESENT) {
tcp_info->insert_vlan_tag = true;
- tcp_info->vlan_tag = cpu_to_le16(cm_node->vlan_id);
+ tcp_info->vlan_tag = cpu_to_le16(((u16)cm_node->user_pri << I40IW_VLAN_PRIO_SHIFT) |
+ cm_node->vlan_id);
}
if (cm_node->ipv4) {
tcp_info->src_port = cpu_to_le16(cm_node->loc_port);
struct sockaddr_in *raddr;
struct sockaddr_in6 *laddr6;
struct sockaddr_in6 *raddr6;
- bool qhash_set = false;
- int apbvt_set = 0;
- int err = 0;
- enum i40iw_status_code status;
+ int ret = 0;
+ unsigned long flags;
ibqp = i40iw_get_qp(cm_id->device, conn_param->qpn);
if (!ibqp)
cm_info.user_pri = rt_tos2priority(cm_id->tos);
i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_DCB, "%s TOS:[%d] UP:[%d]\n",
__func__, cm_id->tos, cm_info.user_pri);
- if ((cm_info.ipv4 && (laddr->sin_addr.s_addr != raddr->sin_addr.s_addr)) ||
- (!cm_info.ipv4 && memcmp(laddr6->sin6_addr.in6_u.u6_addr32,
- raddr6->sin6_addr.in6_u.u6_addr32,
- sizeof(laddr6->sin6_addr.in6_u.u6_addr32)))) {
- status = i40iw_manage_qhash(iwdev,
- &cm_info,
- I40IW_QHASH_TYPE_TCP_ESTABLISHED,
- I40IW_QHASH_MANAGE_TYPE_ADD,
- NULL,
- true);
- if (status)
- return -EINVAL;
- qhash_set = true;
- }
- status = i40iw_manage_apbvt(iwdev, cm_info.loc_port, I40IW_MANAGE_APBVT_ADD);
- if (status) {
- i40iw_manage_qhash(iwdev,
- &cm_info,
- I40IW_QHASH_TYPE_TCP_ESTABLISHED,
- I40IW_QHASH_MANAGE_TYPE_DELETE,
- NULL,
- false);
- return -EINVAL;
- }
-
- apbvt_set = 1;
cm_id->add_ref(cm_id);
cm_node = i40iw_create_cm_node(&iwdev->cm_core, iwdev,
conn_param->private_data_len,
&cm_info);
if (IS_ERR(cm_node)) {
- err = PTR_ERR(cm_node);
- goto err_out;
+ ret = PTR_ERR(cm_node);
+ cm_id->rem_ref(cm_id);
+ return ret;
+ }
+
+ if ((cm_info.ipv4 && (laddr->sin_addr.s_addr != raddr->sin_addr.s_addr)) ||
+ (!cm_info.ipv4 && memcmp(laddr6->sin6_addr.in6_u.u6_addr32,
+ raddr6->sin6_addr.in6_u.u6_addr32,
+ sizeof(laddr6->sin6_addr.in6_u.u6_addr32)))) {
+ if (i40iw_manage_qhash(iwdev, &cm_info, I40IW_QHASH_TYPE_TCP_ESTABLISHED,
+ I40IW_QHASH_MANAGE_TYPE_ADD, NULL, true)) {
+ ret = -EINVAL;
+ goto err;
+ }
+ cm_node->qhash_set = true;
}
+ spin_lock_irqsave(&iwdev->cm_core.ht_lock, flags);
+ if (!test_and_set_bit(cm_info.loc_port, iwdev->cm_core.active_side_ports)) {
+ spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags);
+ if (i40iw_manage_apbvt(iwdev, cm_info.loc_port, I40IW_MANAGE_APBVT_ADD)) {
+ ret = -EINVAL;
+ goto err;
+ }
+ } else {
+ spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags);
+ }
+
+ cm_node->apbvt_set = true;
i40iw_record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord);
if (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO &&
!cm_node->ord_size)
cm_node->ord_size = 1;
- cm_node->apbvt_set = apbvt_set;
- cm_node->qhash_set = qhash_set;
iwqp->cm_node = cm_node;
cm_node->iwqp = iwqp;
iwqp->cm_id = cm_id;
if (cm_node->state != I40IW_CM_STATE_OFFLOADED) {
cm_node->state = I40IW_CM_STATE_SYN_SENT;
- err = i40iw_send_syn(cm_node, 0);
- if (err) {
- i40iw_rem_ref_cm_node(cm_node);
- goto err_out;
- }
+ ret = i40iw_send_syn(cm_node, 0);
+ if (ret)
+ goto err;
}
i40iw_debug(cm_node->dev,
cm_node->rem_port,
cm_node,
cm_node->cm_id);
+
return 0;
-err_out:
+err:
if (cm_info.ipv4)
i40iw_debug(&iwdev->sc_dev,
I40IW_DEBUG_CM,
"Api - connect() FAILED: dest addr=%pI6",
cm_info.rem_addr);
- if (qhash_set)
- i40iw_manage_qhash(iwdev,
- &cm_info,
- I40IW_QHASH_TYPE_TCP_ESTABLISHED,
- I40IW_QHASH_MANAGE_TYPE_DELETE,
- NULL,
- false);
-
- if (apbvt_set && !i40iw_listen_port_in_use(&iwdev->cm_core,
- cm_info.loc_port))
- i40iw_manage_apbvt(iwdev,
- cm_info.loc_port,
- I40IW_MANAGE_APBVT_DEL);
+ i40iw_rem_ref_cm_node(cm_node);
cm_id->rem_ref(cm_id);
iwdev->cm_core.stats_connect_errs++;
- return err;
+ return ret;
}
/**
#define I40IW_HW_IRD_SETTING_32 32
#define I40IW_HW_IRD_SETTING_64 64
+#define MAX_PORTS 65536
+#define I40IW_VLAN_PRIO_SHIFT 13
+
enum ietf_mpa_flags {
IETF_MPA_FLAGS_MARKERS = 0x80, /* receive Markers */
	IETF_MPA_FLAGS_CRC = 0x40, /* CRC */
spinlock_t ht_lock; /* manage hash table */
spinlock_t listen_list_lock; /* listen list */
+ unsigned long active_side_ports[BITS_TO_LONGS(MAX_PORTS)];
+
u64 stats_nodes_created;
u64 stats_nodes_destroyed;
u64 stats_listen_created;
.notifier_call = i40iw_net_event
};
-static atomic_t i40iw_notifiers_registered;
-
/**
* i40iw_find_i40e_handler - find a handler given a client info
* @ldev: pointer to a client info
*/
static void i40iw_register_notifiers(void)
{
- if (atomic_inc_return(&i40iw_notifiers_registered) == 1) {
- register_inetaddr_notifier(&i40iw_inetaddr_notifier);
- register_inet6addr_notifier(&i40iw_inetaddr6_notifier);
- register_netevent_notifier(&i40iw_net_notifier);
- }
+ register_inetaddr_notifier(&i40iw_inetaddr_notifier);
+ register_inet6addr_notifier(&i40iw_inetaddr6_notifier);
+ register_netevent_notifier(&i40iw_net_notifier);
+}
+
+/**
+ * i40iw_unregister_notifiers - unregister tcp ip notifiers
+ */
+static void i40iw_unregister_notifiers(void)
+{
+ unregister_netevent_notifier(&i40iw_net_notifier);
+ unregister_inetaddr_notifier(&i40iw_inetaddr_notifier);
+ unregister_inet6addr_notifier(&i40iw_inetaddr6_notifier);
}
/**
u32 i;
u32 size;
+ if (!ldev->msix_count) {
+ i40iw_pr_err("No MSI-X vectors\n");
+ return I40IW_ERR_CONFIG;
+ }
+
iwdev->msix_count = ldev->msix_count;
size = sizeof(struct i40iw_msix_vector) * iwdev->msix_count;
if (!iwdev->reset)
i40iw_del_macip_entry(iwdev, (u8)iwdev->mac_ip_table_idx);
/* fallthrough */
- case INET_NOTIFIER:
- if (!atomic_dec_return(&i40iw_notifiers_registered)) {
- unregister_netevent_notifier(&i40iw_net_notifier);
- unregister_inetaddr_notifier(&i40iw_inetaddr_notifier);
- unregister_inet6addr_notifier(&i40iw_inetaddr6_notifier);
- }
/* fallthrough */
case PBLE_CHUNK_MEM:
i40iw_destroy_pble_pool(dev, iwdev->pble_rsrc);
status = i40iw_save_msix_info(iwdev, ldev);
if (status)
- goto exit;
+ return status;
iwdev->hw.dev_context = (void *)ldev->pcidev;
iwdev->hw.hw_addr = ldev->hw_addr;
status = i40iw_allocate_dma_mem(&iwdev->hw,
break;
iwdev->init_state = PBLE_CHUNK_MEM;
iwdev->virtchnl_wq = alloc_ordered_workqueue("iwvch", WQ_MEM_RECLAIM);
- i40iw_register_notifiers();
- iwdev->init_state = INET_NOTIFIER;
status = i40iw_add_mac_ip(iwdev);
if (status)
break;
i40iw_client.type = I40E_CLIENT_IWARP;
spin_lock_init(&i40iw_handler_lock);
ret = i40e_register_client(&i40iw_client);
+ i40iw_register_notifiers();
+
return ret;
}
*/
static void __exit i40iw_exit_module(void)
{
+ i40iw_unregister_notifiers();
i40e_unregister_client(&i40iw_client);
}
return NOTIFY_DONE;
iwdev = &hdl->device;
- if (iwdev->init_state < INET_NOTIFIER)
+ if (iwdev->init_state < IP_ADDR_REGISTERED || iwdev->closing)
return NOTIFY_DONE;
netdev = iwdev->ldev->netdev;
return NOTIFY_DONE;
iwdev = &hdl->device;
- if (iwdev->init_state < INET_NOTIFIER)
+ if (iwdev->init_state < IP_ADDR_REGISTERED || iwdev->closing)
return NOTIFY_DONE;
netdev = iwdev->ldev->netdev;
if (!iwhdl)
return NOTIFY_DONE;
iwdev = &iwhdl->device;
- if (iwdev->init_state < INET_NOTIFIER)
+ if (iwdev->init_state < IP_ADDR_REGISTERED || iwdev->closing)
return NOTIFY_DONE;
p = (__be32 *)neigh->primary_key;
i40iw_copy_ip_ntohl(local_ipaddr, p);
iwqp->hw_tcp_state = I40IW_TCP_STATE_CLOSED;
iwqp->last_aeq = I40IW_AE_RESET_SENT;
spin_unlock_irqrestore(&iwqp->lock, flags);
+ i40iw_cm_disconn(iwqp);
}
+ } else {
+ spin_lock_irqsave(&iwqp->lock, flags);
+ if (iwqp->cm_id) {
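+			/* only the first caller arms the close timer */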
+ if (atomic_inc_return(&iwqp->close_timer_started) == 1) {
+ iwqp->cm_id->add_ref(iwqp->cm_id);
+ i40iw_schedule_cm_timer(iwqp->cm_node,
+ (struct i40iw_puda_buf *)iwqp,
+ I40IW_TIMER_TYPE_CLOSE, 1, 0);
+ }
+ }
+ spin_unlock_irqrestore(&iwqp->lock, flags);
}
}
return 0;
}
if (MLX5_CAP_GEN(mdev, tag_matching)) {
- props->xrq_caps.max_rndv_hdr_size = MLX5_TM_MAX_RNDV_MSG_SIZE;
- props->xrq_caps.max_num_tags =
+ props->tm_caps.max_rndv_hdr_size = MLX5_TM_MAX_RNDV_MSG_SIZE;
+ props->tm_caps.max_num_tags =
(1 << MLX5_CAP_GEN(mdev, log_tag_matching_list_sz)) - 1;
- props->xrq_caps.flags = IB_TM_CAP_RC;
- props->xrq_caps.max_ops =
+ props->tm_caps.flags = IB_TM_CAP_RC;
+ props->tm_caps.max_ops =
1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
- props->xrq_caps.max_sge = MLX5_TM_MAX_SGE;
+ props->tm_caps.max_sge = MLX5_TM_MAX_SGE;
}
if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) {
if (!dbg)
return -ENOMEM;
+ dev->delay_drop.dbg = dbg;
+
dbg->dir_debugfs =
debugfs_create_dir("delay_drop",
dev->mdev->priv.dbg_root);
if (!dbg->dir_debugfs)
- return -ENOMEM;
+ goto out_debugfs;
dbg->events_cnt_debugfs =
debugfs_create_atomic_t("num_timeout_events", 0400,
if (!dbg->timeout_debugfs)
goto out_debugfs;
- dev->delay_drop.dbg = dbg;
-
return 0;
out_debugfs:
{
unsigned long tmp;
unsigned long m;
- int i, k;
- u64 base = 0;
- int p = 0;
- int skip;
- int mask;
- u64 len;
- u64 pfn;
+ u64 base = ~0, p = 0;
+ u64 len, pfn;
+ int i = 0;
struct scatterlist *sg;
int entry;
unsigned long page_shift = umem->page_shift;
m = find_first_bit(&tmp, BITS_PER_LONG);
if (max_page_shift)
m = min_t(unsigned long, max_page_shift - page_shift, m);
- skip = 1 << m;
- mask = skip - 1;
- i = 0;
+
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
len = sg_dma_len(sg) >> page_shift;
pfn = sg_dma_address(sg) >> page_shift;
- for (k = 0; k < len; k++) {
- if (!(i & mask)) {
- tmp = (unsigned long)pfn;
- m = min_t(unsigned long, m, find_first_bit(&tmp, BITS_PER_LONG));
- skip = 1 << m;
- mask = skip - 1;
- base = pfn;
- p = 0;
- } else {
- if (base + p != pfn) {
- tmp = (unsigned long)p;
- m = find_first_bit(&tmp, BITS_PER_LONG);
- skip = 1 << m;
- mask = skip - 1;
- base = pfn;
- p = 0;
- }
- }
- p++;
- i++;
+ if (base + p != pfn) {
+			/* If either the offset or the new
+			 * base is unaligned, update m
+			 */
+ tmp = (unsigned long)(pfn | p);
+ if (!IS_ALIGNED(tmp, 1 << m))
+ m = find_first_bit(&tmp, BITS_PER_LONG);
+
+ base = pfn;
+ p = 0;
}
+
+ p += len;
+ i += len;
}
if (i) {
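
The rewritten loop above keeps the page order m as a single invariant: every run's base pfn and accumulated offset must stay 2^m-aligned. Isolated as a hedged helper (update_order() is a made-up name; IS_ALIGNED(), find_first_bit() and BITS_PER_LONG are the kernel's bit helpers):

static unsigned long update_order(unsigned long m, unsigned long pfn,
				  unsigned long offset)
{
	unsigned long tmp = pfn | offset;

	if (!IS_ALIGNED(tmp, 1UL << m))			/* invariant broken? */
		m = find_first_bit(&tmp, BITS_PER_LONG);/* largest order left */
	return m;
}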
#define MLX5_UMR_ALIGN 2048
-static int clean_mr(struct mlx5_ib_mr *mr);
+static int clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
+static int dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
static int mr_cache_max_order(struct mlx5_ib_dev *dev);
static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift,
update_xlt_flags);
+
if (err) {
- mlx5_ib_dereg_mr(&mr->ibmr);
+ dereg_mr(dev, mr);
return ERR_PTR(err);
}
}
err = mr_umem_get(pd, addr, len, access_flags, &mr->umem,
&npages, &page_shift, &ncont, &order);
if (err < 0) {
- clean_mr(mr);
+ clean_mr(dev, mr);
return err;
}
}
if (err) {
mlx5_ib_warn(dev, "Failed to rereg UMR\n");
ib_umem_release(mr->umem);
- clean_mr(mr);
+ clean_mr(dev, mr);
return err;
}
}
}
}
-static int clean_mr(struct mlx5_ib_mr *mr)
+static int clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
- struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
int allocated_from_cache = mr->allocated_from_cache;
int err;
return 0;
}
-int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
+static int dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
- struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
- struct mlx5_ib_mr *mr = to_mmr(ibmr);
int npages = mr->npages;
struct ib_umem *umem = mr->umem;
}
#endif
- clean_mr(mr);
+ clean_mr(dev, mr);
if (umem) {
ib_umem_release(umem);
return 0;
}
+int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+
+ return dereg_mr(dev, mr);
+}
+
struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
u32 max_num_sg)
mr->ibmr.iova);
set_wqe_32bit_value(wqe->wqe_words,
NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX,
- mr->ibmr.length);
+ lower_32_bits(mr->ibmr.length));
set_wqe_32bit_value(wqe->wqe_words,
NES_IWARP_SQ_FMR_WQE_LENGTH_HIGH_IDX, 0);
set_wqe_32bit_value(wqe->wqe_words,
mr->npages * 8);
nes_debug(NES_DBG_IW_TX, "SQ_REG_MR: iova_start: %llx, "
- "length: %d, rkey: %0x, pgl_paddr: %llx, "
+ "length: %lld, rkey: %0x, pgl_paddr: %llx, "
"page_list_len: %u, wqe_misc: %x\n",
(unsigned long long) mr->ibmr.iova,
mr->ibmr.length,
case OCRDMA_MBX_ADDI_STATUS_INSUFFICIENT_RESOURCES:
err_num = -EAGAIN;
break;
+ default:
+ err_num = -EFAULT;
}
+ break;
default:
err_num = -EFAULT;
}
return (enum ib_wc_status)status;
}
-static inline int pvrdma_wc_opcode_to_ib(int opcode)
-{
- return opcode;
+static inline int pvrdma_wc_opcode_to_ib(unsigned int opcode)
+{
+ switch (opcode) {
+ case PVRDMA_WC_SEND:
+ return IB_WC_SEND;
+ case PVRDMA_WC_RDMA_WRITE:
+ return IB_WC_RDMA_WRITE;
+ case PVRDMA_WC_RDMA_READ:
+ return IB_WC_RDMA_READ;
+ case PVRDMA_WC_COMP_SWAP:
+ return IB_WC_COMP_SWAP;
+ case PVRDMA_WC_FETCH_ADD:
+ return IB_WC_FETCH_ADD;
+ case PVRDMA_WC_LOCAL_INV:
+ return IB_WC_LOCAL_INV;
+ case PVRDMA_WC_FAST_REG_MR:
+ return IB_WC_REG_MR;
+ case PVRDMA_WC_MASKED_COMP_SWAP:
+ return IB_WC_MASKED_COMP_SWAP;
+ case PVRDMA_WC_MASKED_FETCH_ADD:
+ return IB_WC_MASKED_FETCH_ADD;
+ case PVRDMA_WC_RECV:
+ return IB_WC_RECV;
+ case PVRDMA_WC_RECV_RDMA_WITH_IMM:
+ return IB_WC_RECV_RDMA_WITH_IMM;
+ default:
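+		/* unknown device opcode: fall back */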
+ return IB_WC_SEND;
+ }
}
static inline int pvrdma_wc_flags_to_ib(int flags)
wc->status != IB_WC_WR_FLUSH_ERR) {
struct ipoib_neigh *neigh;
- if (wc->status != IB_WC_RNR_RETRY_EXC_ERR)
- ipoib_warn(priv, "failed cm send event (status=%d, wrid=%d vend_err %x)\n",
- wc->status, wr_id, wc->vendor_err);
+ /* IB_WC[_RNR]_RETRY_EXC_ERR error is part of the life cycle,
+ * so don't make waves.
+ */
+ if (wc->status == IB_WC_RNR_RETRY_EXC_ERR ||
+ wc->status == IB_WC_RETRY_EXC_ERR)
+ ipoib_dbg(priv,
+ "%s: failed cm send event (status=%d, wrid=%d vend_err 0x%x)\n",
+ __func__, wc->status, wr_id, wc->vendor_err);
else
- ipoib_dbg(priv, "failed cm send event (status=%d, wrid=%d vend_err %x)\n",
- wc->status, wr_id, wc->vendor_err);
+ ipoib_warn(priv,
+ "%s: failed cm send event (status=%d, wrid=%d vend_err 0x%x)\n",
+ __func__, wc->status, wr_id, wc->vendor_err);
spin_lock_irqsave(&priv->lock, flags);
neigh = tx->neigh;
*/
priv->dev->broadcast[8] = priv->pkey >> 8;
priv->dev->broadcast[9] = priv->pkey & 0xff;
-
- /*
- * Update the broadcast address in the priv->broadcast object,
- * in case it already exists, otherwise no one will do that.
- */
- if (priv->broadcast) {
- spin_lock_irq(&priv->lock);
- memcpy(priv->broadcast->mcmember.mgid.raw,
- priv->dev->broadcast + 4,
- sizeof(union ib_gid));
- spin_unlock_irq(&priv->lock);
- }
-
return 0;
}
{
struct ipoib_dev_priv *priv;
struct ib_port_attr attr;
+ struct rdma_netdev *rn;
int result = -ENOMEM;
priv = ipoib_intf_alloc(hca, port, format);
ipoib_dev_cleanup(priv->dev);
device_init_failed:
- free_netdev(priv->dev);
+ rn = netdev_priv(priv->dev);
+ rn->free_rdma_netdev(priv->dev);
kfree(priv);
alloc_mem_failed:
return;
list_for_each_entry_safe(priv, tmp, dev_list, list) {
- struct rdma_netdev *rn = netdev_priv(priv->dev);
+ struct rdma_netdev *parent_rn = netdev_priv(priv->dev);
ib_unregister_event_handler(&priv->event_handler);
flush_workqueue(ipoib_workqueue);
unregister_netdev(priv->dev);
mutex_unlock(&priv->sysfs_mutex);
- rn->free_rdma_netdev(priv->dev);
+ parent_rn->free_rdma_netdev(priv->dev);
+
+ list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
+ struct rdma_netdev *child_rn;
- list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list)
+ child_rn = netdev_priv(cpriv->dev);
+ child_rn->free_rdma_netdev(cpriv->dev);
kfree(cpriv);
+ }
kfree(priv);
}
return restart_syscall();
}
- priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name);
- if (!priv) {
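+	/* don't block on vlan_rwsem while holding rtnl; back off and retry */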
+ if (!down_write_trylock(&ppriv->vlan_rwsem)) {
rtnl_unlock();
mutex_unlock(&ppriv->sysfs_mutex);
- return -ENOMEM;
+ return restart_syscall();
}
- down_write(&ppriv->vlan_rwsem);
+ priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name);
+ if (!priv) {
+ result = -ENOMEM;
+ goto out;
+ }
/*
* First ensure this isn't a duplicate. We check the parent device and
rtnl_unlock();
mutex_unlock(&ppriv->sysfs_mutex);
- if (result) {
- free_netdev(priv->dev);
+ if (result && priv) {
+ struct rdma_netdev *rn;
+
+ rn = netdev_priv(priv->dev);
+ rn->free_rdma_netdev(priv->dev);
kfree(priv);
}
return restart_syscall();
}
- down_write(&ppriv->vlan_rwsem);
+ if (!down_write_trylock(&ppriv->vlan_rwsem)) {
+ rtnl_unlock();
+ mutex_unlock(&ppriv->sysfs_mutex);
+ return restart_syscall();
+ }
+
list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) {
if (priv->pkey == pkey &&
priv->child_type == IPOIB_LEGACY_CHILD) {
mutex_unlock(&ppriv->sysfs_mutex);
if (dev) {
- free_netdev(dev);
+ struct rdma_netdev *rn;
+
+ rn = netdev_priv(dev);
+ rn->free_rdma_netdev(priv->dev);
kfree(priv);
return 0;
}
{
int i;
- iser_err("page vec npages %d data length %d\n",
+ iser_err("page vec npages %d data length %lld\n",
page_vec->npages, page_vec->fake_mr.length);
for (i = 0; i < page_vec->npages; i++)
iser_err("vec[%d]: %llx\n", i, page_vec->pages[i]);
EXPORT_SYMBOL_GPL(input_ff_erase);
/*
- * flush_effects - erase all effects owned by a file handle
+ * input_ff_flush - erase all effects owned by a file handle
+ * @dev: input device to erase effect from
+ * @file: purported owner of the effects
+ *
+ * This function erases all force-feedback effects associated with
+ * the given owner from the specified device. Note that @file may be %NULL,
+ * in which case all effects will be erased.
*/
-static int flush_effects(struct input_dev *dev, struct file *file)
+int input_ff_flush(struct input_dev *dev, struct file *file)
{
struct ff_device *ff = dev->ff;
int i;
return 0;
}
+EXPORT_SYMBOL_GPL(input_ff_flush);
/**
* input_ff_event() - generic handler for force-feedback events
mutex_init(&ff->mutex);
dev->ff = ff;
- dev->flush = flush_effects;
+ dev->flush = input_ff_flush;
dev->event = input_ff_event;
__set_bit(EV_FF, dev->evbit);
uinput_request_alloc_id(udev, request));
}
-static void uinput_request_done(struct uinput_device *udev,
- struct uinput_request *request)
+static void uinput_request_release_slot(struct uinput_device *udev,
+ unsigned int id)
{
/* Mark slot as available */
- udev->requests[request->id] = NULL;
- wake_up(&udev->requests_waitq);
+ spin_lock(&udev->requests_lock);
+ udev->requests[id] = NULL;
+ spin_unlock(&udev->requests_lock);
- complete(&request->done);
+ wake_up(&udev->requests_waitq);
}
static int uinput_request_send(struct uinput_device *udev,
static int uinput_request_submit(struct uinput_device *udev,
struct uinput_request *request)
{
- int error;
+ int retval;
- error = uinput_request_reserve_slot(udev, request);
- if (error)
- return error;
+ retval = uinput_request_reserve_slot(udev, request);
+ if (retval)
+ return retval;
- error = uinput_request_send(udev, request);
- if (error) {
- uinput_request_done(udev, request);
- return error;
- }
+ retval = uinput_request_send(udev, request);
+ if (retval)
+ goto out;
wait_for_completion(&request->done);
- return request->retval;
+ retval = request->retval;
+
+ out:
+ uinput_request_release_slot(udev, request->id);
+ return retval;
}
/*
request = udev->requests[i];
if (request) {
request->retval = -ENODEV;
- uinput_request_done(udev, request);
+ complete(&request->done);
}
}
return uinput_request_submit(udev, &request);
}
+static int uinput_dev_flush(struct input_dev *dev, struct file *file)
+{
+ /*
+ * If we are called with file == NULL that means we are tearing
+	 * down the device, and therefore we cannot handle FF erase
+ * requests: either we are handling UI_DEV_DESTROY (and holding
+ * the udev->mutex), or the file descriptor is closed and there is
+ * nobody on the other side anymore.
+ */
+ return file ? input_ff_flush(dev, file) : 0;
+}
+
static void uinput_destroy_device(struct uinput_device *udev)
{
const char *name, *phys;
dev->ff->playback = uinput_dev_playback;
dev->ff->set_gain = uinput_dev_set_gain;
dev->ff->set_autocenter = uinput_dev_set_autocenter;
+ /*
+ * The standard input_ff_flush() implementation does
+ * not quite work for uinput as we can't reasonably
+ * handle FF requests during device teardown.
+ */
+ dev->flush = uinput_dev_flush;
}
error = input_register_device(udev->dev);
}
req->retval = ff_up.retval;
- uinput_request_done(udev, req);
+ complete(&req->done);
goto out;
case UI_END_FF_ERASE:
}
req->retval = ff_erase.retval;
- uinput_request_done(udev, req);
+ complete(&req->done);
goto out;
}
}
/* Wait for F/W to update one page ROM data. */
- msleep(20);
+ msleep(35);
error = elan_i2c_read_cmd(client, ETP_I2C_IAP_CTRL_CMD, val);
if (error) {
config IPMMU_VMSA
bool "Renesas VMSA-compatible IPMMU"
depends on ARM || IOMMU_DMA
- depends on ARCH_RENESAS || COMPILE_TEST
+ depends on ARCH_RENESAS || (COMPILE_TEST && !GENERIC_ATOMIC64)
select IOMMU_API
select IOMMU_IO_PGTABLE_LPAE
select ARM_DMA_USE_IOMMU
config QCOM_IOMMU
# Note: iommu drivers cannot (yet?) be built as modules
bool "Qualcomm IOMMU Support"
- depends on ARCH_QCOM || COMPILE_TEST
+ depends on ARCH_QCOM || (COMPILE_TEST && !GENERIC_ATOMIC64)
+ depends on HAS_DMA
select IOMMU_API
select IOMMU_IO_PGTABLE_LPAE
select ARM_DMA_USE_IOMMU
#define dmar_parse_one_rhsa dmar_res_noop
#endif
-static void __init
+static void
dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
{
struct acpi_dmar_hardware_unit *drhd;
err = of_iommu_xlate(info->dev, &iommu_spec);
of_node_put(iommu_spec.np);
- if (err)
- return err;
-
- return info->np == pdev->bus->dev.of_node;
+ return err;
}
const struct iommu_ops *of_iommu_configure(struct device *dev,
{
const __be32 *cell;
u64 hwid;
- int i;
+ int cpu;
cell = of_get_property(dn, "reg", NULL);
if (!cell)
if (hwid & ~MPIDR_HWID_BITMASK)
return -1;
- for (i = 0; i < num_possible_cpus(); i++)
- if (cpu_logical_map(i) == hwid)
- return i;
+ for_each_possible_cpu(cpu)
+ if (cpu_logical_map(cpu) == hwid)
+ return cpu;
return -1;
}
{
struct its_cmd_info info = {
.cmd_type = MAP_VLPI,
- .map = map,
+ {
+ .map = map,
+ },
};
/*
{
struct its_cmd_info info = {
.cmd_type = GET_VLPI,
- .map = map,
+ {
+ .map = map,
+ },
};
return irq_set_vcpu_affinity(irq, &info);
{
struct its_cmd_info info = {
.cmd_type = inv ? PROP_UPDATE_AND_INV_VLPI : PROP_UPDATE_VLPI,
- .config = config,
+ {
+ .config = config,
+ },
};
return irq_set_vcpu_affinity(irq, &info);
{
unsigned int intr = GIC_HWIRQ_TO_SHARED(d->hwirq);
- write_gic_rmask(BIT(intr));
+ write_gic_rmask(intr);
gic_clear_pcpu_masks(intr);
}
unsigned int intr = GIC_HWIRQ_TO_SHARED(d->hwirq);
unsigned int cpu;
- write_gic_smask(BIT(intr));
+ write_gic_smask(intr);
gic_clear_pcpu_masks(intr);
cpu = cpumask_first_and(affinity, cpu_online_mask);
for (i = 0; i < gic_shared_intrs; i++) {
change_gic_pol(i, GIC_POL_ACTIVE_HIGH);
change_gic_trig(i, GIC_TRIG_LEVEL);
- write_gic_rmask(BIT(i));
+ write_gic_rmask(i);
}
for (i = 0; i < gic_vpes; i++) {
isdn_net_local *lp;
struct ippp_struct *is;
int proto;
- unsigned char protobuf[4];
is = file->private_data;
if (!lp)
printk(KERN_DEBUG "isdn_ppp_write: lp == NULL\n");
else {
- /*
- * Don't reset huptimer for
- * LCP packets. (Echo requests).
- */
- if (copy_from_user(protobuf, buf, 4))
- return -EFAULT;
- proto = PPP_PROTOCOL(protobuf);
- if (proto != PPP_LCP)
- lp->huptimer = 0;
+ if (lp->isdn_device < 0 || lp->isdn_channel < 0) {
+ unsigned char protobuf[4];
+ /*
+ * Don't reset huptimer for
+ * LCP packets. (Echo requests).
+ */
+ if (copy_from_user(protobuf, buf, 4))
+ return -EFAULT;
+
+ proto = PPP_PROTOCOL(protobuf);
+ if (proto != PPP_LCP)
+ lp->huptimer = 0;
- if (lp->isdn_device < 0 || lp->isdn_channel < 0)
return 0;
+ }
if ((dev->drv[lp->isdn_device]->flags & DRV_FLAG_RUNNING) &&
lp->dialstate == 0 &&
(lp->flags & ISDN_NET_CONNECTED)) {
unsigned short hl;
struct sk_buff *skb;
+ unsigned char *cpy_buf;
/*
* we need to reserve enough space in front of
* sk_buff. old call to dev_alloc_skb only reserved
return count;
}
skb_reserve(skb, hl);
- if (copy_from_user(skb_put(skb, count), buf, count))
+ cpy_buf = skb_put(skb, count);
+ if (copy_from_user(cpy_buf, buf, count))
{
kfree_skb(skb);
return -EFAULT;
}
+
+ /*
+ * Don't reset huptimer for
+ * LCP packets. (Echo requests).
+ */
+ proto = PPP_PROTOCOL(cpy_buf);
+ if (proto != PPP_LCP)
+ lp->huptimer = 0;
+
if (is->debug & 0x40) {
printk(KERN_DEBUG "ppp xmit: len %d\n", (int) skb->len);
isdn_ppp_frame_log("xmit", skb->data, skb->len, 32, is->unit, lp->ppp_slot);
spin_unlock(&head->batch_head->batch_lock);
goto unlock_out;
}
+	/*
+	 * We must assign this stripe's batch_head while holding
+	 * batch_lock; otherwise clear_batch_ready on the batch head
+	 * stripe could clear this stripe's BATCH_READY bit before
+	 * batch_head is assigned, which would confuse clear_batch_ready
+	 * for this stripe.
+	 */
+ sh->batch_head = head->batch_head;
/*
* at this point, head's BATCH_READY could be cleared, but we
*/
list_add(&sh->batch_list, &head->batch_list);
spin_unlock(&head->batch_head->batch_lock);
-
- sh->batch_head = head->batch_head;
} else {
head->batch_head = head;
sh->batch_head = head->batch_head;
set_mask_bits(&sh->state, ~(STRIPE_EXPAND_SYNC_FLAGS |
(1 << STRIPE_PREREAD_ACTIVE) |
- (1 << STRIPE_DEGRADED)),
+ (1 << STRIPE_DEGRADED) |
+ (1 << STRIPE_ON_UNPLUG_LIST)),
head_sh->state & (1 << STRIPE_INSYNC));
sh->check_state = head_sh->check_state;
if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask)
limit = (u64)dma_max_pfn(mmc_dev(host)) << PAGE_SHIFT;
+ /*
+ * mmc_init_request() depends on card->bouncesz so it must be calculated
+ * before blk_init_allocated_queue() starts allocating requests.
+ */
+ card->bouncesz = mmc_queue_calc_bouncesz(host);
+
mq->card = card;
mq->queue = blk_alloc_queue(GFP_KERNEL);
if (!mq->queue)
if (mmc_can_erase(card))
mmc_queue_setup_discard(mq->queue, card);
- card->bouncesz = mmc_queue_calc_bouncesz(host);
if (card->bouncesz) {
blk_queue_max_hw_sectors(mq->queue, card->bouncesz / 512);
blk_queue_max_segments(mq->queue, card->bouncesz / 512);
comment "MMC/SD/SDIO Host Controller Drivers"
config MMC_DEBUG
- bool "MMC host drivers debugginG"
+ bool "MMC host drivers debugging"
depends on MMC != n
help
This is an option for use by developers; most people should
*
* Copyright (C) 2016 Cavium Inc.
*/
+#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/interrupt.h>
#include <linux/mmc/mmc.h>
for (i = 0; i < CAVIUM_MAX_MMC; i++) {
if (host->slot[i])
cvm_mmc_of_slot_remove(host->slot[i]);
- if (host->slot_pdev[i])
+ if (host->slot_pdev[i]) {
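+			/* pin the device so it outlives the destroy call */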
+ get_device(&host->slot_pdev[i]->dev);
of_platform_device_destroy(&host->slot_pdev[i]->dev, NULL);
+ put_device(&host->slot_pdev[i]->dev);
+ }
}
clk_disable_unprepare(host->clk);
return ret;
enum {
INTEL_DSM_FNS = 0,
+ INTEL_DSM_V18_SWITCH = 3,
INTEL_DSM_DRV_STRENGTH = 9,
INTEL_DSM_D3_RETUNE = 10,
};
sdhci_writel(host, val, INTEL_HS400_ES_REG);
}
+static void sdhci_intel_voltage_switch(struct sdhci_host *host)
+{
+ struct sdhci_pci_slot *slot = sdhci_priv(host);
+ struct intel_host *intel_host = sdhci_pci_priv(slot);
+ struct device *dev = &slot->chip->pdev->dev;
+ u32 result = 0;
+ int err;
+
+ err = intel_dsm(intel_host, dev, INTEL_DSM_V18_SWITCH, &result);
+ pr_debug("%s: %s DSM error %d result %u\n",
+ mmc_hostname(host->mmc), __func__, err, result);
+}
+
static const struct sdhci_ops sdhci_intel_byt_ops = {
.set_clock = sdhci_set_clock,
.set_power = sdhci_intel_set_power,
.reset = sdhci_reset,
.set_uhs_signaling = sdhci_set_uhs_signaling,
.hw_reset = sdhci_pci_hw_reset,
+ .voltage_switch = sdhci_intel_voltage_switch,
};
static void byt_read_dsm(struct sdhci_pci_slot *slot)
#define CMDREQ_TIMEOUT 5000
-#ifdef CONFIG_MMC_DEBUG
-
-#define STATUS_TO_TEXT(a, status, i) \
- do { \
- if ((status) & TMIO_STAT_##a) { \
- if ((i)++) \
- printk(KERN_DEBUG " | "); \
- printk(KERN_DEBUG #a); \
- } \
- } while (0)
-
-static void pr_debug_status(u32 status)
-{
- int i = 0;
-
- pr_debug("status: %08x = ", status);
- STATUS_TO_TEXT(CARD_REMOVE, status, i);
- STATUS_TO_TEXT(CARD_INSERT, status, i);
- STATUS_TO_TEXT(SIGSTATE, status, i);
- STATUS_TO_TEXT(WRPROTECT, status, i);
- STATUS_TO_TEXT(CARD_REMOVE_A, status, i);
- STATUS_TO_TEXT(CARD_INSERT_A, status, i);
- STATUS_TO_TEXT(SIGSTATE_A, status, i);
- STATUS_TO_TEXT(CMD_IDX_ERR, status, i);
- STATUS_TO_TEXT(STOPBIT_ERR, status, i);
- STATUS_TO_TEXT(ILL_FUNC, status, i);
- STATUS_TO_TEXT(CMD_BUSY, status, i);
- STATUS_TO_TEXT(CMDRESPEND, status, i);
- STATUS_TO_TEXT(DATAEND, status, i);
- STATUS_TO_TEXT(CRCFAIL, status, i);
- STATUS_TO_TEXT(DATATIMEOUT, status, i);
- STATUS_TO_TEXT(CMDTIMEOUT, status, i);
- STATUS_TO_TEXT(RXOVERFLOW, status, i);
- STATUS_TO_TEXT(TXUNDERRUN, status, i);
- STATUS_TO_TEXT(RXRDY, status, i);
- STATUS_TO_TEXT(TXRQ, status, i);
- STATUS_TO_TEXT(ILL_ACCESS, status, i);
- printk("\n");
-}
-
-#else
-#define pr_debug_status(s) do { } while (0)
-#endif
-
static void tmio_mmc_enable_sdio_irq(struct mmc_host *mmc, int enable)
{
struct tmio_mmc_host *host = mmc_priv(mmc);
status = sd_ctrl_read16_and_16_as_32(host, CTL_STATUS);
ireg = status & TMIO_MASK_IRQ & ~host->sdcard_irq_mask;
- pr_debug_status(status);
- pr_debug_status(ireg);
-
/* Clear the status except the interrupt status */
sd_ctrl_write32_as_16_and_16(host, CTL_STATUS, TMIO_MASK_IRQ);
}
res = clk_prepare_enable(host->clk);
if (res)
- goto err_exit1;
+ goto err_put_clk;
nand_chip->cmd_ctrl = lpc32xx_nand_cmd_ctrl;
nand_chip->dev_ready = lpc32xx_nand_device_ready;
dma_release_channel(host->dma_chan);
err_exit2:
clk_disable_unprepare(host->clk);
+err_put_clk:
clk_put(host->clk);
err_exit1:
lpc32xx_wp_enable(host);
static int nand_do_write_ops(struct mtd_info *mtd, loff_t to,
struct mtd_oob_ops *ops)
{
- int chipnr, realpage, page, blockmask, column;
+ int chipnr, realpage, page, column;
struct nand_chip *chip = mtd_to_nand(mtd);
uint32_t writelen = ops->len;
realpage = (int)(to >> chip->page_shift);
page = realpage & chip->pagemask;
- blockmask = (1 << (chip->phys_erase_shift - chip->page_shift)) - 1;
/* Invalidate the page cache, when we write to the cached page */
if (to <= ((loff_t)chip->pagebuf << chip->page_shift) &&
* @nor: pointer to a 'struct spi_nor'
* @addr: offset in the SFDP area to start reading data from
* @len: number of bytes to read
- * @buf: buffer where the SFDP data are copied into
+ * @buf: buffer where the SFDP data are copied into (dma-safe memory)
*
* Whatever the actual numbers of bytes for address and dummy cycles are
* for (Fast) Read commands, the Read SFDP (5Ah) instruction is always
return ret;
}
+/**
+ * spi_nor_read_sfdp_dma_unsafe() - read Serial Flash Discoverable Parameters.
+ * @nor: pointer to a 'struct spi_nor'
+ * @addr: offset in the SFDP area to start reading data from
+ * @len: number of bytes to read
+ * @buf: buffer where the SFDP data are copied into
+ *
+ * Wrap spi_nor_read_sfdp() using a kmalloc'ed bounce buffer as @buf is now not
+ * guaranteed to be dma-safe.
+ *
+ * Return: -ENOMEM if kmalloc() fails, the return code of spi_nor_read_sfdp()
+ * otherwise.
+ */
+static int spi_nor_read_sfdp_dma_unsafe(struct spi_nor *nor, u32 addr,
+ size_t len, void *buf)
+{
+ void *dma_safe_buf;
+ int ret;
+
+ dma_safe_buf = kmalloc(len, GFP_KERNEL);
+ if (!dma_safe_buf)
+ return -ENOMEM;
+
+ ret = spi_nor_read_sfdp(nor, addr, len, dma_safe_buf);
+ memcpy(buf, dma_safe_buf, len);
+ kfree(dma_safe_buf);
+
+ return ret;
+}
+
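Background on the wrapper above: the SFDP parsing paths below pass on-stack structures as @buf, and with CONFIG_VMAP_STACK the kernel stack may live in vmalloc space, which is not safe to hand to DMA; a kmalloc'ed bounce buffer, by contrast, is always DMA-capable memory. That is the usual rationale for this pattern; the kernel-doc itself only states that @buf is not guaranteed to be dma-safe.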
struct sfdp_parameter_header {
u8 id_lsb;
u8 minor;
bfpt_header->length * sizeof(u32));
addr = SFDP_PARAM_HEADER_PTP(bfpt_header);
memset(&bfpt, 0, sizeof(bfpt));
- err = spi_nor_read_sfdp(nor, addr, len, &bfpt);
+ err = spi_nor_read_sfdp_dma_unsafe(nor, addr, len, &bfpt);
if (err < 0)
return err;
params->size = bfpt.dwords[BFPT_DWORD(2)];
if (params->size & BIT(31)) {
params->size &= ~BIT(31);
+
+ /*
+ * Prevent overflows on params->size. Anyway, a NOR of 2^64
+ * bits is unlikely to exist so this error probably means
+ * the BFPT we are reading is corrupted/wrong.
+ */
+ if (params->size > 63)
+ return -EINVAL;
+
params->size = 1ULL << params->size;
} else {
params->size++;
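To see what the "> 63" guard above rejects, recall that with bit 31 set the density field holds log2 of the size in bits, so the decode is a single 64-bit shift; a standalone sketch with a hypothetical exponent:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t n = 30;		/* hypothetical log2(size in bits) */
		uint64_t bits = 1ULL << n;	/* would overflow for n > 63 */

		/* prints "1073741824 bits = 128 MiB" */
		printf("%llu bits = %llu MiB\n",
		       (unsigned long long)bits,
		       (unsigned long long)(bits / 8 / (1024 * 1024)));
		return 0;
	}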
int i, err;
/* Get the SFDP header. */
- err = spi_nor_read_sfdp(nor, 0, sizeof(header), &header);
+ err = spi_nor_read_sfdp_dma_unsafe(nor, 0, sizeof(header), &header);
if (err < 0)
return err;
netif_dbg(priv, hw, priv->netdev, "updated MIB counters\n");
}
+static void bcm_sysport_update_tx_stats(struct bcm_sysport_priv *priv,
+ u64 *tx_bytes, u64 *tx_packets)
+{
+ struct bcm_sysport_tx_ring *ring;
+ u64 bytes = 0, packets = 0;
+ unsigned int start;
+ unsigned int q;
+
+ for (q = 0; q < priv->netdev->num_tx_queues; q++) {
+ ring = &priv->tx_rings[q];
+ do {
+ start = u64_stats_fetch_begin_irq(&priv->syncp);
+ bytes = ring->bytes;
+ packets = ring->packets;
+ } while (u64_stats_fetch_retry_irq(&priv->syncp, start));
+
+ *tx_bytes += bytes;
+ *tx_packets += packets;
+ }
+}
+
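The begin/retry loop in bcm_sysport_update_tx_stats() is the seqcount reader pattern behind the u64_stats helpers: the writer makes the sequence odd while it updates and even when done, and a reader retries until the same even value brackets its copy. A minimal userspace model of the idea (hypothetical code, not the kernel API):

	#include <stdatomic.h>
	#include <stdio.h>

	static atomic_uint seq;			/* even = stable, odd = in update */
	static unsigned long long bytes, packets;

	static void writer_update(unsigned long long b, unsigned long long p)
	{
		atomic_fetch_add_explicit(&seq, 1, memory_order_release); /* odd */
		bytes = b;
		packets = p;
		atomic_fetch_add_explicit(&seq, 1, memory_order_release); /* even */
	}

	static void reader_snapshot(unsigned long long *b, unsigned long long *p)
	{
		unsigned int start;

		do {
			start = atomic_load_explicit(&seq, memory_order_acquire);
			*b = bytes;
			*p = packets;
		} while ((start & 1) ||
			 start != atomic_load_explicit(&seq, memory_order_acquire));
	}

	int main(void)
	{
		unsigned long long b, p;

		writer_update(1500, 1);
		reader_snapshot(&b, &p);
		printf("bytes %llu packets %llu\n", b, p);
		return 0;
	}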
static void bcm_sysport_get_stats(struct net_device *dev,
struct ethtool_stats *stats, u64 *data)
{
struct bcm_sysport_stats64 *stats64 = &priv->stats64;
struct u64_stats_sync *syncp = &priv->syncp;
struct bcm_sysport_tx_ring *ring;
+ u64 tx_bytes = 0, tx_packets = 0;
unsigned int start;
int i, j;
- if (netif_running(dev))
+ if (netif_running(dev)) {
bcm_sysport_update_mib_counters(priv);
+ bcm_sysport_update_tx_stats(priv, &tx_bytes, &tx_packets);
+ stats64->tx_bytes = tx_bytes;
+ stats64->tx_packets = tx_packets;
+ }
for (i = 0, j = 0; i < BCM_SYSPORT_STATS_LEN; i++) {
const struct bcm_sysport_stats *s;
continue;
p += s->stat_offset;
- if (s->stat_sizeof == sizeof(u64))
+ if (s->stat_sizeof == sizeof(u64) &&
+ s->type == BCM_SYSPORT_STAT_NETDEV64) {
do {
start = u64_stats_fetch_begin_irq(syncp);
data[i] = *(u64 *)p;
} while (u64_stats_fetch_retry_irq(syncp, start));
- else
+ } else
data[i] = *(u32 *)p;
j++;
}
{
struct bcm_sysport_priv *priv = netdev_priv(dev);
struct bcm_sysport_stats64 *stats64 = &priv->stats64;
- struct bcm_sysport_tx_ring *ring;
- u64 tx_packets = 0, tx_bytes = 0;
unsigned int start;
- unsigned int q;
netdev_stats_to_stats64(stats, &dev->stats);
- for (q = 0; q < dev->num_tx_queues; q++) {
- ring = &priv->tx_rings[q];
- do {
- start = u64_stats_fetch_begin_irq(&priv->syncp);
- tx_bytes = ring->bytes;
- tx_packets = ring->packets;
- } while (u64_stats_fetch_retry_irq(&priv->syncp, start));
-
- stats->tx_bytes += tx_bytes;
- stats->tx_packets += tx_packets;
- }
-
- stats64->tx_bytes = stats->tx_bytes;
- stats64->tx_packets = stats->tx_packets;
+ bcm_sysport_update_tx_stats(priv, &stats->tx_bytes,
+ &stats->tx_packets);
do {
start = u64_stats_fetch_begin_irq(&priv->syncp);
{
int rc = 0;
+ if (!is_classid_clsact_ingress(cls_flower->common.classid) ||
+ cls_flower->common.chain_index)
+ return -EOPNOTSUPP;
+
switch (cls_flower->command) {
case TC_CLSFLOWER_REPLACE:
rc = bnxt_tc_add_flow(bp, src_fid, cls_flower);
#define FEC_ENET_TS_AVAIL ((uint)0x00010000)
#define FEC_ENET_TS_TIMER ((uint)0x00008000)
-#define FEC_DEFAULT_IMASK (FEC_ENET_TXF | FEC_ENET_RXF | FEC_ENET_MII | FEC_ENET_TS_TIMER)
-#define FEC_NAPI_IMASK (FEC_ENET_MII | FEC_ENET_TS_TIMER)
+#define FEC_DEFAULT_IMASK (FEC_ENET_TXF | FEC_ENET_RXF | FEC_ENET_MII)
+#define FEC_NAPI_IMASK FEC_ENET_MII
#define FEC_RX_DISABLED_IMASK (FEC_DEFAULT_IMASK & (~FEC_ENET_RXF))
/* ENET interrupt coalescing macro define */
if (int_events == 0)
return false;
- if (int_events & FEC_ENET_RXF)
+ if (int_events & FEC_ENET_RXF_0)
fep->work_rx |= (1 << 2);
if (int_events & FEC_ENET_RXF_1)
fep->work_rx |= (1 << 0);
if (int_events & FEC_ENET_RXF_2)
fep->work_rx |= (1 << 1);
- if (int_events & FEC_ENET_TXF)
+ if (int_events & FEC_ENET_TXF_0)
fep->work_tx |= (1 << 2);
if (int_events & FEC_ENET_TXF_1)
fep->work_tx |= (1 << 0);
}
if (fep->ptp_clock)
- fec_ptp_check_pps_event(fep);
-
+ if (fec_ptp_check_pps_event(fep))
+ ret = IRQ_HANDLED;
return ret;
}
}
static int hnae3_match_n_instantiate(struct hnae3_client *client,
- struct hnae3_ae_dev *ae_dev,
- bool is_reg, bool *matched)
+ struct hnae3_ae_dev *ae_dev, bool is_reg)
{
int ret;
- *matched = false;
-
/* check if this client matches the type of ae_dev */
if (!(hnae3_client_match(client->type, ae_dev->dev_type) &&
hnae_get_bit(ae_dev->flag, HNAE3_DEV_INITED_B))) {
return 0;
}
- /* there is a match of client and dev */
- *matched = true;
/* now, (un-)instantiate client by calling lower layer */
if (is_reg) {
{
struct hnae3_client *client_tmp;
struct hnae3_ae_dev *ae_dev;
- bool matched;
int ret = 0;
mutex_lock(&hnae3_common_lock);
/* if the client could not be initialized on current port, for
	 * any error reason, move on to the next available port
*/
- ret = hnae3_match_n_instantiate(client, ae_dev, true, &matched);
+ ret = hnae3_match_n_instantiate(client, ae_dev, true);
if (ret)
dev_err(&ae_dev->pdev->dev,
"match and instantiation failed for port\n");
void hnae3_unregister_client(struct hnae3_client *client)
{
struct hnae3_ae_dev *ae_dev;
- bool matched;
mutex_lock(&hnae3_common_lock);
/* un-initialize the client on every matched port */
list_for_each_entry(ae_dev, &hnae3_ae_dev_list, node) {
- hnae3_match_n_instantiate(client, ae_dev, false, &matched);
+ hnae3_match_n_instantiate(client, ae_dev, false);
}
list_del(&client->node);
const struct pci_device_id *id;
struct hnae3_ae_dev *ae_dev;
struct hnae3_client *client;
- bool matched;
int ret = 0;
mutex_lock(&hnae3_common_lock);
	 * initialize the matched client instance
*/
list_for_each_entry(client, &hnae3_client_list, node) {
- ret = hnae3_match_n_instantiate(client, ae_dev, true,
- &matched);
+ ret = hnae3_match_n_instantiate(client, ae_dev, true);
if (ret)
dev_err(&ae_dev->pdev->dev,
"match and instantiation failed\n");
- if (matched)
- break;
}
}
const struct pci_device_id *id;
struct hnae3_ae_dev *ae_dev;
struct hnae3_client *client;
- bool matched;
mutex_lock(&hnae3_common_lock);
/* Check if there are matched ae_dev */
/* check the client list for the match with this ae_dev type and
	 * un-initialize the matched client instance
*/
- list_for_each_entry(client, &hnae3_client_list, node) {
- hnae3_match_n_instantiate(client, ae_dev, false,
- &matched);
- if (matched)
- break;
- }
+ list_for_each_entry(client, &hnae3_client_list, node)
+ hnae3_match_n_instantiate(client, ae_dev, false);
ae_algo->ops->uninit_ae_dev(ae_dev);
hnae_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 0);
const struct pci_device_id *id;
struct hnae3_ae_algo *ae_algo;
struct hnae3_client *client;
- bool matched;
int ret = 0;
mutex_lock(&hnae3_common_lock);
	 * initialize the matched client instance
*/
list_for_each_entry(client, &hnae3_client_list, node) {
- ret = hnae3_match_n_instantiate(client, ae_dev, true,
- &matched);
+ ret = hnae3_match_n_instantiate(client, ae_dev, true);
if (ret)
dev_err(&ae_dev->pdev->dev,
"match and instantiation failed\n");
- if (matched)
- break;
}
out_err:
const struct pci_device_id *id;
struct hnae3_ae_algo *ae_algo;
struct hnae3_client *client;
- bool matched;
mutex_lock(&hnae3_common_lock);
/* Check if there are matched ae_algo */
if (!id)
continue;
- list_for_each_entry(client, &hnae3_client_list, node) {
- hnae3_match_n_instantiate(client, ae_dev, false,
- &matched);
- if (matched)
- break;
- }
+ list_for_each_entry(client, &hnae3_client_list, node)
+ hnae3_match_n_instantiate(client, ae_dev, false);
ae_algo->ops->uninit_ae_dev(ae_dev);
hnae_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 0);
#define HNAE3_CLASS_NAME_SIZE 16
#define HNAE3_DEV_INITED_B 0x0
-#define HNAE_DEV_SUPPORT_ROCE_B 0x1
+#define HNAE3_DEV_SUPPORT_ROCE_B 0x1
+#define HNAE3_DEV_SUPPORT_DCB_B 0x2
+
+#define HNAE3_DEV_SUPPORT_ROCE_DCB_BITS (BIT(HNAE3_DEV_SUPPORT_DCB_B) |\
+ BIT(HNAE3_DEV_SUPPORT_ROCE_B))
+
+#define hnae3_dev_roce_supported(hdev) \
+ hnae_get_bit(hdev->ae_dev->flag, HNAE3_DEV_SUPPORT_ROCE_B)
+
+#define hnae3_dev_dcb_supported(hdev) \
+ hnae_get_bit(hdev->ae_dev->flag, HNAE3_DEV_SUPPORT_DCB_B)
#define ring_ptr_move_fw(ring, p) \
((ring)->p = ((ring)->p + 1) % (ring)->desc_num)
struct hnae3_tc_info {
u16 tqp_offset; /* TQP offset from base TQP */
u16 tqp_count; /* Total TQPs */
- u8 up; /* user priority */
u8 tc; /* TC index */
bool enable; /* If this TC is enable or not */
};
#define HNAE3_MAX_TC 8
+#define HNAE3_MAX_USER_PRIO 8
struct hnae3_knic_private_info {
struct net_device *netdev; /* Set by KNIC client when init instance */
u16 rss_size; /* Allocated RSS queues */
u16 num_desc;
u8 num_tc; /* Total number of enabled TCs */
+ u8 prio_tc[HNAE3_MAX_USER_PRIO]; /* TC indexed by prio */
struct hnae3_tc_info tc_info[HNAE3_MAX_TC]; /* Idx of array is HW TC */
u16 num_tqps; /* total number of TQPs in this handle */
u8 rsv[18];
};
-#define HCLGE_VECTOR_ELEMENTS_PER_CMD 11
+#define HCLGE_VECTOR_ELEMENTS_PER_CMD 10
enum hclge_int_type {
HCLGE_INT_TX,
#define HCLGE_INT_TYPE_S 0
#define HCLGE_INT_TYPE_M 0x3
#define HCLGE_TQP_ID_S 2
-#define HCLGE_TQP_ID_M (0x3fff << HCLGE_TQP_ID_S)
+#define HCLGE_TQP_ID_M (0x7ff << HCLGE_TQP_ID_S)
+#define HCLGE_INT_GL_IDX_S 13
+#define HCLGE_INT_GL_IDX_M (0x3 << HCLGE_INT_GL_IDX_S)
__le16 tqp_type_and_id[HCLGE_VECTOR_ELEMENTS_PER_CMD];
+ u8 vfid;
+ u8 rsv;
};
#define HCLGE_TC_NUM 8
struct hclge_rx_priv_buff {
__le16 buf_num[HCLGE_TC_NUM];
- u8 rsv[8];
+ __le16 shared_buf;
+ u8 rsv[6];
};
struct hclge_query_version {
#define HCLGE_DEFAULT_TX_BUF 0x4000 /* 16k bytes */
#define HCLGE_TOTAL_PKT_BUF 0x108000 /* 1.03125M bytes */
#define HCLGE_DEFAULT_DV 0xA000 /* 40k byte */
+#define HCLGE_DEFAULT_NON_DCB_DV 0x7800 /* 30K byte */
#define HCLGE_TYPE_CRQ 0
#define HCLGE_TYPE_CSQ 1
{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA), 0},
{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC), 0},
{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC), 0},
- /* Required last entry */
- {0, }
-};
-
-static const struct pci_device_id roce_pci_tbl[] = {
- {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA), 0},
- {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA_MACSEC), 0},
- {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA), 0},
- {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC), 0},
- {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC), 0},
- /* Required last entry */
+ /* required last entry */
{0, }
};
hdev->num_tqps = __le16_to_cpu(req->tqp_num);
hdev->pkt_buf_size = __le16_to_cpu(req->buf_size) << HCLGE_BUF_UNIT_S;
- if (hnae_get_bit(hdev->ae_dev->flag, HNAE_DEV_SUPPORT_ROCE_B)) {
+ if (hnae3_dev_roce_supported(hdev)) {
hdev->num_roce_msix =
hnae_get_field(__le16_to_cpu(req->pf_intr_vector_number),
HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S);
hdev->base_tqp_pid = 0;
hdev->rss_size_max = 1;
hdev->rx_buf_len = cfg.rx_buf_len;
- for (i = 0; i < ETH_ALEN; i++)
- hdev->hw.mac.mac_addr[i] = cfg.mac_addr[i];
+ ether_addr_copy(hdev->hw.mac.mac_addr, cfg.mac_addr);
hdev->hw.mac.media_type = cfg.media_type;
+ hdev->hw.mac.phy_addr = cfg.phy_addr;
hdev->num_desc = cfg.tqp_desc_num;
hdev->tm_info.num_pg = 1;
hdev->tm_info.num_tc = cfg.tc_num;
tc_num = hclge_get_tc_num(hdev);
pfc_enable_num = hclge_get_pfc_enalbe_num(hdev);
- shared_buf_min = 2 * hdev->mps + HCLGE_DEFAULT_DV;
+ if (hnae3_dev_dcb_supported(hdev))
+ shared_buf_min = 2 * hdev->mps + HCLGE_DEFAULT_DV;
+ else
+ shared_buf_min = 2 * hdev->mps + HCLGE_DEFAULT_NON_DCB_DV;
+
shared_buf_tc = pfc_enable_num * hdev->mps +
(tc_num - pfc_enable_num) * hdev->mps / 2 +
hdev->mps;
struct hclge_priv_buf *priv;
int i;
+ /* When DCB is not supported, rx private
+ * buffer is not allocated.
+ */
+ if (!hnae3_dev_dcb_supported(hdev)) {
+ if (!hclge_is_rx_buf_ok(hdev, rx_all))
+ return -ENOMEM;
+
+ return 0;
+ }
+
/* step 1, try to alloc private buffer for all enabled tc */
for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
priv = &hdev->priv_buf[i];
priv->wl.high = 2 * hdev->mps;
priv->buf_size = priv->wl.high;
}
+ } else {
+ priv->enable = 0;
+ priv->wl.low = 0;
+ priv->wl.high = 0;
+ priv->buf_size = 0;
}
}
for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
priv = &hdev->priv_buf[i];
- if (hdev->hw_tc_map & BIT(i))
- priv->enable = 1;
+ priv->enable = 0;
+ priv->wl.low = 0;
+ priv->wl.high = 0;
+ priv->buf_size = 0;
+
+ if (!(hdev->hw_tc_map & BIT(i)))
+ continue;
+
+ priv->enable = 1;
if (hdev->tm_info.hw_pfc_map & BIT(i)) {
priv->wl.low = 128;
cpu_to_le16(true << HCLGE_TC0_PRI_BUF_EN_B);
}
+ req->shared_buf =
+ cpu_to_le16((hdev->s_buf.buf_size >> HCLGE_BUF_UNIT_S) |
+ (1 << HCLGE_TC0_PRI_BUF_EN_B));
+
ret = hclge_cmd_send(&hdev->hw, &desc, 1);
if (ret) {
dev_err(&hdev->pdev->dev,
return ret;
}
- ret = hclge_rx_priv_wl_config(hdev);
- if (ret) {
- dev_err(&hdev->pdev->dev,
- "could not configure rx private waterline %d\n", ret);
- return ret;
- }
+ if (hnae3_dev_dcb_supported(hdev)) {
+ ret = hclge_rx_priv_wl_config(hdev);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "could not configure rx private waterline %d\n",
+ ret);
+ return ret;
+ }
- ret = hclge_common_thrd_config(hdev);
- if (ret) {
- dev_err(&hdev->pdev->dev,
- "could not configure common threshold %d\n", ret);
- return ret;
+ ret = hclge_common_thrd_config(hdev);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "could not configure common threshold %d\n",
+ ret);
+ return ret;
+ }
}
ret = hclge_common_wl_config(hdev);
u16 tc_valid[HCLGE_MAX_TC_NUM];
u16 tc_size[HCLGE_MAX_TC_NUM];
u32 *rss_indir = NULL;
+ u16 rss_size = 0, roundup_size;
const u8 *key;
int i, ret, j;
for (j = 0; j < hdev->num_vmdq_vport + 1; j++) {
for (i = 0; i < HCLGE_RSS_IND_TBL_SIZE; i++) {
vport[j].rss_indirection_tbl[i] =
- i % hdev->rss_size_max;
+ i % vport[j].alloc_rss_size;
+
+ /* vport 0 is for PF */
+ if (j != 0)
+ continue;
+
+ rss_size = vport[j].alloc_rss_size;
rss_indir[i] = vport[j].rss_indirection_tbl[i];
}
}
if (ret)
goto err;
+	/* Each TC has the same queue size, and the tc_size set to hardware is
+	 * the log2 of the roundup power of two of rss_size; the actual queue
+	 * size is limited by the indirection table.
+	 */
+ if (rss_size > HCLGE_RSS_TC_SIZE_7 || rss_size == 0) {
+ dev_err(&hdev->pdev->dev,
+ "Configure rss tc size failed, invalid TC_SIZE = %d\n",
+ rss_size);
+ return -EINVAL;
+ }
+
+ roundup_size = roundup_pow_of_two(rss_size);
+ roundup_size = ilog2(roundup_size);
+
for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
- if (hdev->hw_tc_map & BIT(i))
- tc_valid[i] = 1;
- else
- tc_valid[i] = 0;
+ tc_valid[i] = 0;
- switch (hdev->rss_size_max) {
- case HCLGE_RSS_TC_SIZE_0:
- tc_size[i] = 0;
- break;
- case HCLGE_RSS_TC_SIZE_1:
- tc_size[i] = 1;
- break;
- case HCLGE_RSS_TC_SIZE_2:
- tc_size[i] = 2;
- break;
- case HCLGE_RSS_TC_SIZE_3:
- tc_size[i] = 3;
- break;
- case HCLGE_RSS_TC_SIZE_4:
- tc_size[i] = 4;
- break;
- case HCLGE_RSS_TC_SIZE_5:
- tc_size[i] = 5;
- break;
- case HCLGE_RSS_TC_SIZE_6:
- tc_size[i] = 6;
- break;
- case HCLGE_RSS_TC_SIZE_7:
- tc_size[i] = 7;
- break;
- default:
- break;
- }
- tc_offset[i] = hdev->rss_size_max * i;
+ if (!(hdev->hw_tc_map & BIT(i)))
+ continue;
+
+ tc_valid[i] = 1;
+ tc_size[i] = roundup_size;
+ tc_offset[i] = rss_size * i;
}
+
ret = hclge_set_rss_tc_mode(hdev, tc_valid, tc_size, tc_offset);
err:
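To make the tc_size computation above concrete, here is a standalone model with stand-ins for the kernel's roundup_pow_of_two() and ilog2() helpers (hypothetical rss_size, assumed non-zero):

	#include <stdio.h>

	static unsigned int roundup_pow_of_two(unsigned int x)
	{
		unsigned int r = 1;

		while (r < x)
			r <<= 1;
		return r;
	}

	static unsigned int ilog2(unsigned int x)
	{
		unsigned int l = 0;

		while (x >>= 1)
			l++;
		return l;
	}

	int main(void)
	{
		unsigned int rss_size = 24;	/* hypothetical per-TC queue count */
		unsigned int tc_size = ilog2(roundup_pow_of_two(rss_size));

		/* prints "rss_size 24 -> tc_size 5 (hw queue span 32)" */
		printf("rss_size %u -> tc_size %u (hw queue span %u)\n",
		       rss_size, tc_size, 1U << tc_size);
		return 0;
	}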
hnae_get_bit(node->flag, HNAE3_RING_TYPE_B));
hnae_set_field(req->tqp_type_and_id[i], HCLGE_TQP_ID_M,
HCLGE_TQP_ID_S, node->tqp_index);
+ hnae_set_field(req->tqp_type_and_id[i], HCLGE_INT_GL_IDX_M,
+ HCLGE_INT_GL_IDX_S,
+ hnae_get_bit(node->flag, HNAE3_RING_TYPE_B));
req->tqp_type_and_id[i] = cpu_to_le16(req->tqp_type_and_id[i]);
+ req->vfid = vport->vport_id;
if (++i >= HCLGE_VECTOR_ELEMENTS_PER_CMD) {
req->int_cause_num = HCLGE_VECTOR_ELEMENTS_PER_CMD;
hnae_get_bit(node->flag, HNAE3_RING_TYPE_B));
hnae_set_field(req->tqp_type_and_id[i], HCLGE_TQP_ID_M,
HCLGE_TQP_ID_S, node->tqp_index);
+ hnae_set_field(req->tqp_type_and_id[i], HCLGE_INT_GL_IDX_M,
+ HCLGE_INT_GL_IDX_S,
+ hnae_get_bit(node->flag, HNAE3_RING_TYPE_B));
req->tqp_type_and_id[i] = cpu_to_le16(req->tqp_type_and_id[i]);
+ req->vfid = vport->vport_id;
if (++i >= HCLGE_VECTOR_ELEMENTS_PER_CMD) {
req->int_cause_num = HCLGE_VECTOR_ELEMENTS_PER_CMD;
}
i = 0;
hclge_cmd_setup_basic_desc(&desc,
- HCLGE_OPC_ADD_RING_TO_VECTOR,
+ HCLGE_OPC_DEL_RING_TO_VECTOR,
false);
req->int_vector_id = vector_id;
}
{
#define HCLGE_VLAN_TYPE_VF_TABLE 0
#define HCLGE_VLAN_TYPE_PORT_TABLE 1
+ struct hnae3_handle *handle;
int ret;
ret = hclge_set_vlan_filter_ctrl(hdev, HCLGE_VLAN_TYPE_VF_TABLE,
ret = hclge_set_vlan_filter_ctrl(hdev, HCLGE_VLAN_TYPE_PORT_TABLE,
true);
+ if (ret)
+ return ret;
- return ret;
+ handle = &hdev->vport[0].nic;
+ return hclge_set_port_vlan_filter(handle, htons(ETH_P_8021Q), 0, false);
}
static int hclge_set_mtu(struct hnae3_handle *handle, int new_mtu)
goto err;
if (hdev->roce_client &&
- hnae_get_bit(hdev->ae_dev->flag,
- HNAE_DEV_SUPPORT_ROCE_B)) {
+ hnae3_dev_roce_supported(hdev)) {
struct hnae3_client *rc = hdev->roce_client;
ret = hclge_init_roce_base_info(vport);
break;
case HNAE3_CLIENT_ROCE:
- if (hnae_get_bit(hdev->ae_dev->flag,
- HNAE_DEV_SUPPORT_ROCE_B)) {
+ if (hnae3_dev_roce_supported(hdev)) {
hdev->roce_client = client;
vport->roce.client = client;
}
static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
{
struct pci_dev *pdev = ae_dev->pdev;
- const struct pci_device_id *id;
struct hclge_dev *hdev;
int ret;
hdev->ae_dev = ae_dev;
ae_dev->priv = hdev;
- id = pci_match_id(roce_pci_tbl, ae_dev->pdev);
- if (id)
- hnae_set_bit(ae_dev->flag, HNAE_DEV_SUPPORT_ROCE_B, 1);
-
ret = hclge_pci_init(hdev);
if (ret) {
dev_err(&pdev->dev, "PCI init failed\n");
return ret;
}
- ret = hclge_rss_init_hw(hdev);
- if (ret) {
- dev_err(&pdev->dev, "Rss init fail, ret =%d\n", ret);
- return ret;
- }
-
ret = hclge_init_vlan_config(hdev);
if (ret) {
dev_err(&pdev->dev, "VLAN init fail, ret =%d\n", ret);
return ret;
}
+ ret = hclge_rss_init_hw(hdev);
+ if (ret) {
+ dev_err(&pdev->dev, "Rss init fail, ret =%d\n", ret);
+ return ret;
+ }
+
setup_timer(&hdev->service_timer, hclge_service_timer,
(unsigned long)hdev);
INIT_WORK(&hdev->service_task, hclge_service_task);
struct hclge_tc_info {
u8 tc_id;
u8 tc_sch_mode; /* 0: sp; 1: dwrr */
- u8 up;
u8 pgid;
u32 bw_limit;
};
u8 num_tc;
u8 num_pg; /* It must be 1 if vNET-Base schd */
u8 pg_dwrr[HCLGE_PG_NUM];
+ u8 prio_tc[HNAE3_MAX_USER_PRIO];
struct hclge_pg_info pg_info[HCLGE_PG_NUM];
struct hclge_tc_info tc_info[HNAE3_MAX_TC];
enum hclge_fc_mode fc_mode;
u8 rss_hash_key[HCLGE_RSS_KEY_SIZE]; /* User configured hash keys */
/* User configured lookup table entries */
u8 rss_indirection_tbl[HCLGE_RSS_IND_TBL_SIZE];
+ u16 alloc_rss_size;
u16 qs_offset;
u16 bw_limit; /* VSI BW Limit (0 = disabled) */
{
u8 tc;
- for (tc = 0; tc < hdev->tm_info.num_tc; tc++)
- if (hdev->tm_info.tc_info[tc].up == pri_id)
- break;
+ tc = hdev->tm_info.prio_tc[pri_id];
if (tc >= hdev->tm_info.num_tc)
return -EINVAL;
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_PRI_TO_TC_MAPPING, false);
- for (pri_id = 0; pri_id < hdev->tm_info.num_tc; pri_id++) {
+ for (pri_id = 0; pri_id < HNAE3_MAX_USER_PRIO; pri_id++) {
ret = hclge_fill_pri_array(hdev, pri, pri_id);
if (ret)
return ret;
shap_cfg_cmd->pg_id = pg_id;
- hclge_tm_set_feild(shap_cfg_cmd->pg_shapping_para, IR_B, ir_b);
- hclge_tm_set_feild(shap_cfg_cmd->pg_shapping_para, IR_U, ir_u);
- hclge_tm_set_feild(shap_cfg_cmd->pg_shapping_para, IR_S, ir_s);
- hclge_tm_set_feild(shap_cfg_cmd->pg_shapping_para, BS_B, bs_b);
- hclge_tm_set_feild(shap_cfg_cmd->pg_shapping_para, BS_S, bs_s);
+ hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, IR_B, ir_b);
+ hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, IR_U, ir_u);
+ hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, IR_S, ir_s);
+ hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, BS_B, bs_b);
+ hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, BS_S, bs_s);
return hclge_cmd_send(&hdev->hw, &desc, 1);
}
shap_cfg_cmd->pri_id = pri_id;
- hclge_tm_set_feild(shap_cfg_cmd->pri_shapping_para, IR_B, ir_b);
- hclge_tm_set_feild(shap_cfg_cmd->pri_shapping_para, IR_U, ir_u);
- hclge_tm_set_feild(shap_cfg_cmd->pri_shapping_para, IR_S, ir_s);
- hclge_tm_set_feild(shap_cfg_cmd->pri_shapping_para, BS_B, bs_b);
- hclge_tm_set_feild(shap_cfg_cmd->pri_shapping_para, BS_S, bs_s);
+ hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, IR_B, ir_b);
+ hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, IR_U, ir_u);
+ hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, IR_S, ir_s);
+ hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, BS_B, bs_b);
+ hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, BS_S, bs_s);
return hclge_cmd_send(&hdev->hw, &desc, 1);
}
kinfo->num_tqps / kinfo->num_tc);
vport->qs_offset = hdev->tm_info.num_tc * vport->vport_id;
vport->dwrr = 100; /* 100 percent as init */
+ vport->alloc_rss_size = kinfo->rss_size;
for (i = 0; i < kinfo->num_tc; i++) {
if (hdev->hw_tc_map & BIT(i)) {
kinfo->tc_info[i].tqp_offset = i * kinfo->rss_size;
kinfo->tc_info[i].tqp_count = kinfo->rss_size;
kinfo->tc_info[i].tc = i;
- kinfo->tc_info[i].up = hdev->tm_info.tc_info[i].up;
} else {
/* Set to default queue if TC is disable */
kinfo->tc_info[i].enable = false;
kinfo->tc_info[i].tqp_offset = 0;
kinfo->tc_info[i].tqp_count = 1;
kinfo->tc_info[i].tc = 0;
- kinfo->tc_info[i].up = 0;
}
}
+
+ memcpy(kinfo->prio_tc, hdev->tm_info.prio_tc,
+ FIELD_SIZEOF(struct hnae3_knic_private_info, prio_tc));
}
static void hclge_tm_vport_info_update(struct hclge_dev *hdev)
for (i = 0; i < hdev->tm_info.num_tc; i++) {
hdev->tm_info.tc_info[i].tc_id = i;
hdev->tm_info.tc_info[i].tc_sch_mode = HCLGE_SCH_MODE_DWRR;
- hdev->tm_info.tc_info[i].up = i;
hdev->tm_info.tc_info[i].pgid = 0;
hdev->tm_info.tc_info[i].bw_limit =
hdev->tm_info.pg_info[0].bw_limit;
}
+ for (i = 0; i < HNAE3_MAX_USER_PRIO; i++)
+ hdev->tm_info.prio_tc[i] =
+ (i >= hdev->tm_info.num_tc) ? 0 : i;
+
hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE;
}
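The default mapping built above is identity for priorities below the enabled TC count and falls back to TC 0 beyond it; a tiny sketch with a hypothetical num_tc of 4:

	#include <stdio.h>

	int main(void)
	{
		unsigned int num_tc = 4, i;	/* hypothetical enabled TC count */

		for (i = 0; i < 8; i++)		/* HNAE3_MAX_USER_PRIO == 8 */
			printf("prio %u -> tc %u\n", i, (i >= num_tc) ? 0 : i);
		return 0;
	}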
if (ret)
return ret;
+	/* Only DCB-supported devices support qset back pressure setting */
+ if (!hnae3_dev_dcb_supported(hdev))
+ return 0;
+
for (i = 0; i < hdev->tm_info.num_tc; i++) {
ret = hclge_tm_qs_bp_cfg(hdev, i);
if (ret)
u32 rsvd1;
};
-#define hclge_tm_set_feild(dest, string, val) \
+#define hclge_tm_set_field(dest, string, val) \
hnae_set_field((dest), (HCLGE_TM_SHAP_##string##_MSK), \
(HCLGE_TM_SHAP_##string##_LSH), val)
-#define hclge_tm_get_feild(src, string) \
+#define hclge_tm_get_field(src, string) \
hnae_get_field((src), (HCLGE_TM_SHAP_##string##_MSK), \
(HCLGE_TM_SHAP_##string##_LSH))
static const struct pci_device_id hns3_pci_tbl[] = {
{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_GE), 0},
{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE), 0},
- {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA), 0},
- {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA_MACSEC), 0},
- {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA), 0},
- {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC), 0},
- {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC), 0},
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA),
+ HNAE3_DEV_SUPPORT_ROCE_DCB_BITS},
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA_MACSEC),
+ HNAE3_DEV_SUPPORT_ROCE_DCB_BITS},
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA),
+ HNAE3_DEV_SUPPORT_ROCE_DCB_BITS},
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC),
+ HNAE3_DEV_SUPPORT_ROCE_DCB_BITS},
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC),
+ HNAE3_DEV_SUPPORT_ROCE_DCB_BITS},
/* required last entry */
{0, }
};
}
ae_dev->pdev = pdev;
+ ae_dev->flag = ent->driver_data;
ae_dev->dev_type = HNAE3_DEV_KNIC;
pci_set_drvdata(pdev, ae_dev);
eth_hw_addr_random(netdev);
dev_warn(priv->dev, "using random MAC address %pM\n",
netdev->dev_addr);
- /* Also copy this new MAC address into hdev */
- if (h->ae_algo->ops->set_mac_addr)
- h->ae_algo->ops->set_mac_addr(h, netdev->dev_addr);
}
+
+ if (h->ae_algo->ops->set_mac_addr)
+ h->ae_algo->ops->set_mac_addr(h, netdev->dev_addr);
+
}
static void hns3_nic_set_priv_ops(struct net_device *netdev)
unsigned long flags;
MAL_DBG2(mal, "poll(%d)" NL, budget);
- again:
+
/* Process TX skbs */
list_for_each(l, &mal->poll_list) {
struct mal_commac *mc =
spin_lock_irqsave(&mal->lock, flags);
mal_disable_eob_irq(mal);
spin_unlock_irqrestore(&mal->lock, flags);
- goto again;
}
mc->ops->poll_tx(mc->dev);
}
static int emac_get_sset_count(struct net_device *netdev, int sset)
{
switch (sset) {
+ case ETH_SS_PRIV_FLAGS:
+ return 1;
case ETH_SS_STATS:
return EMAC_STATS_LEN;
default:
unsigned int i;
switch (stringset) {
+ case ETH_SS_PRIV_FLAGS:
+ strcpy(data, "single-pause-mode");
+ break;
+
case ETH_SS_STATS:
for (i = 0; i < EMAC_STATS_LEN; i++) {
strlcpy(data, emac_ethtool_stat_strings[i],
return EMAC_MAX_REG_SIZE * sizeof(u32);
}
+#define EMAC_PRIV_ENABLE_SINGLE_PAUSE BIT(0)
+
+static int emac_set_priv_flags(struct net_device *netdev, u32 flags)
+{
+ struct emac_adapter *adpt = netdev_priv(netdev);
+
+ adpt->single_pause_mode = !!(flags & EMAC_PRIV_ENABLE_SINGLE_PAUSE);
+
+ if (netif_running(netdev))
+ return emac_reinit_locked(adpt);
+
+ return 0;
+}
+
+static u32 emac_get_priv_flags(struct net_device *netdev)
+{
+ struct emac_adapter *adpt = netdev_priv(netdev);
+
+ return adpt->single_pause_mode ? EMAC_PRIV_ENABLE_SINGLE_PAUSE : 0;
+}
+
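With the get/set hooks wired into emac_ethtool_ops below, the mode should be reachable from userspace through ethtool's generic private-flags interface, e.g. "ethtool --show-priv-flags <iface>" and "ethtool --set-priv-flags <iface> single-pause-mode on" (flag name as registered in the get_strings handler above).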
static const struct ethtool_ops emac_ethtool_ops = {
.get_link_ksettings = phy_ethtool_get_link_ksettings,
.set_link_ksettings = phy_ethtool_set_link_ksettings,
.get_regs_len = emac_get_regs_len,
.get_regs = emac_get_regs,
+
+ .set_priv_flags = emac_set_priv_flags,
+ .get_priv_flags = emac_get_priv_flags,
};
void emac_set_ethtool_ops(struct net_device *netdev)
mac &= ~(HUGEN | VLAN_STRIP | TPAUSE | SIMR | HUGE | MULTI_ALL |
DEBUG_MODE | SINGLE_PAUSE_MODE);
+ /* Enable single-pause-frame mode if requested.
+ *
+ * If enabled, the EMAC will send a single pause frame when the RX
+ * queue is full. This normally leads to packet loss because
+ * the pause frame disables the remote MAC only for 33ms (the quanta),
+ * and then the remote MAC continues sending packets even though
+ * the RX queue is still full.
+ *
+ * If disabled, the EMAC sends a pause frame every 31ms until the RX
+ * queue is no longer full. Normally, this is the preferred
+ * method of operation. However, when the system is hung (e.g.
+ * cores are halted), the EMAC interrupt handler is never called
+	 * and so the RX queue fills up quickly and stays full. The resulting
+ * non-stop "flood" of pause frames sometimes has the effect of
+ * disabling nearby switches. In some cases, other nearby switches
+ * are also affected, shutting down the entire network.
+ *
+ * The user can enable or disable single-pause-frame mode
+ * via ethtool.
+ */
+ mac |= adpt->single_pause_mode ? SINGLE_PAUSE_MODE : 0;
+
writel_relaxed(csr1, adpt->csr + EMAC_EMAC_WRAPPER_CSR1);
writel_relaxed(mac, adpt->base + EMAC_MAC_CTRL);
/* default to automatic flow control */
adpt->automatic = true;
+
+ /* Disable single-pause-frame mode by default */
+ adpt->single_pause_mode = false;
}
/* Get the clock */
bool tx_flow_control;
bool rx_flow_control;
+ /* True == use single-pause-frame mode. */
+ bool single_pause_mode;
+
/* Ring parameter */
u8 tpd_burst;
u8 rfd_burst;
if (likely(RTL_R16(IntrStatus) & RxAckBits))
work_done += rtl8139_rx(dev, tp, budget);
- if (work_done < budget && napi_complete_done(napi, work_done)) {
+ if (work_done < budget) {
unsigned long flags;
spin_lock_irqsave(&tp->lock, flags);
- RTL_W16_F(IntrMask, rtl8139_intr_mask);
+ if (napi_complete_done(napi, work_done))
+ RTL_W16_F(IntrMask, rtl8139_intr_mask);
spin_unlock_irqrestore(&tp->lock, flags);
}
spin_unlock(&tp->rx_lock);
{ .compatible = "allwinner,sun8i-h3-emac" },
{ .compatible = "allwinner,sun8i-v3s-emac" },
{ .compatible = "allwinner,sun50i-a64-emac" },
+ {},
};
/* If phy-handle property is passed from DT, use it as the PHY */
u32 num_chn;
u32 send_sections;
u32 recv_sections;
+ u32 send_section_size;
+ u32 recv_section_size;
};
enum rndis_device_state {
net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;
- net_device->recv_section_size = NETVSC_RECV_SECTION_SIZE;
- net_device->send_section_size = NETVSC_SEND_SECTION_SIZE;
-
init_completion(&net_device->channel_init_wait);
init_waitqueue_head(&net_device->subchan_open);
INIT_WORK(&net_device->subchan_work, rndis_set_subchannel);
int ret = 0;
/* Get receive buffer area. */
- buf_size = device_info->recv_sections * net_device->recv_section_size;
+ buf_size = device_info->recv_sections * device_info->recv_section_size;
buf_size = roundup(buf_size, PAGE_SIZE);
net_device->recv_buf = vzalloc(buf_size);
goto cleanup;
/* Now setup the send buffer. */
- buf_size = device_info->send_sections * net_device->send_section_size;
+ buf_size = device_info->send_sections * device_info->send_section_size;
buf_size = round_up(buf_size, PAGE_SIZE);
net_device->send_buf = vzalloc(buf_size);
device_info.num_chn = count;
device_info.ring_size = ring_size;
device_info.send_sections = nvdev->send_section_cnt;
+ device_info.send_section_size = nvdev->send_section_size;
device_info.recv_sections = nvdev->recv_section_cnt;
+ device_info.recv_section_size = nvdev->recv_section_size;
rndis_filter_device_remove(dev, nvdev);
device_info.ring_size = ring_size;
device_info.num_chn = nvdev->num_chn;
device_info.send_sections = nvdev->send_section_cnt;
+ device_info.send_section_size = nvdev->send_section_size;
device_info.recv_sections = nvdev->recv_section_cnt;
+ device_info.recv_section_size = nvdev->recv_section_size;
rndis_filter_device_remove(hdev, nvdev);
device_info.num_chn = nvdev->num_chn;
device_info.ring_size = ring_size;
device_info.send_sections = new_tx;
+ device_info.send_section_size = nvdev->send_section_size;
device_info.recv_sections = new_rx;
+ device_info.recv_section_size = nvdev->recv_section_size;
netif_device_detach(ndev);
was_opened = rndis_filter_opened(nvdev);
device_info.ring_size = ring_size;
device_info.num_chn = VRSS_CHANNEL_DEFAULT;
device_info.send_sections = NETVSC_DEFAULT_TX;
+ device_info.send_section_size = NETVSC_SEND_SECTION_SIZE;
device_info.recv_sections = NETVSC_DEFAULT_RX;
+ device_info.recv_section_size = NETVSC_RECV_SECTION_SIZE;
nvdev = rndis_filter_device_add(dev, &device_info);
if (IS_ERR(nvdev)) {
endif
-menuconfig PHYLIB
- tristate "PHY Device support and infrastructure"
- depends on NETDEVICES
- select MDIO_DEVICE
- help
- Ethernet controllers are usually attached to PHY
- devices. This option provides infrastructure for
- managing PHY devices.
-
config PHYLINK
tristate
depends on NETDEVICES
configuration links, PHYs, and Serdes links with MAC level
autonegotiation modes.
+menuconfig PHYLIB
+ tristate "PHY Device support and infrastructure"
+ depends on NETDEVICES
+ select MDIO_DEVICE
+ help
+ Ethernet controllers are usually attached to PHY
+ devices. This option provides infrastructure for
+ managing PHY devices.
+
if PHYLIB
config SWPHY
cmd->base.port = PORT_BNC;
else
cmd->base.port = PORT_MII;
-
+ cmd->base.transceiver = phy_is_internal(phydev) ?
+ XCVR_INTERNAL : XCVR_EXTERNAL;
cmd->base.phy_address = phydev->mdio.addr;
cmd->base.autoneg = phydev->autoneg;
cmd->base.eth_tp_mdix_ctrl = phydev->mdix_ctrl;
{
const char *drv_name = phydev->drv ? phydev->drv->name : "unbound";
char *irq_str;
- char irq_num[4];
+ char irq_num[8];
switch(phydev->irq) {
case PHY_POLL:
priv->phy_drv->read_status(phydev);
val = mdiobus_read(phydev->mdio.bus, priv->addr, XILINX_GMII2RGMII_REG);
- val &= XILINX_GMII2RGMII_SPEED_MASK;
+ val &= ~XILINX_GMII2RGMII_SPEED_MASK;
if (phydev->speed == SPEED_1000)
val |= BMCR_SPEED1000;
struct ethtool_eeprom *ee, u8 *data)
{
struct lan78xx_net *dev = netdev_priv(netdev);
+ int ret;
+
+ ret = usb_autopm_get_interface(dev->intf);
+ if (ret)
+ return ret;
ee->magic = LAN78XX_EEPROM_MAGIC;
- return lan78xx_read_raw_eeprom(dev, ee->offset, ee->len, data);
+ ret = lan78xx_read_raw_eeprom(dev, ee->offset, ee->len, data);
+
+ usb_autopm_put_interface(dev->intf);
+
+ return ret;
}
static int lan78xx_ethtool_set_eeprom(struct net_device *netdev,
struct ethtool_eeprom *ee, u8 *data)
{
struct lan78xx_net *dev = netdev_priv(netdev);
+ int ret;
+
+ ret = usb_autopm_get_interface(dev->intf);
+ if (ret)
+ return ret;
- /* Allow entire eeprom update only */
- if ((ee->magic == LAN78XX_EEPROM_MAGIC) &&
- (ee->offset == 0) &&
- (ee->len == 512) &&
- (data[0] == EEPROM_INDICATOR))
- return lan78xx_write_raw_eeprom(dev, ee->offset, ee->len, data);
+ /* Invalid EEPROM_INDICATOR at offset zero will result in a failure
+ * to load data from EEPROM
+ */
+ if (ee->magic == LAN78XX_EEPROM_MAGIC)
+ ret = lan78xx_write_raw_eeprom(dev, ee->offset, ee->len, data);
else if ((ee->magic == LAN78XX_OTP_MAGIC) &&
(ee->offset == 0) &&
(ee->len == 512) &&
(data[0] == OTP_INDICATOR_1))
- return lan78xx_write_raw_otp(dev, ee->offset, ee->len, data);
+ ret = lan78xx_write_raw_otp(dev, ee->offset, ee->len, data);
+
+ usb_autopm_put_interface(dev->intf);
- return -EINVAL;
+ return ret;
}
static void lan78xx_get_strings(struct net_device *netdev, u32 stringset,
/* LAN7801 only has RGMII mode */
if (dev->chipid == ID_REV_CHIP_ID_7801_)
buf &= ~MAC_CR_GMII_EN_;
- buf |= MAC_CR_AUTO_DUPLEX_ | MAC_CR_AUTO_SPEED_;
ret = lan78xx_write_reg(dev, MAC_CR, buf);
ret = lan78xx_read_reg(dev, MAC_TX, &buf);
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
struct nd_namespace_index *nsindex;
+ /*
+ * If any of the DIMMs do not support labels the only
+ * possible BTT format is v1.
+ */
+ if (!ndd) {
+ loop_bitmask = 0;
+ break;
+ }
+
nsindex = to_namespace_index(ndd, ndd->ns_current);
if (nsindex == NULL)
loop_bitmask |= 1;
return false;
if (nvme_req(req)->status & NVME_SC_DNR)
return false;
- if (jiffies - req->start_time >= req->timeout)
- return false;
if (nvme_req(req)->retries >= nvme_max_retries)
return false;
return true;
container_of(work, struct nvme_ctrl, async_event_work);
spin_lock_irq(&ctrl->lock);
- while (ctrl->event_limit > 0) {
+ while (ctrl->state == NVME_CTRL_LIVE && ctrl->event_limit > 0) {
int aer_idx = --ctrl->event_limit;
spin_unlock_irq(&ctrl->lock);
/*FALLTHRU*/
case NVME_SC_ABORT_REQ:
++ctrl->event_limit;
- queue_work(nvme_wq, &ctrl->async_event_work);
+ if (ctrl->state == NVME_CTRL_LIVE)
+ queue_work(nvme_wq, &ctrl->async_event_work);
break;
default:
break;
nvme_queue_scan(ctrl);
break;
case NVME_AER_NOTICE_FW_ACT_STARTING:
- schedule_work(&ctrl->fw_act_work);
+ queue_work(nvme_wq, &ctrl->fw_act_work);
break;
default:
dev_warn(ctrl->device, "async event result %08x\n", result);
opts->queue_size = NVMF_DEF_QUEUE_SIZE;
opts->nr_io_queues = num_online_cpus();
opts->reconnect_delay = NVMF_DEF_RECONNECT_DELAY;
+ opts->kato = NVME_DEFAULT_KATO;
options = o = kstrdup(buf, GFP_KERNEL);
if (!options)
goto out;
}
- if (opts->discovery_nqn) {
- pr_err("Discovery controllers cannot accept keep_alive_tmo != 0\n");
- ret = -EINVAL;
- goto out;
- }
-
if (token < 0) {
pr_err("Invalid keep_alive_tmo %d\n", token);
ret = -EINVAL;
goto out;
- } else if (token == 0) {
+ } else if (token == 0 && !opts->discovery_nqn) {
/* Allowed for debug */
pr_warn("keep_alive_tmo 0 won't execute keep alives!!!\n");
}
opts->kato = token;
+
+ if (opts->discovery_nqn && opts->kato) {
+ pr_err("Discovery controllers cannot accept KATO != 0\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
break;
case NVMF_OPT_CTRL_LOSS_TMO:
if (match_int(args, &token)) {
uuid_copy(&opts->host->id, &hostid);
out:
- if (!opts->discovery_nqn && !opts->kato)
- opts->kato = NVME_DEFAULT_KATO;
kfree(options);
return ret;
}
if (atomic_read(&op->state) == FCPOP_STATE_ABORTED)
status = cpu_to_le16((NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1);
else if (freq->status)
- status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1);
+ status = cpu_to_le16(NVME_SC_INTERNAL << 1);
/*
	 * For the linux implementation, if we have an unsuccessful
*/
if (freq->transferred_length !=
be32_to_cpu(op->cmd_iu.data_len)) {
- status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1);
+ status = cpu_to_le16(NVME_SC_INTERNAL << 1);
goto done;
}
result.u64 = 0;
freq->transferred_length ||
op->rsp_iu.status_code ||
sqe->common.command_id != cqe->command_id)) {
- status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1);
+ status = cpu_to_le16(NVME_SC_INTERNAL << 1);
goto done;
}
result = cqe->result;
break;
default:
- status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1);
+ status = cpu_to_le16(NVME_SC_INTERNAL << 1);
goto done;
}
* as well as those by FC-NVME spec.
*/
WARN_ON_ONCE(sqe->common.metadata);
- WARN_ON_ONCE(sqe->common.dptr.prp1);
- WARN_ON_ONCE(sqe->common.dptr.prp2);
sqe->common.flags |= NVME_CMD_SGL_METABUF;
/*
- * format SQE DPTR field per FC-NVME rules
- * type=data block descr; subtype=offset;
- * offset is currently 0.
+ * format SQE DPTR field per FC-NVME rules:
+ * type=0x5 Transport SGL Data Block Descriptor
+ * subtype=0xA Transport-specific value
+ * address=0
+ * length=length of the data series
*/
- sqe->rw.dptr.sgl.type = NVME_SGL_FMT_OFFSET;
+ sqe->rw.dptr.sgl.type = (NVME_TRANSPORT_SGL_DATA_DESC << 4) |
+ NVME_SGL_FMT_TRANSPORT_A;
sqe->rw.dptr.sgl.length = cpu_to_le32(data_len);
sqe->rw.dptr.sgl.addr = 0;
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/mutex.h>
+#include <linux/once.h>
#include <linux/pci.h>
#include <linux/poison.h>
#include <linux/t10-pi.h>
}
#endif
+static void nvme_print_sgl(struct scatterlist *sgl, int nents)
+{
+ int i;
+ struct scatterlist *sg;
+
+ for_each_sg(sgl, sg, nents, i) {
+ dma_addr_t phys = sg_phys(sg);
+ pr_warn("sg[%d] phys_addr:%pad offset:%d length:%d "
+ "dma_address:%pad dma_length:%d\n",
+ i, &phys, sg->offset, sg->length, &sg_dma_address(sg),
+ sg_dma_len(sg));
+ }
+}
+
static blk_status_t nvme_setup_prps(struct nvme_dev *dev, struct request *req)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
return BLK_STS_OK;
bad_sgl:
- if (WARN_ONCE(1, "Invalid SGL for payload:%d nents:%d\n",
- blk_rq_payload_bytes(req), iod->nents)) {
- for_each_sg(iod->sg, sg, iod->nents, i) {
- dma_addr_t phys = sg_phys(sg);
- pr_warn("sg[%d] phys_addr:%pad offset:%d length:%d "
- "dma_address:%pad dma_length:%d\n", i, &phys,
- sg->offset, sg->length,
- &sg_dma_address(sg),
- sg_dma_len(sg));
- }
- }
+ WARN(DO_ONCE(nvme_print_sgl, iod->sg, iod->nents),
+ "Invalid SGL for payload:%d nents:%d\n",
+ blk_rq_payload_bytes(req), iod->nents);
return BLK_STS_IOERR;
-
}
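DO_ONCE() runs nvme_print_sgl() only the first time through and evaluates to whether it ran, so the WARN() above fires with the SGL dump exactly once instead of flooding the log. A userspace model of that once-guard (the kernel version uses a static key; this sketch ignores concurrency):

	#include <stdbool.h>
	#include <stdio.h>

	static bool do_once(void)
	{
		static bool done;

		if (done)
			return false;
		done = true;		/* first caller wins */
		return true;
	}

	int main(void)
	{
		for (int i = 0; i < 3; i++)
			if (do_once())
				printf("printed once\n");
		return 0;
	}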
static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
if (result < 0)
goto release_cq;
+ nvme_init_queue(nvmeq, qid);
result = queue_request_irq(nvmeq);
if (result < 0)
goto release_sq;
- nvme_init_queue(nvmeq, qid);
return result;
release_sq:
return result;
nvmeq->cq_vector = 0;
+ nvme_init_queue(nvmeq, 0);
result = queue_request_irq(nvmeq);
if (result) {
nvmeq->cq_vector = -1;
if (result)
goto out;
- nvme_init_queue(dev->queues[0], 0);
result = nvme_alloc_admin_tags(dev);
if (result)
goto out;
}
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
- WARN_ON_ONCE(!changed);
+ if (!changed) {
+ /* state change failure is ok if we're in DELETING state */
+ WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING);
+ return;
+ }
+
ctrl->ctrl.nr_reconnects = 0;
nvme_start_ctrl(&ctrl->ctrl);
struct nvme_rdma_ctrl *ctrl = container_of(work,
struct nvme_rdma_ctrl, err_work);
- nvme_stop_ctrl(&ctrl->ctrl);
+ nvme_stop_keep_alive(&ctrl->ctrl);
if (ctrl->ctrl.queue_count > 1) {
nvme_stop_queues(&ctrl->ctrl);
if (status)
nvmet_set_status(req, status);
- /* XXX: need to fill in something useful for sq_head */
- req->rsp->sq_head = 0;
- if (likely(req->sq)) /* may happen during early failure */
- req->rsp->sq_id = cpu_to_le16(req->sq->qid);
+ if (req->sq->size)
+ req->sq->sqhd = (req->sq->sqhd + 1) % req->sq->size;
+ req->rsp->sq_head = cpu_to_le16(req->sq->sqhd);
+ req->rsp->sq_id = cpu_to_le16(req->sq->qid);
req->rsp->command_id = req->cmd->common.command_id;
if (req->ns)
void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
u16 qid, u16 size)
{
+ sq->sqhd = 0;
sq->qid = qid;
sq->size = size;
pr_warn("queue already connected!\n");
return NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
}
+ if (!sqsize) {
+ pr_warn("queue size zero!\n");
+ return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
+ }
- nvmet_cq_setup(ctrl, req->cq, qid, sqsize);
- nvmet_sq_setup(ctrl, req->sq, qid, sqsize);
+ /* note: convert queue size from 0's-based value to 1's-based value */
+ nvmet_cq_setup(ctrl, req->cq, qid, sqsize + 1);
+ nvmet_sq_setup(ctrl, req->sq, qid, sqsize + 1);
return 0;
}
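The sqsize value in the connect command is 0's-based per NVMe, so a request carrying sqsize 31 sets up a 32-entry queue above, and the sqhd update earlier wraps modulo that size; a literal sqsize of 0 is rejected as invalid rather than being decoded as a one-entry queue.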
u32 a_id;
struct nvmet_fc_tgtport *tgtport;
struct list_head a_list;
- struct nvmet_fc_tgt_queue *queues[NVMET_NR_QUEUES];
+ struct nvmet_fc_tgt_queue *queues[NVMET_NR_QUEUES + 1];
struct kref ref;
};
unsigned long flags;
int ret;
- if (qid >= NVMET_NR_QUEUES)
+ if (qid > NVMET_NR_QUEUES)
return NULL;
queue = kzalloc((sizeof(*queue) +
u16 qid = nvmet_fc_getqueueid(connection_id);
unsigned long flags;
+ if (qid > NVMET_NR_QUEUES)
+ return NULL;
+
spin_lock_irqsave(&tgtport->lock, flags);
list_for_each_entry(assoc, &tgtport->assoc_list, a_list) {
if (association_id == assoc->association_id) {
int i;
spin_lock_irqsave(&tgtport->lock, flags);
- for (i = NVMET_NR_QUEUES - 1; i >= 0; i--) {
+ for (i = NVMET_NR_QUEUES; i >= 0; i--) {
queue = assoc->queues[i];
if (queue) {
if (!nvmet_fc_tgt_q_get(queue))
spin_lock_irqsave(&fod->flock, flags);
fod->writedataactive = false;
spin_unlock_irqrestore(&fod->flock, flags);
- nvmet_req_complete(&fod->req,
- NVME_SC_FC_TRANSPORT_ERROR);
+ nvmet_req_complete(&fod->req, NVME_SC_INTERNAL);
} else /* NVMET_FCOP_READDATA or NVMET_FCOP_READDATA_RSP */ {
fcpreq->fcp_error = ret;
fcpreq->transferred_length = 0;
/* if in the middle of an io and we need to tear down */
if (abort) {
if (fcpreq->op == NVMET_FCOP_WRITEDATA) {
- nvmet_req_complete(&fod->req,
- NVME_SC_FC_TRANSPORT_ERROR);
+ nvmet_req_complete(&fod->req, NVME_SC_INTERNAL);
return true;
}
fod->abort = true;
spin_unlock(&fod->flock);
- nvmet_req_complete(&fod->req,
- NVME_SC_FC_TRANSPORT_ERROR);
+ nvmet_req_complete(&fod->req, NVME_SC_INTERNAL);
return;
}
{
struct nvmet_fc_tgtport *tgtport = port->priv;
unsigned long flags;
+ bool matched = false;
spin_lock_irqsave(&nvmet_fc_tgtlock, flags);
if (tgtport->port == port) {
- nvmet_fc_tgtport_put(tgtport);
+ matched = true;
tgtport->port = NULL;
}
spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags);
+
+ if (matched)
+ nvmet_fc_tgtport_put(tgtport);
}
static struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops = {
struct fcloop_lport *lport;
struct list_head nport_list;
struct kref ref;
- struct completion rport_unreg_done;
- struct completion tport_unreg_done;
u64 node_name;
u64 port_name;
u32 port_role;
tfcp_req->aborted = true;
spin_unlock(&tfcp_req->reqlock);
- tfcp_req->status = NVME_SC_FC_TRANSPORT_ABORTED;
+ tfcp_req->status = NVME_SC_INTERNAL;
/*
* nothing more to do. If io wasn't active, the transport should
schedule_work(&inireq->iniwork);
}
+static void
+fcloop_nport_free(struct kref *ref)
+{
+ struct fcloop_nport *nport =
+ container_of(ref, struct fcloop_nport, ref);
+ unsigned long flags;
+
+ spin_lock_irqsave(&fcloop_lock, flags);
+ list_del(&nport->nport_list);
+ spin_unlock_irqrestore(&fcloop_lock, flags);
+
+ kfree(nport);
+}
+
+static void
+fcloop_nport_put(struct fcloop_nport *nport)
+{
+ kref_put(&nport->ref, fcloop_nport_free);
+}
+
+static int
+fcloop_nport_get(struct fcloop_nport *nport)
+{
+ return kref_get_unless_zero(&nport->ref);
+}
+
static void
fcloop_localport_delete(struct nvme_fc_local_port *localport)
{
{
struct fcloop_rport *rport = remoteport->private;
- /* release any threads waiting for the unreg to complete */
- complete(&rport->nport->rport_unreg_done);
+ fcloop_nport_put(rport->nport);
}
static void
{
struct fcloop_tport *tport = targetport->private;
- /* release any threads waiting for the unreg to complete */
- complete(&tport->nport->tport_unreg_done);
+ fcloop_nport_put(tport->nport);
}
#define FCLOOP_HW_QUEUES 4
goto out_free_opts;
}
+ memset(&pinfo, 0, sizeof(pinfo));
pinfo.node_name = opts->wwnn;
pinfo.port_name = opts->wwpn;
pinfo.port_role = opts->roles;
return ret ? ret : count;
}
-static void
-fcloop_nport_free(struct kref *ref)
-{
- struct fcloop_nport *nport =
- container_of(ref, struct fcloop_nport, ref);
- unsigned long flags;
-
- spin_lock_irqsave(&fcloop_lock, flags);
- list_del(&nport->nport_list);
- spin_unlock_irqrestore(&fcloop_lock, flags);
-
- kfree(nport);
-}
-
-static void
-fcloop_nport_put(struct fcloop_nport *nport)
-{
- kref_put(&nport->ref, fcloop_nport_free);
-}
-
-static int
-fcloop_nport_get(struct fcloop_nport *nport)
-{
- return kref_get_unless_zero(&nport->ref);
-}
-
static struct fcloop_nport *
fcloop_alloc_nport(const char *buf, size_t count, bool remoteport)
{
if (!nport)
return -EIO;
+ memset(&pinfo, 0, sizeof(pinfo));
pinfo.node_name = nport->node_name;
pinfo.port_name = nport->port_name;
pinfo.port_role = nport->port_role;
}
static int
-__wait_remoteport_unreg(struct fcloop_nport *nport, struct fcloop_rport *rport)
+__remoteport_unreg(struct fcloop_nport *nport, struct fcloop_rport *rport)
{
- int ret;
-
if (!rport)
return -EALREADY;
- init_completion(&nport->rport_unreg_done);
-
- ret = nvme_fc_unregister_remoteport(rport->remoteport);
- if (ret)
- return ret;
-
- wait_for_completion(&nport->rport_unreg_done);
-
- fcloop_nport_put(nport);
-
- return ret;
+ return nvme_fc_unregister_remoteport(rport->remoteport);
}
static ssize_t
if (!nport)
return -ENOENT;
- ret = __wait_remoteport_unreg(nport, rport);
+ ret = __remoteport_unreg(nport, rport);
return ret ? ret : count;
}
}
static int
-__wait_targetport_unreg(struct fcloop_nport *nport, struct fcloop_tport *tport)
+__targetport_unreg(struct fcloop_nport *nport, struct fcloop_tport *tport)
{
- int ret;
-
if (!tport)
return -EALREADY;
- init_completion(&nport->tport_unreg_done);
-
- ret = nvmet_fc_unregister_targetport(tport->targetport);
- if (ret)
- return ret;
-
- wait_for_completion(&nport->tport_unreg_done);
-
- fcloop_nport_put(nport);
-
- return ret;
+ return nvmet_fc_unregister_targetport(tport->targetport);
}
static ssize_t
if (!nport)
return -ENOENT;
- ret = __wait_targetport_unreg(nport, tport);
+ ret = __targetport_unreg(nport, tport);
return ret ? ret : count;
}
spin_unlock_irqrestore(&fcloop_lock, flags);
- ret = __wait_targetport_unreg(nport, tport);
+ ret = __targetport_unreg(nport, tport);
if (ret)
pr_warn("%s: Failed deleting target port\n", __func__);
- ret = __wait_remoteport_unreg(nport, rport);
+ ret = __remoteport_unreg(nport, rport);
if (ret)
pr_warn("%s: Failed deleting remote port\n", __func__);
struct percpu_ref ref;
u16 qid;
u16 size;
+ u16 sqhd;
struct completion free_done;
struct completion confirm_done;
};
return ret;
}
-static void pci_epf_test_raise_irq(struct pci_epf_test *epf_test)
+static void pci_epf_test_raise_irq(struct pci_epf_test *epf_test, u8 irq)
{
- u8 irq;
u8 msi_count;
struct pci_epf *epf = epf_test->epf;
struct pci_epc *epc = epf->epc;
reg->status |= STATUS_IRQ_RAISED;
msi_count = pci_epc_get_msi(epc);
- irq = (reg->command & MSI_NUMBER_MASK) >> MSI_NUMBER_SHIFT;
if (irq > msi_count || msi_count <= 0)
pci_epc_raise_irq(epc, PCI_EPC_IRQ_LEGACY, 0);
else
reg->command = 0;
reg->status = 0;
+ irq = (command & MSI_NUMBER_MASK) >> MSI_NUMBER_SHIFT;
+
if (command & COMMAND_RAISE_LEGACY_IRQ) {
reg->status = STATUS_IRQ_RAISED;
pci_epc_raise_irq(epc, PCI_EPC_IRQ_LEGACY, 0);
reg->status |= STATUS_WRITE_FAIL;
else
reg->status |= STATUS_WRITE_SUCCESS;
- pci_epf_test_raise_irq(epf_test);
+ pci_epf_test_raise_irq(epf_test, irq);
goto reset_handler;
}
reg->status |= STATUS_READ_SUCCESS;
else
reg->status |= STATUS_READ_FAIL;
- pci_epf_test_raise_irq(epf_test);
+ pci_epf_test_raise_irq(epf_test, irq);
goto reset_handler;
}
reg->status |= STATUS_COPY_SUCCESS;
else
reg->status |= STATUS_COPY_FAIL;
- pci_epf_test_raise_irq(epf_test);
+ pci_epf_test_raise_irq(epf_test, irq);
goto reset_handler;
}
if (command & COMMAND_RAISE_MSI_IRQ) {
msi_count = pci_epc_get_msi(epc);
- irq = (command & MSI_NUMBER_MASK) >> MSI_NUMBER_SHIFT;
if (irq > msi_count || msi_count <= 0)
goto reset_handler;
reg->status = STATUS_IRQ_RAISED;
ret = armpmu_register(pmu);
if (ret) {
pr_warn("Failed to register PMU for CPU%d\n", cpu);
+ kfree(pmu->name);
return ret;
}
}
dasd_schedule_device_bh(device);
if (device->block) {
dasd_schedule_block_bh(device->block);
- blk_mq_run_hw_queues(device->block->request_queue, true);
+ if (device->block->request_queue)
+ blk_mq_run_hw_queues(device->block->request_queue,
+ true);
}
}
EXPORT_SYMBOL_GPL(dasd_generic_handle_state_change);
dasd_schedule_device_bh(device);
if (device->block) {
dasd_schedule_block_bh(device->block);
- blk_mq_run_hw_queues(device->block->request_queue, true);
+ if (device->block->request_queue)
+ blk_mq_run_hw_queues(device->block->request_queue,
+ true);
}
if (!device->stopped)
if (device->block) {
dasd_schedule_block_bh(device->block);
- blk_mq_run_hw_queues(device->block->request_queue, true);
+ if (device->block->request_queue)
+ blk_mq_run_hw_queues(device->block->request_queue,
+ true);
}
clear_bit(DASD_FLAG_SUSPENDED, &device->flags);
static void scm_request_finish(struct scm_request *scmrq)
{
struct scm_blk_dev *bdev = scmrq->bdev;
- int *error;
+ blk_status_t *error;
int i;
for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++) {
static void scm_blk_request_done(struct request *req)
{
- int *error = blk_mq_rq_to_pdu(req);
+ blk_status_t *error = blk_mq_rq_to_pdu(req);
blk_mq_end_request(req, *error);
}
atomic_set(&bdev->queued_reqs, 0);
bdev->tag_set.ops = &scm_mq_ops;
- bdev->tag_set.cmd_size = sizeof(int);
+ bdev->tag_set.cmd_size = sizeof(blk_status_t);
bdev->tag_set.nr_hw_queues = nr_requests;
bdev->tag_set.queue_depth = nr_requests_per_io * nr_requests;
bdev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
static int recovery_check(struct device *dev, void *data)
{
struct ccw_device *cdev = to_ccwdev(dev);
+ struct subchannel *sch;
int *redo = data;
spin_lock_irq(cdev->ccwlock);
switch (cdev->private->state) {
+ case DEV_STATE_ONLINE:
+ sch = to_subchannel(cdev->dev.parent);
+ if ((sch->schib.pmcw.pam & sch->opm) == sch->vpm)
+ break;
+ /* fall through */
case DEV_STATE_DISCONNECTED:
CIO_MSG_EVENT(3, "recovery: trigger 0.%x.%04x\n",
cdev->private->dev_id.ssid,
}
spin_unlock_irq(&recovery_lock);
} else
- CIO_MSG_EVENT(4, "recovery: end\n");
+ CIO_MSG_EVENT(3, "recovery: end\n");
}
static DECLARE_WORK(recovery_work, recovery_work_func);
schedule_work(&recovery_work);
}
-static void ccw_device_schedule_recovery(void)
+void ccw_device_schedule_recovery(void)
{
unsigned long flags;
- CIO_MSG_EVENT(4, "recovery: schedule\n");
+ CIO_MSG_EVENT(3, "recovery: schedule\n");
spin_lock_irqsave(&recovery_lock, flags);
if (!timer_pending(&recovery_timer) || (recovery_phase != 0)) {
recovery_phase = 0;
void ccw_device_set_notoper(struct ccw_device *cdev);
void ccw_device_set_timeout(struct ccw_device *, int);
+void ccw_device_schedule_recovery(void);
/* Channel measurement facility related */
void retry_set_schib(struct ccw_device *cdev);
}
}
+static void ccw_device_handle_broken_paths(struct ccw_device *cdev)
+{
+ struct subchannel *sch = to_subchannel(cdev->dev.parent);
+ u8 broken_paths = (sch->schib.pmcw.pam & sch->opm) ^ sch->vpm;
+
+ if (broken_paths && (cdev->private->path_broken_mask != broken_paths))
+ ccw_device_schedule_recovery();
+
+ cdev->private->path_broken_mask = broken_paths;
+}
+
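The XOR above isolates the paths of interest: pam & opm is the set of paths that should be usable (available and operational), vpm is the set actually verified, so the difference marks paths that should work but do not. A small standalone illustration with hypothetical masks:

	#include <stdio.h>

	int main(void)
	{
		unsigned char pam = 0xc0, opm = 0xc0;	/* available & operational */
		unsigned char vpm = 0x80;		/* actually verified */
		unsigned char broken = (pam & opm) ^ vpm;

		printf("broken paths mask: 0x%02x\n", broken);	/* 0x40 */
		return 0;
	}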
void ccw_device_verify_done(struct ccw_device *cdev, int err)
{
struct subchannel *sch;
memset(&cdev->private->irb, 0, sizeof(struct irb));
}
ccw_device_report_path_events(cdev);
+ ccw_device_handle_broken_paths(cdev);
break;
case -ETIME:
case -EUSERS:
not operable */
u8 path_gone_mask; /* mask of paths, that became unavailable */
u8 path_new_mask; /* mask of paths, that became available */
+ u8 path_broken_mask; /* mask of paths, which were found to be
+ unusable */
struct {
unsigned int fast:1; /* post with "channel end" */
unsigned int repall:1; /* report every interrupt status */
* Params : SCpnt - command causing reset
* Returns : one of SCSI_RESET_ macros
*/
-int acornscsi_host_reset(struct Scsi_Host *shpnt)
+int acornscsi_host_reset(struct scsi_cmnd *SCpnt)
{
- AS_Host *host = (AS_Host *)shpnt->hostdata;
+ AS_Host *host = (AS_Host *)SCpnt->device->host->hostdata;
struct scsi_cmnd *SCptr;
host->stats.resets += 1;
printk(KERN_WARNING "acornscsi_reset: ");
print_sbic_status(asr, ssr, host->scsi.phase);
- for (devidx = 0; devidx < 9; devidx ++) {
+ for (devidx = 0; devidx < 9; devidx++)
acornscsi_dumplog(host, devidx);
}
#endif
wcqe->total_data_placed);
nCmd->transferred_length = 0;
nCmd->rcv_rsplen = 0;
- nCmd->status = NVME_SC_FC_TRANSPORT_ERROR;
+ nCmd->status = NVME_SC_INTERNAL;
}
}
goto rel;
if (unlikely(res == QLA_FUNCTION_FAILED))
- fd->status = NVME_SC_FC_TRANSPORT_ERROR;
+ fd->status = NVME_SC_INTERNAL;
else
fd->status = 0;
static enum blk_eh_timer_return
fc_bsg_job_timeout(struct request *req)
{
- struct bsg_job *job = (void *) req->special;
+ struct bsg_job *job = blk_mq_rq_to_pdu(req);
struct Scsi_Host *shost = fc_bsg_to_shost(job);
struct fc_rport *rport = fc_bsg_to_rport(job);
struct fc_internal *i = to_fc_internal(shost->transportt);
sd_config_discard(sdkp, SD_LBP_WS16);
else if (sdkp->lbpws10)
sd_config_discard(sdkp, SD_LBP_WS10);
- else if (sdkp->lbpu && sdkp->max_unmap_blocks)
- sd_config_discard(sdkp, SD_LBP_UNMAP);
else
sd_config_discard(sdkp, SD_LBP_DISABLE);
}
return max_sectors << 9;
}
+static void
+sg_fill_request_table(Sg_fd *sfp, sg_req_info_t *rinfo)
+{
+ Sg_request *srp;
+ int val;
+ unsigned int ms;
+
+ val = 0;
+ list_for_each_entry(srp, &sfp->rq_list, entry) {
+ if (val >= SG_MAX_QUEUE)
+ break;
+ rinfo[val].req_state = srp->done + 1;
+ rinfo[val].problem =
+ srp->header.masked_status &
+ srp->header.host_status &
+ srp->header.driver_status;
+ if (srp->done)
+ rinfo[val].duration =
+ srp->header.duration;
+ else {
+ ms = jiffies_to_msecs(jiffies);
+ rinfo[val].duration =
+ (ms > srp->header.duration) ?
+ (ms - srp->header.duration) : 0;
+ }
+ rinfo[val].orphan = srp->orphan;
+ rinfo[val].sg_io_owned = srp->sg_io_owned;
+ rinfo[val].pack_id = srp->header.pack_id;
+ rinfo[val].usr_ptr = srp->header.usr_ptr;
+ val++;
+ }
+}
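
A small illustration of the duration logic carried into sg_fill_request_table(): for completed requests header.duration already holds the elapsed time, while for in-flight ones it holds the start timestamp in milliseconds, so the elapsed time is "now minus start", clamped at zero in case the millisecond counter wrapped. Standalone sketch (the helper name is ours, not the driver's):

    #include <stdio.h>

    static unsigned int elapsed_ms(int done, unsigned int duration,
                                   unsigned int now_ms)
    {
            if (done)
                    return duration;        /* already holds elapsed time */
            return (now_ms > duration) ? (now_ms - duration) : 0;
    }

    int main(void)
    {
            printf("%u\n", elapsed_ms(0, 1000, 1500)); /* prints 500 */
            return 0;
    }
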
+
static long
sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg)
{
return -EFAULT;
else {
sg_req_info_t *rinfo;
- unsigned int ms;
- rinfo = kmalloc(SZ_SG_REQ_INFO * SG_MAX_QUEUE,
- GFP_KERNEL);
+ rinfo = kzalloc(SZ_SG_REQ_INFO * SG_MAX_QUEUE,
+ GFP_KERNEL);
if (!rinfo)
return -ENOMEM;
read_lock_irqsave(&sfp->rq_list_lock, iflags);
- val = 0;
- list_for_each_entry(srp, &sfp->rq_list, entry) {
- if (val >= SG_MAX_QUEUE)
- break;
- memset(&rinfo[val], 0, SZ_SG_REQ_INFO);
- rinfo[val].req_state = srp->done + 1;
- rinfo[val].problem =
- srp->header.masked_status &
- srp->header.host_status &
- srp->header.driver_status;
- if (srp->done)
- rinfo[val].duration =
- srp->header.duration;
- else {
- ms = jiffies_to_msecs(jiffies);
- rinfo[val].duration =
- (ms > srp->header.duration) ?
- (ms - srp->header.duration) : 0;
- }
- rinfo[val].orphan = srp->orphan;
- rinfo[val].sg_io_owned = srp->sg_io_owned;
- rinfo[val].pack_id = srp->header.pack_id;
- rinfo[val].usr_ptr = srp->header.usr_ptr;
- val++;
- }
+ sg_fill_request_table(sfp, rinfo);
read_unlock_irqrestore(&sfp->rq_list_lock, iflags);
result = __copy_to_user(p, rinfo,
SZ_SG_REQ_INFO * SG_MAX_QUEUE);
return err;
}
-static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
- grant_ref_t *gnt_refs,
- unsigned int nr_grefs,
- void **vaddr)
-{
- struct xenbus_map_node *node;
- struct vm_struct *area;
- pte_t *ptes[XENBUS_MAX_RING_GRANTS];
- phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS];
- int err = GNTST_okay;
- int i;
- bool leaked;
-
- *vaddr = NULL;
-
- if (nr_grefs > XENBUS_MAX_RING_GRANTS)
- return -EINVAL;
-
- node = kzalloc(sizeof(*node), GFP_KERNEL);
- if (!node)
- return -ENOMEM;
-
- area = alloc_vm_area(XEN_PAGE_SIZE * nr_grefs, ptes);
- if (!area) {
- kfree(node);
- return -ENOMEM;
- }
-
- for (i = 0; i < nr_grefs; i++)
- phys_addrs[i] = arbitrary_virt_to_machine(ptes[i]).maddr;
-
- err = __xenbus_map_ring(dev, gnt_refs, nr_grefs, node->handles,
- phys_addrs,
- GNTMAP_host_map | GNTMAP_contains_pte,
- &leaked);
- if (err)
- goto failed;
-
- node->nr_handles = nr_grefs;
- node->pv.area = area;
-
- spin_lock(&xenbus_valloc_lock);
- list_add(&node->next, &xenbus_valloc_pages);
- spin_unlock(&xenbus_valloc_lock);
-
- *vaddr = area->addr;
- return 0;
-
-failed:
- if (!leaked)
- free_vm_area(area);
- else
- pr_alert("leaking VM area %p size %u page(s)", area, nr_grefs);
-
- kfree(node);
- return err;
-}
-
struct map_ring_valloc_hvm
{
unsigned int idx;
}
EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
+#ifdef CONFIG_XEN_PV
+static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
+ grant_ref_t *gnt_refs,
+ unsigned int nr_grefs,
+ void **vaddr)
+{
+ struct xenbus_map_node *node;
+ struct vm_struct *area;
+ pte_t *ptes[XENBUS_MAX_RING_GRANTS];
+ phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS];
+ int err = GNTST_okay;
+ int i;
+ bool leaked;
+
+ *vaddr = NULL;
+
+ if (nr_grefs > XENBUS_MAX_RING_GRANTS)
+ return -EINVAL;
+
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return -ENOMEM;
+
+ area = alloc_vm_area(XEN_PAGE_SIZE * nr_grefs, ptes);
+ if (!area) {
+ kfree(node);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < nr_grefs; i++)
+ phys_addrs[i] = arbitrary_virt_to_machine(ptes[i]).maddr;
+
+ err = __xenbus_map_ring(dev, gnt_refs, nr_grefs, node->handles,
+ phys_addrs,
+ GNTMAP_host_map | GNTMAP_contains_pte,
+ &leaked);
+ if (err)
+ goto failed;
+
+ node->nr_handles = nr_grefs;
+ node->pv.area = area;
+
+ spin_lock(&xenbus_valloc_lock);
+ list_add(&node->next, &xenbus_valloc_pages);
+ spin_unlock(&xenbus_valloc_lock);
+
+ *vaddr = area->addr;
+ return 0;
+
+failed:
+ if (!leaked)
+ free_vm_area(area);
+ else
+ pr_alert("leaking VM area %p size %u page(s)", area, nr_grefs);
+
+ kfree(node);
+ return err;
+}
+
static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)
{
struct xenbus_map_node *node;
return err;
}
+static const struct xenbus_ring_ops ring_ops_pv = {
+ .map = xenbus_map_ring_valloc_pv,
+ .unmap = xenbus_unmap_ring_vfree_pv,
+};
+#endif
+
struct unmap_ring_vfree_hvm
{
unsigned int idx;
}
EXPORT_SYMBOL_GPL(xenbus_read_driver_state);
-static const struct xenbus_ring_ops ring_ops_pv = {
- .map = xenbus_map_ring_valloc_pv,
- .unmap = xenbus_unmap_ring_vfree_pv,
-};
-
static const struct xenbus_ring_ops ring_ops_hvm = {
.map = xenbus_map_ring_valloc_hvm,
.unmap = xenbus_unmap_ring_vfree_hvm,
void __init xenbus_ring_ops_init(void)
{
+#ifdef CONFIG_XEN_PV
if (!xen_feature(XENFEAT_auto_translated_physmap))
ring_ops = &ring_ops_pv;
else
+#endif
ring_ops = &ring_ops_hvm;
}
#include <linux/sched.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
-#include <linux/utsname.h>
#include <linux/ratelimit.h>
#include "super.h"
void *p;
const char* metadata[][2] = {
- {"hostname", utsname()->nodename},
- {"kernel_version", utsname()->release},
+ {"hostname", mdsc->nodename},
+ {"kernel_version", init_utsname()->release},
{"entity_id", opt->name ? : ""},
{"root", fsopt->server_path ? : "/"},
{NULL, NULL}
init_rwsem(&mdsc->pool_perm_rwsem);
mdsc->pool_perm_tree = RB_ROOT;
+ strncpy(mdsc->nodename, utsname()->nodename,
+ sizeof(mdsc->nodename) - 1);
return 0;
}
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/refcount.h>
+#include <linux/utsname.h>
#include <linux/ceph/types.h>
#include <linux/ceph/messenger.h>
struct rw_semaphore pool_perm_rwsem;
struct rb_root pool_perm_tree;
+
+ char nodename[__NEW_UTS_LEN + 1];
};
extern const char *ceph_mds_op_name(int op);
if ((ses->serverDomain == NULL) ||
(ses->serverOS == NULL) ||
(ses->serverNOS == NULL)) {
- seq_printf(m, "\n%d) entry for %s not fully "
- "displayed\n\t", i, ses->serverName);
+ seq_printf(m, "\n%d) Name: %s Uses: %d Capability: 0x%x\tSession Status: %d\t",
+ i, ses->serverName, ses->ses_count,
+ ses->capabilities, ses->status);
+ if (ses->session_flags & SMB2_SESSION_FLAG_IS_GUEST)
+ seq_printf(m, "Guest\t");
+ else if (ses->session_flags & SMB2_SESSION_FLAG_IS_NULL)
+ seq_printf(m, "Anonymous\t");
} else {
seq_printf(m,
"\n%d) Name: %s Domain: %s Uses: %d OS:"
seq_puts(s, ",nocase");
if (tcon->retry)
seq_puts(s, ",hard");
+ else
+ seq_puts(s, ",soft");
if (tcon->use_persistent)
seq_puts(s, ",persistenthandles");
else if (tcon->use_resilient)
exit_cifs_idmap();
#endif
#ifdef CONFIG_CIFS_UPCALL
- unregister_key_type(&cifs_spnego_key_type);
+ exit_cifs_spnego();
#endif
cifs_destroy_request_bufs();
cifs_destroy_mids();
extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
-#define CIFS_VERSION "2.09"
+#define CIFS_VERSION "2.10"
#endif /* _CIFSFS_H */
#ifdef CONFIG_CIFS_SMB311
Smb_311,
#endif /* SMB311 */
+ Smb_3any,
+ Smb_default,
Smb_version_err
};
#define SMB21_VERSION_STRING "2.1"
extern struct smb_version_operations smb21_operations;
extern struct smb_version_values smb21_values;
+#define SMBDEFAULT_VERSION_STRING "default"
+extern struct smb_version_values smbdefault_values;
+#define SMB3ANY_VERSION_STRING "3"
+extern struct smb_version_values smb3any_values;
#define SMB30_VERSION_STRING "3.0"
extern struct smb_version_operations smb30_operations;
extern struct smb_version_values smb30_values;
{ Smb_311, SMB311_VERSION_STRING },
{ Smb_311, ALT_SMB311_VERSION_STRING },
#endif /* SMB311 */
+ { Smb_3any, SMB3ANY_VERSION_STRING },
+ { Smb_default, SMBDEFAULT_VERSION_STRING },
{ Smb_version_err, NULL }
};
vol->vals = &smb311_values;
break;
#endif /* SMB311 */
+ case Smb_3any:
+ vol->ops = &smb30_operations; /* currently identical with 3.0 */
+ vol->vals = &smb3any_values;
+ break;
+ case Smb_default:
+ vol->ops = &smb30_operations; /* currently identical with 3.0 */
+ vol->vals = &smbdefault_values;
+ break;
default:
cifs_dbg(VFS, "Unknown vers= option specified: %s\n", value);
return 1;
vol->actimeo = CIFS_DEF_ACTIMEO;
- /* FIXME: add autonegotiation for SMB3 or later rather than just SMB3 */
- vol->ops = &smb30_operations; /* both secure and accepted widely */
- vol->vals = &smb30_values;
+ /* offer SMB2.1 and later (SMB3 etc). Secure and widely accepted */
+ vol->ops = &smb30_operations;
+ vol->vals = &smbdefault_values;
vol->echo_interval = SMB_ECHO_INTERVAL_DEFAULT;
if (got_version == false)
pr_warn("No dialect specified on mount. Default has changed to "
- "a more secure dialect, SMB3 (vers=3.0), from CIFS "
+ "a more secure dialect, SMB2.1 or later (e.g. SMB3), from CIFS "
"(SMB1). To use the less secure SMB1 dialect to access "
- "old servers which do not support SMB3 specify vers=1.0"
- " on mount. For somewhat newer servers such as Windows "
- "7 try vers=2.1.\n");
+ "old servers which do not support SMB3 (or SMB2.1) specify vers=1.0"
+ " on mount.\n");
kfree(mountdata_copy);
return 0;
if (vol->nosharesock)
return 0;
+ /* BB update this for smb3any and default case */
if ((server->vals != vol->vals) || (server->ops != vol->ops))
return 0;
cifs_dbg(FYI, "Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d\n",
server->sec_mode, server->capabilities, server->timeAdj);
+ if (ses->auth_key.response) {
+ cifs_dbg(VFS, "Free previous auth_key.response = %p\n",
+ ses->auth_key.response);
+ kfree(ses->auth_key.response);
+ ses->auth_key.response = NULL;
+ ses->auth_key.len = 0;
+ }
+
if (server->ops->sess_setup)
rc = server->ops->sess_setup(xid, ses, nls_info);
if (backup_cred(cifs_sb))
create_options |= CREATE_OPEN_BACKUP_INTENT;
+ /* O_SYNC also has bit for O_DSYNC so following check picks up either */
+ if (f_flags & O_SYNC)
+ create_options |= CREATE_WRITE_THROUGH;
+
+ if (f_flags & O_DIRECT)
+ create_options |= CREATE_NO_BUFFER;
+
oparms.tcon = tcon;
oparms.cifs_sb = cifs_sb;
oparms.desired_access = desired_access;
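
Why a single test covers both flags: on Linux, O_SYNC is defined to contain the O_DSYNC bit, which is what the new comment above relies on. A quick standalone check (glibc assumed):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>

    int main(void)
    {
            /* prints 1 on Linux: O_SYNC contains the O_DSYNC bit */
            printf("%d\n", (O_SYNC & O_DSYNC) == O_DSYNC);
            return 0;
    }
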
struct cifs_tcon *tcon;
unsigned int num, max_num, max_buf;
LOCKING_ANDX_RANGE *buf, *cur;
- int types[] = {LOCKING_ANDX_LARGE_FILES,
- LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
+ static const int types[] = {
+ LOCKING_ANDX_LARGE_FILES,
+ LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
+ };
int i;
xid = get_xid();
unsigned int xid)
{
int rc = 0, stored_rc;
- int types[] = {LOCKING_ANDX_LARGE_FILES,
- LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
+ static const int types[] = {
+ LOCKING_ANDX_LARGE_FILES,
+ LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
+ };
unsigned int i;
unsigned int max_num, num, max_buf;
LOCKING_ANDX_RANGE *buf, *cur;
fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime);
fattr->cf_mtime = cifs_NTtimeToUnix(info->LastModificationTime);
fattr->cf_ctime = cifs_NTtimeToUnix(info->LastStatusChange);
+ /* old POSIX extensions don't get create time */
+
fattr->cf_mode = le64_to_cpu(info->Permissions);
/*
stat->blksize = CIFS_MAX_MSGSIZE;
stat->ino = CIFS_I(inode)->uniqueid;
+ /* old CIFS Unix Extensions don't return create time */
+ if (CIFS_I(inode)->createtime) {
+ stat->result_mask |= STATX_BTIME;
+ stat->btime =
+ cifs_NTtimeToUnix(cpu_to_le64(CIFS_I(inode)->createtime));
+ }
+
+ stat->attributes_mask |= (STATX_ATTR_COMPRESSED | STATX_ATTR_ENCRYPTED);
+ if (CIFS_I(inode)->cifsAttrs & FILE_ATTRIBUTE_COMPRESSED)
+ stat->attributes |= STATX_ATTR_COMPRESSED;
+ if (CIFS_I(inode)->cifsAttrs & FILE_ATTRIBUTE_ENCRYPTED)
+ stat->attributes |= STATX_ATTR_ENCRYPTED;
+
/*
* If on a multiuser mount without unix extensions or cifsacl being
* enabled, and the admin hasn't overridden them, set the ownership
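
With the statx additions above, userspace can observe the creation time and the compressed/encrypted attributes through statx(2). A minimal sketch (assumes a glibc with the statx wrapper; error handling trimmed):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/stat.h>

    int main(int argc, char **argv)
    {
            struct statx stx;

            if (argc < 2 || statx(AT_FDCWD, argv[1], 0, STATX_BTIME, &stx))
                    return 1;
            if (stx.stx_mask & STATX_BTIME) /* only set if the server sent it */
                    printf("btime: %lld\n", (long long)stx.stx_btime.tv_sec);
            if (stx.stx_attributes & STATX_ATTR_COMPRESSED)
                    puts("compressed");
            if (stx.stx_attributes & STATX_ATTR_ENCRYPTED)
                    puts("encrypted");
            return 0;
    }
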
return rc;
}
+#ifdef CONFIG_CIFS_XATTR
static ssize_t
move_smb2_ea_to_cifs(char *dst, size_t dst_size,
struct smb2_file_full_ea_info *src, size_t src_size,
return rc;
}
+#endif
static bool
smb2_can_echo(struct TCP_Server_Info *server)
.create_lease_size = sizeof(struct create_lease),
};
+struct smb_version_values smb3any_values = {
+ .version_string = SMB3ANY_VERSION_STRING,
+ .protocol_id = SMB302_PROT_ID, /* doesn't matter, send protocol array */
+ .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION,
+ .large_lock_type = 0,
+ .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
+ .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
+ .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
+ .header_size = sizeof(struct smb2_hdr),
+ .max_header_size = MAX_SMB2_HDR_SIZE,
+ .read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
+ .lock_cmd = SMB2_LOCK,
+ .cap_unix = 0,
+ .cap_nt_find = SMB2_NT_FIND,
+ .cap_large_files = SMB2_LARGE_FILES,
+ .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED,
+ .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED,
+ .create_lease_size = sizeof(struct create_lease_v2),
+};
+
+struct smb_version_values smbdefault_values = {
+ .version_string = SMBDEFAULT_VERSION_STRING,
+ .protocol_id = SMB302_PROT_ID, /* doesn't matter, send protocol array */
+ .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION,
+ .large_lock_type = 0,
+ .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
+ .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
+ .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
+ .header_size = sizeof(struct smb2_hdr),
+ .max_header_size = MAX_SMB2_HDR_SIZE,
+ .read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
+ .lock_cmd = SMB2_LOCK,
+ .cap_unix = 0,
+ .cap_nt_find = SMB2_NT_FIND,
+ .cap_large_files = SMB2_LARGE_FILES,
+ .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED,
+ .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED,
+ .create_lease_size = sizeof(struct create_lease_v2),
+};
+
struct smb_version_values smb30_values = {
.version_string = SMB30_VERSION_STRING,
.protocol_id = SMB30_PROT_ID,
build_encrypt_ctxt((struct smb2_encryption_neg_context *)pneg_ctxt);
req->NegotiateContextOffset = cpu_to_le32(OFFSET_OF_NEG_CONTEXT);
req->NegotiateContextCount = cpu_to_le16(2);
- inc_rfc1001_len(req, 4 + sizeof(struct smb2_preauth_neg_context) + 2
+ inc_rfc1001_len(req, 4 + sizeof(struct smb2_preauth_neg_context)
+ sizeof(struct smb2_encryption_neg_context)); /* calculate hash */
}
#else
req->hdr.sync_hdr.SessionId = 0;
- req->Dialects[0] = cpu_to_le16(ses->server->vals->protocol_id);
-
- req->DialectCount = cpu_to_le16(1); /* One vers= at a time for now */
- inc_rfc1001_len(req, 2);
+ if (strcmp(ses->server->vals->version_string,
+ SMB3ANY_VERSION_STRING) == 0) {
+ req->Dialects[0] = cpu_to_le16(SMB30_PROT_ID);
+ req->Dialects[1] = cpu_to_le16(SMB302_PROT_ID);
+ req->DialectCount = cpu_to_le16(2);
+ inc_rfc1001_len(req, 4);
+ } else if (strcmp(ses->server->vals->version_string,
+ SMBDEFAULT_VERSION_STRING) == 0) {
+ req->Dialects[0] = cpu_to_le16(SMB21_PROT_ID);
+ req->Dialects[1] = cpu_to_le16(SMB30_PROT_ID);
+ req->Dialects[2] = cpu_to_le16(SMB302_PROT_ID);
+ req->DialectCount = cpu_to_le16(3);
+ inc_rfc1001_len(req, 6);
+ } else {
+ /* otherwise send specific dialect */
+ req->Dialects[0] = cpu_to_le16(ses->server->vals->protocol_id);
+ req->DialectCount = cpu_to_le16(1);
+ inc_rfc1001_len(req, 2);
+ }
/* only one of SMB2 signing flags may be set in SMB2 request */
if (ses->sign)
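
For reference, the dialects the three branches above put on the wire, summarized as plain C (illustrative only; protocol IDs per MS-SMB2: 0x0210 = SMB2.1, 0x0300 = SMB3.0, 0x0302 = SMB3.02; any other vers= sends just the single vals->protocol_id):

    static const unsigned short smb3any_dialects[]    = { 0x0300, 0x0302 };
    static const unsigned short smbdefault_dialects[] = { 0x0210, 0x0300, 0x0302 };
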
*/
if (rc == -EOPNOTSUPP) {
cifs_dbg(VFS, "Dialect not supported by server. Consider "
- "specifying vers=1.0 or vers=2.1 on mount for accessing"
+ "specifying vers=1.0 or vers=2.0 on mount for accessing"
" older servers\n");
goto neg_exit;
} else if (rc != 0)
goto neg_exit;
+ if (strcmp(ses->server->vals->version_string,
+ SMB3ANY_VERSION_STRING) == 0) {
+ if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID)) {
+ cifs_dbg(VFS,
+ "SMB2 dialect returned but not requested\n");
+ return -EIO;
+ } else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) {
+ cifs_dbg(VFS,
+ "SMB2.1 dialect returned but not requested\n");
+ return -EIO;
+ }
+ } else if (strcmp(ses->server->vals->version_string,
+ SMBDEFAULT_VERSION_STRING) == 0) {
+ if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID)) {
+ cifs_dbg(VFS,
+ "SMB2 dialect returned but not requested\n");
+ return -EIO;
+ } else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) {
+ /* ops default to 3.0, so switch to 2.1 ops for the negotiated dialect */
+ ses->server->ops = &smb21_operations;
+ }
+ } else if (le16_to_cpu(rsp->DialectRevision) !=
+ ses->server->vals->protocol_id) {
+ /* if a single dialect was requested, ensure the returned one matches */
+ cifs_dbg(VFS, "Illegal 0x%x dialect returned: not requested\n",
+ le16_to_cpu(rsp->DialectRevision));
+ return -EIO;
+ }
+
cifs_dbg(FYI, "mode 0x%x\n", rsp->SecurityMode);
- /* BB we may eventually want to match the negotiated vs. requested
- dialect, even though we are only requesting one at a time */
if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID))
cifs_dbg(FYI, "negotiated smb2.0 dialect\n");
else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID))
}
server->dialect = le16_to_cpu(rsp->DialectRevision);
+ /* BB: add check that dialect was valid given dialect(s) we asked for */
+
/* SMB2 only has an extended negflavor */
server->negflavor = CIFS_NEGFLAVOR_EXTENDED;
/* set it to the maximum buffer size value we can send with 1 credit */
struct validate_negotiate_info_req vneg_inbuf;
struct validate_negotiate_info_rsp *pneg_rsp;
u32 rsplen;
+ u32 inbuflen; /* max of 3 dialects */
cifs_dbg(FYI, "validate negotiate\n");
/*
* validation ioctl must be signed, so no point sending this if we
- * can not sign it. We could eventually change this to selectively
+ * cannot sign it (i.e. we are not a known user). Even if signing is not
+ * required (enabled but not negotiated), in those cases we selectively
* sign just this, the first and only signed request on a connection.
- * This is good enough for now since a user who wants better security
- * would also enable signing on the mount. Having validation of
- * negotiate info for signed connections helps reduce attack vectors
+ * Having validation of negotiate info helps reduce attack vectors.
*/
- if (tcon->ses->server->sign == false)
+ if (tcon->ses->session_flags & SMB2_SESSION_FLAG_IS_GUEST)
return 0; /* validation requires signing */
+ if (tcon->ses->user_name == NULL) {
+ cifs_dbg(FYI, "Can't validate negotiate: null user mount\n");
+ return 0; /* validation requires signing */
+ }
+
+ if (tcon->ses->session_flags & SMB2_SESSION_FLAG_IS_NULL)
+ cifs_dbg(VFS, "Unexpected null user (anonymous) auth flag sent by server\n");
+
vneg_inbuf.Capabilities =
cpu_to_le32(tcon->ses->server->vals->req_capabilities);
memcpy(vneg_inbuf.Guid, tcon->ses->server->client_guid,
else
vneg_inbuf.SecurityMode = 0;
- vneg_inbuf.DialectCount = cpu_to_le16(1);
- vneg_inbuf.Dialects[0] =
- cpu_to_le16(tcon->ses->server->vals->protocol_id);
+
+ if (strcmp(tcon->ses->server->vals->version_string,
+ SMB3ANY_VERSION_STRING) == 0) {
+ vneg_inbuf.Dialects[0] = cpu_to_le16(SMB30_PROT_ID);
+ vneg_inbuf.Dialects[1] = cpu_to_le16(SMB302_PROT_ID);
+ vneg_inbuf.DialectCount = cpu_to_le16(2);
+ /* structure is big enough for 3 dialects, sending only 2 */
+ inbuflen = sizeof(struct validate_negotiate_info_req) - 2;
+ } else if (strcmp(tcon->ses->server->vals->version_string,
+ SMBDEFAULT_VERSION_STRING) == 0) {
+ vneg_inbuf.Dialects[0] = cpu_to_le16(SMB21_PROT_ID);
+ vneg_inbuf.Dialects[1] = cpu_to_le16(SMB30_PROT_ID);
+ vneg_inbuf.Dialects[2] = cpu_to_le16(SMB302_PROT_ID);
+ vneg_inbuf.DialectCount = cpu_to_le16(3);
+ /* structure is big enough for 3 dialects */
+ inbuflen = sizeof(struct validate_negotiate_info_req);
+ } else {
+ /* otherwise specific dialect was requested */
+ vneg_inbuf.Dialects[0] =
+ cpu_to_le16(tcon->ses->server->vals->protocol_id);
+ vneg_inbuf.DialectCount = cpu_to_le16(1);
+ /* structure is big enough for 3 dialects, sending only 1 */
+ inbuflen = sizeof(struct validate_negotiate_info_req) - 4;
+ }
rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
FSCTL_VALIDATE_NEGOTIATE_INFO, true /* is_fsctl */,
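
The -2/-4 adjustments to inbuflen fall out of the wire format: Dialects[] reserves three 2-byte slots, so each unused slot shrinks the request by two bytes. A standalone sanity check against a mirrored layout (not the real header):

    #include <assert.h>
    #include <stdint.h>

    struct vneg_tail {              /* mirrors the end of the request */
            uint16_t DialectCount;
            uint16_t Dialects[3];
    };

    int main(void)
    {
            assert(sizeof(struct vneg_tail) == 8);
            assert(sizeof(struct vneg_tail) - 2 == 6);  /* two dialects sent */
            assert(sizeof(struct vneg_tail) - 4 == 4);  /* one dialect sent */
            return 0;
    }
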
while (sess_data->func)
sess_data->func(sess_data);
+ if ((ses->session_flags & SMB2_SESSION_FLAG_IS_GUEST) && (ses->sign))
+ cifs_dbg(VFS, "signing requested but authenticated as guest\n");
rc = sess_data->result;
out:
kfree(sess_data);
struct cifs_tcon *tcon = oparms->tcon;
struct cifs_ses *ses = tcon->ses;
struct kvec iov[4];
- struct kvec rsp_iov;
+ struct kvec rsp_iov = {NULL, 0};
int resp_buftype;
int uni_path_len;
__le16 *copy_path = NULL;
if (rc != 0) {
cifs_stats_fail_inc(tcon, SMB2_CREATE_HE);
- if (err_buf)
+ if (err_buf && rsp)
*err_buf = kmemdup(rsp, get_rfc1002_length(rsp) + 4,
GFP_KERNEL);
goto creat_exit;
__u8 Guid[SMB2_CLIENT_GUID_SIZE];
__le16 SecurityMode;
__le16 DialectCount;
- __le16 Dialects[1]; /* dialect (someday maybe list) client asked for */
+ __le16 Dialects[3]; /* BB expand this if autonegotiate > 3 dialects */
} __packed;
struct validate_negotiate_info_rsp {
{
loff_t offset = dio->iocb->ki_pos;
ssize_t transferred = 0;
+ int err;
/*
* AIO submission can race with bio completion to get here while
if (ret == 0)
ret = transferred;
+ /*
+ * Try again to invalidate clean pages which might have been cached by
+ * non-direct readahead, or faulted in by get_user_pages() if the source
+ * of the write was an mmap'ed region of the file we're writing. Either
+ * one is a pretty crazy thing to do, so we don't support it 100%. If
+ * this invalidation fails, tough, the write still worked...
+ */
+ if (ret > 0 && dio->op == REQ_OP_WRITE &&
+ dio->inode->i_mapping->nrpages) {
+ err = invalidate_inode_pages2_range(dio->inode->i_mapping,
+ offset >> PAGE_SHIFT,
+ (offset + ret - 1) >> PAGE_SHIFT);
+ WARN_ON_ONCE(err);
+ }
+
if (dio->end_io) {
- int err;
// XXX: ki_pos??
err = dio->end_io(dio->iocb, offset, ret, dio->private);
struct dio *dio = bio->bi_private;
unsigned long remaining;
unsigned long flags;
+ bool defer_completion = false;
/* cleanup the bio */
dio_bio_complete(dio, bio);
spin_unlock_irqrestore(&dio->bio_lock, flags);
if (remaining == 0) {
- if (dio->result && dio->defer_completion) {
+ /*
+ * Defer completion when defer_completion is set or
+ * when the inode has pages mapped and this is AIO write.
+ * We need to invalidate those pages because there is a
+ * chance they contain stale data in case buffered IO to the same
+ * region happened between AIO submission and completion.
+ */
+ if (dio->result)
+ defer_completion = dio->defer_completion ||
+ (dio->op == REQ_OP_WRITE &&
+ dio->inode->i_mapping->nrpages);
+ if (defer_completion) {
INIT_WORK(&dio->complete_work, dio_aio_complete_work);
queue_work(dio->inode->i_sb->s_dio_done_wq,
&dio->complete_work);
* For AIO O_(D)SYNC writes we need to defer completions to a workqueue
* so that we can call ->fsync.
*/
- if (dio->is_async && iov_iter_rw(iter) == WRITE &&
- ((iocb->ki_filp->f_flags & O_DSYNC) ||
- IS_SYNC(iocb->ki_filp->f_mapping->host))) {
- retval = dio_set_defer_completion(dio);
+ if (dio->is_async && iov_iter_rw(iter) == WRITE) {
+ retval = 0;
+ if ((iocb->ki_filp->f_flags & O_DSYNC) ||
+ IS_SYNC(iocb->ki_filp->f_mapping->host))
+ retval = dio_set_defer_completion(dio);
+ else if (!dio->inode->i_sb->s_dio_done_wq) {
+ /*
+ * In case of AIO write racing with buffered read we
+ * need to defer completion. We can't decide this now,
+ * however the workqueue needs to be initialized here.
+ */
+ retval = sb_init_dio_done_wq(dio->inode->i_sb);
+ }
if (retval) {
/*
* We grab i_mutex only for reads so we don't have
* specific si_codes. In that case use SI_SIGIO instead
* to remove the ambiguity.
*/
- if (sig_specific_sicodes(signum))
+ if ((signum != SIGPOLL) && sig_specific_sicodes(signum))
si.si_code = SI_SIGIO;
/* Make sure we are called with one of the POLL_*
{
struct gfs2_glock_iter *gi = seq->private;
loff_t n = *pos;
- int ret;
-
- if (gi->last_pos <= *pos)
- n = (*pos - gi->last_pos);
- ret = rhashtable_walk_start(&gi->hti);
- if (ret)
+ rhashtable_walk_enter(&gl_hash_table, &gi->hti);
+ if (rhashtable_walk_start(&gi->hti) != 0)
return NULL;
do {
} while (gi->gl && n--);
gi->last_pos = *pos;
+
return gi->gl;
}
(*pos)++;
gi->last_pos = *pos;
gfs2_glock_iter_next(gi);
+
return gi->gl;
}
gi->gl = NULL;
rhashtable_walk_stop(&gi->hti);
+ rhashtable_walk_exit(&gi->hti);
}
static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
struct gfs2_glock_iter *gi = seq->private;
gi->sdp = inode->i_private;
- gi->last_pos = 0;
seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
if (seq->buf)
seq->size = GFS2_SEQ_GOODSIZE;
gi->gl = NULL;
- rhashtable_walk_enter(&gl_hash_table, &gi->hti);
}
return ret;
}
struct gfs2_glock_iter *gi = seq->private;
gi->gl = NULL;
- rhashtable_walk_exit(&gi->hti);
return seq_release_private(inode, file);
}
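
The gfs2 change above reduces the iterator to the canonical enter/start ... stop/exit bracket within each dump pass. A minimal sketch of that rhashtable walk shape (kernel context assumed; this is an illustration, not the glock code):

    #include <linux/rhashtable.h>

    static void walk_all(struct rhashtable *ht)
    {
            struct rhashtable_iter iter;
            void *obj;

            rhashtable_walk_enter(ht, &iter);       /* attach iterator */
            (void)rhashtable_walk_start(&iter);     /* -EAGAIN just rewinds */
            while ((obj = rhashtable_walk_next(&iter)) != NULL) {
                    if (IS_ERR(obj))        /* -EAGAIN on concurrent resize */
                            continue;
                    /* inspect obj here */
            }
            rhashtable_walk_stop(&iter);            /* leave RCU read side */
            rhashtable_walk_exit(&iter);            /* detach iterator */
    }
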
static ssize_t iomap_dio_complete(struct iomap_dio *dio)
{
struct kiocb *iocb = dio->iocb;
+ struct inode *inode = file_inode(iocb->ki_filp);
ssize_t ret;
+ /*
+ * Try again to invalidate clean pages which might have been cached by
+ * non-direct readahead, or faulted in by get_user_pages() if the source
+ * of the write was an mmap'ed region of the file we're writing. Either
+ * one is a pretty crazy thing to do, so we don't support it 100%. If
+ * this invalidation fails, tough, the write still worked...
+ */
+ if (!dio->error &&
+ (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) {
+ ret = invalidate_inode_pages2_range(inode->i_mapping,
+ iocb->ki_pos >> PAGE_SHIFT,
+ (iocb->ki_pos + dio->size - 1) >> PAGE_SHIFT);
+ WARN_ON_ONCE(ret);
+ }
+
if (dio->end_io) {
ret = dio->end_io(iocb,
dio->error ? dio->error : dio->size,
WARN_ON_ONCE(ret);
ret = 0;
+ if (iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) &&
+ !inode->i_sb->s_dio_done_wq) {
+ ret = sb_init_dio_done_wq(inode->i_sb);
+ if (ret < 0)
+ goto out_free_dio;
+ }
+
inode_dio_begin(inode);
blk_start_plug(&plug);
if (ret < 0)
iomap_dio_set_error(dio, ret);
- if (ret >= 0 && iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) &&
- !inode->i_sb->s_dio_done_wq) {
- ret = sb_init_dio_done_wq(inode->i_sb);
- if (ret < 0)
- iomap_dio_set_error(dio, ret);
- }
-
if (!atomic_dec_and_test(&dio->ref)) {
if (!is_sync_kiocb(iocb))
return -EIOCBQUEUED;
ret = iomap_dio_complete(dio);
- /*
- * Try again to invalidate clean pages which might have been cached by
- * non-direct readahead, or faulted in by get_user_pages() if the source
- * of the write was an mmap'ed region of the file we're writing. Either
- * one is a pretty crazy thing to do, so we don't support it 100%. If
- * this invalidation fails, tough, the write still worked...
- */
- if (iov_iter_rw(iter) == WRITE) {
- int err = invalidate_inode_pages2_range(mapping,
- start >> PAGE_SHIFT, end >> PAGE_SHIFT);
- WARN_ON_ONCE(err);
- }
-
return ret;
out_free_dio:
if (sbi->s_fmode != ISOFS_INVALID_MODE)
seq_printf(m, ",fmode=%o", sbi->s_fmode);
+#ifdef CONFIG_JOLIET
if (sbi->s_nls_iocharset &&
strcmp(sbi->s_nls_iocharset->charset, CONFIG_NLS_DEFAULT) != 0)
seq_printf(m, ",iocharset=%s", sbi->s_nls_iocharset->charset);
+#endif
return 0;
}
#include <linux/mman.h>
#include <linux/sched/mm.h>
#include <linux/sched/numa_balancing.h>
+#include <linux/sched/task_stack.h>
#include <linux/sched/task.h>
#include <linux/sched/cputime.h>
#include <linux/proc_fs.h>
* esp and eip are intentionally zeroed out. There is no
* non-racy way to read them without freezing the task.
* Programs that need reliable values can use ptrace(2).
+ *
+ * The only exception is if the task is core dumping because
+ * a program is not able to use ptrace(2) in that case. It is
+ * safe because the task has stopped executing permanently.
*/
+ if (permitted && (task->flags & PF_DUMPCORE)) {
+ eip = KSTK_EIP(task);
+ esp = KSTK_ESP(task);
+ }
}
get_task_comm(tcomm, task);
ret = dquot_add_space(transfer_to[cnt], cur_space, rsv_space, 0,
&warn_to[cnt]);
if (ret) {
+ spin_lock(&transfer_to[cnt]->dq_dqb_lock);
dquot_decr_inodes(transfer_to[cnt], inode_usage);
+ spin_unlock(&transfer_to[cnt]->dq_dqb_lock);
goto over_quota;
}
}
if (!dquot->dq_off) {
alloc = true;
down_write(&dqopt->dqio_sem);
+ } else {
+ down_read(&dqopt->dqio_sem);
}
ret = qtree_write_dquot(
sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv,
dquot);
if (alloc)
up_write(&dqopt->dqio_sem);
+ else
+ up_read(&dqopt->dqio_sem);
return ret;
}
* In the generic case the entire file is data, so as long as
* offset isn't at the end of the file then the offset is data.
*/
- if (offset >= eof)
+ if ((unsigned long long)offset >= eof)
return -ENXIO;
break;
case SEEK_HOLE:
* There is a virtual hole at the end of the file, so as long as
* offset isn't i_size or larger, return i_size.
*/
- if (offset >= eof)
+ if ((unsigned long long)offset >= eof)
return -ENXIO;
offset = eof;
break;
trace_xfs_ag_resv_free(pag, type, 0);
resv = xfs_perag_resv(pag, type);
- pag->pag_mount->m_ag_max_usable += resv->ar_asked;
+ if (pag->pag_agno == 0)
+ pag->pag_mount->m_ag_max_usable += resv->ar_asked;
/*
* AGFL blocks are always considered "free", so whatever
* was reserved at mount time must be given back at umount.
return error;
}
- mp->m_ag_max_usable -= ask;
+ /*
+ * Reduce the maximum per-AG allocation length by however much we're
+ * trying to reserve for an AG. Since this is a filesystem-wide
+ * counter, we only make the adjustment for AG 0. This assumes that
+ * there aren't any AGs hungrier for per-AG reservation than AG 0.
+ */
+ if (pag->pag_agno == 0)
+ mp->m_ag_max_usable -= ask;
resv = xfs_perag_resv(pag, type);
resv->ar_asked = ask;
#include "xfs_rmap.h"
#include "xfs_ag_resv.h"
#include "xfs_refcount.h"
-#include "xfs_rmap_btree.h"
#include "xfs_icache.h"
int maxrecs; /* maximum record count at this level */
xfs_mount_t *mp; /* mount structure */
xfs_filblks_t rval; /* return value */
- xfs_filblks_t orig_len;
mp = ip->i_mount;
-
- /* Calculate the worst-case size of the bmbt. */
- orig_len = len;
maxrecs = mp->m_bmap_dmxr[0];
for (level = 0, rval = 0;
level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
len += maxrecs - 1;
do_div(len, maxrecs);
rval += len;
- if (len == 1) {
- rval += XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
+ if (len == 1)
+ return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
level - 1;
- break;
- }
if (level == 0)
maxrecs = mp->m_bmap_dmxr[1];
}
-
- /* Calculate the worst-case size of the rmapbt. */
- if (xfs_sb_version_hasrmapbt(&mp->m_sb))
- rval += 1 + xfs_rmapbt_calc_size(mp, orig_len) +
- mp->m_rmap_maxlevels;
-
return rval;
}
error = xfs_reflink_end_cow(ip, offset, size);
break;
case XFS_IO_UNWRITTEN:
- error = xfs_iomap_write_unwritten(ip, offset, size);
+ /* writeback should never update isize */
+ error = xfs_iomap_write_unwritten(ip, offset, size, false);
break;
default:
ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans);
return error;
/*
- * The extent shiting code works on extent granularity. So, if
+ * Clean out anything hanging around in the cow fork now that
+ * we've flushed all the dirty data out to disk to avoid having
+ * CoW extents at the wrong offsets.
+ */
+ if (xfs_is_reflink_inode(ip)) {
+ error = xfs_reflink_cancel_cow_range(ip, offset, NULLFILEOFF,
+ true);
+ if (error)
+ return error;
+ }
+
+ /*
+ * The extent shifting code works on extent granularity. So, if
* stop_fsb is not the starting block of extent, we need to split
* the extent at stop_fsb.
*/
int size;
int offset;
- total_nr_pages = bp->b_page_count;
-
/* skip the pages in the buffer before the start offset */
page_index = 0;
offset = *buf_offset;
{
struct xfs_mount *mp = bp->b_target->bt_mount;
- xfs_alert(mp, "Metadata %s detected at %pF, %s block 0x%llx",
+ xfs_alert(mp, "Metadata %s detected at %pS, %s block 0x%llx",
bp->b_error == -EFSBADCRC ? "CRC error" : "corruption",
__return_address, bp->b_ops->name, bp->b_bn);
xfs_off_t count,
bool *did_zero)
{
- return iomap_zero_range(VFS_I(ip), pos, count, NULL, &xfs_iomap_ops);
+ return iomap_zero_range(VFS_I(ip), pos, count, did_zero, &xfs_iomap_ops);
}
int
*/
spin_lock(&ip->i_flags_lock);
if (iocb->ki_pos > i_size_read(inode)) {
- bool zero = false;
-
spin_unlock(&ip->i_flags_lock);
if (!drained_dio) {
if (*iolock == XFS_IOLOCK_SHARED) {
drained_dio = true;
goto restart;
}
- error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero);
+ error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), NULL);
if (error)
return error;
} else
struct inode *inode = file_inode(iocb->ki_filp);
struct xfs_inode *ip = XFS_I(inode);
loff_t offset = iocb->ki_pos;
- bool update_size = false;
int error = 0;
trace_xfs_end_io_direct_write(ip, offset, size);
if (size <= 0)
return size;
+ if (flags & IOMAP_DIO_COW) {
+ error = xfs_reflink_end_cow(ip, offset, size);
+ if (error)
+ return error;
+ }
+
+ /*
+ * Unwritten conversion updates the in-core isize after extent
+ * conversion but before updating the on-disk size. Updating isize any
+ * earlier allows a racing dio read to find unwritten extents before
+ * they are converted.
+ */
+ if (flags & IOMAP_DIO_UNWRITTEN)
+ return xfs_iomap_write_unwritten(ip, offset, size, true);
+
/*
* We need to update the in-core inode size here so that we don't end up
* with the on-disk inode size being outside the in-core inode size. We
spin_lock(&ip->i_flags_lock);
if (offset + size > i_size_read(inode)) {
i_size_write(inode, offset + size);
- update_size = true;
- }
- spin_unlock(&ip->i_flags_lock);
-
- if (flags & IOMAP_DIO_COW) {
- error = xfs_reflink_end_cow(ip, offset, size);
- if (error)
- return error;
- }
-
- if (flags & IOMAP_DIO_UNWRITTEN)
- error = xfs_iomap_write_unwritten(ip, offset, size);
- else if (update_size)
+ spin_unlock(&ip->i_flags_lock);
error = xfs_setfilesize(ip, offset, size);
+ } else {
+ spin_unlock(&ip->i_flags_lock);
+ }
return error;
}
goto out;
/*
- * Clear the reflink flag if we truncated everything.
+ * Clear the reflink flag if there are no data fork blocks and
+ * there are no extents staged in the cow fork.
*/
- if (ip->i_d.di_nblocks == 0 && xfs_is_reflink_inode(ip)) {
- ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
+ if (xfs_is_reflink_inode(ip) && ip->i_cnextents == 0) {
+ if (ip->i_d.di_nblocks == 0)
+ ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
xfs_inode_clear_cowblocks_tag(ip);
}
*/
iip = INODE_ITEM(blip);
if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) ||
- lip->li_flags & XFS_LI_FAILED)
+ (blip->li_flags & XFS_LI_FAILED))
need_ail++;
blip = next;
int *join_flags)
{
struct inode *inode = VFS_I(ip);
+ struct super_block *sb = inode->i_sb;
int error;
*join_flags = 0;
if (fa->fsx_xflags & FS_XFLAG_DAX) {
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)))
return -EINVAL;
- if (ip->i_mount->m_sb.sb_blocksize != PAGE_SIZE)
+ if (bdev_dax_supported(sb, sb->s_blocksize) < 0)
return -EINVAL;
}
xfs_iomap_write_unwritten(
xfs_inode_t *ip,
xfs_off_t offset,
- xfs_off_t count)
+ xfs_off_t count,
+ bool update_isize)
{
xfs_mount_t *mp = ip->i_mount;
xfs_fileoff_t offset_fsb;
xfs_trans_t *tp;
xfs_bmbt_irec_t imap;
struct xfs_defer_ops dfops;
+ struct inode *inode = VFS_I(ip);
xfs_fsize_t i_size;
uint resblks;
int error;
i_size = XFS_FSB_TO_B(mp, offset_fsb + count_fsb);
if (i_size > offset + count)
i_size = offset + count;
-
+ if (update_isize && i_size > i_size_read(inode))
+ i_size_write(inode, i_size);
i_size = xfs_new_eof(ip, i_size);
if (i_size) {
ip->i_d.di_size = i_size;
struct xfs_bmbt_irec *, int);
int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t,
struct xfs_bmbt_irec *);
-int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t);
+int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
struct xfs_bmbt_irec *);
(end - 1) >> PAGE_SHIFT);
WARN_ON_ONCE(error);
- error = xfs_iomap_write_unwritten(ip, start, length);
+ error = xfs_iomap_write_unwritten(ip, start, length, false);
if (error)
goto out_drop_iolock;
}
"DAX and reflink have not been tested together!");
}
+ if (mp->m_flags & XFS_MOUNT_DISCARD) {
+ struct request_queue *q = bdev_get_queue(sb->s_bdev);
+
+ if (!blk_queue_discard(q)) {
+ xfs_warn(mp, "mounting with \"discard\" option, but "
+ "the device does not support discard");
+ mp->m_flags &= ~XFS_MOUNT_DISCARD;
+ }
+ }
+
if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
if (mp->m_sb.sb_rblocks) {
xfs_alert(mp,
extern const struct fwnode_operations acpi_data_fwnode_ops;
extern const struct fwnode_operations acpi_static_fwnode_ops;
+bool is_acpi_device_node(const struct fwnode_handle *fwnode);
+bool is_acpi_data_node(const struct fwnode_handle *fwnode);
+
static inline bool is_acpi_node(const struct fwnode_handle *fwnode)
{
- return !IS_ERR_OR_NULL(fwnode) &&
- (fwnode->ops == &acpi_device_fwnode_ops
- || fwnode->ops == &acpi_data_fwnode_ops);
-}
-
-static inline bool is_acpi_device_node(const struct fwnode_handle *fwnode)
-{
- return !IS_ERR_OR_NULL(fwnode) &&
- fwnode->ops == &acpi_device_fwnode_ops;
+ return (is_acpi_device_node(fwnode) || is_acpi_data_node(fwnode));
}
#define to_acpi_device_node(__fwnode) \
NULL; \
})
-static inline bool is_acpi_data_node(const struct fwnode_handle *fwnode)
-{
- return !IS_ERR_OR_NULL(fwnode) && fwnode->ops == &acpi_data_fwnode_ops;
-}
-
#define to_acpi_data_node(__fwnode) \
({ \
typeof(__fwnode) __to_acpi_data_node_fwnode = __fwnode; \
static inline void audit_seccomp(unsigned long syscall, long signr, int code)
{
- if (!audit_enabled)
- return;
-
- /* Force a record to be reported if a signal was delivered. */
- if (signr || unlikely(!audit_dummy_context()))
+ if (audit_enabled && unlikely(!audit_dummy_context()))
__audit_seccomp(syscall, signr, code);
}
int node;
#ifdef CONFIG_BLK_DEV_IO_TRACE
struct blk_trace *blk_trace;
+ struct mutex blk_trace_mutex;
#endif
/*
* for flush operations
* @driver_data: Private pointer for driver specific info.
* @links: Links to suppliers and consumers of this device.
* @power: For device power management.
- * See Documentation/power/admin-guide/devices.rst for details.
+ * See Documentation/driver-api/pm/devices.rst for details.
* @pm_domain: Provide callbacks that are executed during system suspend,
* hibernation, system resume and during runtime PM transitions
* along with subsystem-level and driver-level callbacks.
int input_ff_upload(struct input_dev *dev, struct ff_effect *effect, struct file *file);
int input_ff_erase(struct input_dev *dev, int effect_id, struct file *file);
+int input_ff_flush(struct input_dev *dev, struct file *file);
int input_ff_create_memless(struct input_dev *dev, void *data,
int (*play_effect)(struct input_dev *, void *, struct ff_effect *));
static inline
struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d)
{
- if (!cpumask_empty(d->common->effective_affinity))
- return d->common->effective_affinity;
-
- return d->common->affinity;
+ return d->common->effective_affinity;
}
static inline void irq_data_update_effective_affinity(struct irq_data *d,
const struct cpumask *m)
#define KEY_FLAG_BUILTIN 8 /* set if key is built in to the kernel */
#define KEY_FLAG_ROOT_CAN_INVAL 9 /* set if key can be invalidated by root without permission */
#define KEY_FLAG_KEEP 10 /* set if key should not be removed */
+#define KEY_FLAG_UID_KEYRING 11 /* set if key is a user or user session keyring */
/* the key type and key description string
* - the desc is used to match a key against search criteria
#define KEY_ALLOC_NOT_IN_QUOTA 0x0002 /* not in quota */
#define KEY_ALLOC_BUILT_IN 0x0004 /* Key is built into kernel */
#define KEY_ALLOC_BYPASS_RESTRICTION 0x0008 /* Override the check on restricted keyrings */
+#define KEY_ALLOC_UID_KEYRING 0x0010 /* allocating a user or user session keyring */
extern void key_revoke(struct key *key);
extern void key_invalidate(struct key *key);
* indicating an FC transport Aborted status.
* Entrypoint is Mandatory.
*
- * @defer_rcv: Called by the transport to signal the LLLD that it has
- * begun processing of a previously received NVME CMD IU. The LLDD
- * is now free to re-use the rcv buffer associated with the
- * nvmefc_tgt_fcp_req.
- *
* @max_hw_queues: indicates the maximum number of hw queues the LLDD
* supports for cpu affinitization.
* Value is Mandatory. Must be at least 1.
* outstanding operation (if there was one) to complete, then will
* call the fcp_req_release() callback to return the command's
* exchange context back to the LLDD.
+ * Entrypoint is Mandatory.
*
* @fcp_req_release: Called by the transport to return a nvmefc_tgt_fcp_req
* to the LLDD after all operations on the fcp operation are complete.
* This may be due to the command completing or upon completion of
* abort cleanup.
+ * Entrypoint is Mandatory.
+ *
+ * @defer_rcv: Called by the transport to signal the LLDD that it has
+ * begun processing of a previously received NVME CMD IU. The LLDD
+ * is now free to re-use the rcv buffer associated with the
+ * nvmefc_tgt_fcp_req.
+ * Entrypoint is Optional.
*
* @max_hw_queues: indicates the maximum number of hw queues the LLDD
* supports for cpu affinitization.
*
* @NVME_SGL_FMT_ADDRESS: absolute address of the data block
* @NVME_SGL_FMT_OFFSET: relative offset of the in-capsule data block
+ * @NVME_SGL_FMT_TRANSPORT_A: transport defined format, value 0xA
* @NVME_SGL_FMT_INVALIDATE: RDMA transport specific remote invalidation
* request subtype
*/
enum {
NVME_SGL_FMT_ADDRESS = 0x00,
NVME_SGL_FMT_OFFSET = 0x01,
+ NVME_SGL_FMT_TRANSPORT_A = 0x0A,
NVME_SGL_FMT_INVALIDATE = 0x0f,
};
*
* For struct nvme_keyed_sgl_desc:
* @NVME_KEY_SGL_FMT_DATA_DESC: keyed data block descriptor
+ *
+ * Transport-specific SGL types:
+ * @NVME_TRANSPORT_SGL_DATA_DESC: Transport SGL data block descriptor
*/
enum {
NVME_SGL_FMT_DATA_DESC = 0x00,
NVME_SGL_FMT_SEG_DESC = 0x02,
NVME_SGL_FMT_LAST_SEG_DESC = 0x03,
NVME_KEY_SGL_FMT_DATA_DESC = 0x04,
+ NVME_TRANSPORT_SGL_DATA_DESC = 0x05,
};
struct nvme_sgl_desc {
NVME_SC_UNWRITTEN_BLOCK = 0x287,
NVME_SC_DNR = 0x4000,
-
-
- /*
- * FC Transport-specific error status values for NVME commands
- *
- * Transport-specific status code values must be in the range 0xB0..0xBF
- */
-
- /* Generic FC failure - catchall */
- NVME_SC_FC_TRANSPORT_ERROR = 0x00B0,
-
- /* I/O failure due to FC ABTS'd */
- NVME_SC_FC_TRANSPORT_ABORTED = 0x00B1,
};
struct nvme_completion {
extern struct platform_device *of_device_alloc(struct device_node *np,
const char *bus_id,
struct device *parent);
+#ifdef CONFIG_OF
extern struct platform_device *of_find_device_by_node(struct device_node *np);
+#else
+static inline struct platform_device *of_find_device_by_node(struct device_node *np)
+{
+ return NULL;
+}
+#endif
/* Platform devices and busses creation */
extern struct platform_device *of_platform_device_create(struct device_node *np,
#include <uapi/linux/seccomp.h>
-#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC)
+#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC | \
+ SECCOMP_FILTER_FLAG_LOG)
#ifdef CONFIG_SECCOMP
} \
static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__))
-#ifdef TIF_FSCHECK
/*
* Called before coming back to user-mode. Returning to user-mode with an
* address limit different than USER_DS can allow to overwrite kernel memory.
*/
static inline void addr_limit_user_check(void)
{
-
+#ifdef TIF_FSCHECK
if (!test_thread_flag(TIF_FSCHECK))
return;
+#endif
- BUG_ON(!segment_eq(get_fs(), USER_DS));
+ if (CHECK_DATA_CORRUPTION(!segment_eq(get_fs(), USER_DS),
+ "Invalid address limit on user-mode return"))
+ force_sig(SIGKILL, current);
+
+#ifdef TIF_FSCHECK
clear_thread_flag(TIF_FSCHECK);
-}
#endif
+}
asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special,
qid_t id, void __user *addr);
int perf_refcount;
struct hlist_head __percpu *perf_events;
struct bpf_prog *prog;
+ struct perf_event *bpf_prog_owner;
int (*perf_perm)(struct trace_event_call *,
struct perf_event *);
static inline struct dst_entry *dst_clone(struct dst_entry *dst)
{
if (dst)
- atomic_inc(&dst->__refcnt);
+ dst_hold(dst);
return dst;
}
__skb_dst_copy(nskb, oskb->_skb_refdst);
}
-/**
- * skb_dst_force - makes sure skb dst is refcounted
- * @skb: buffer
- *
- * If dst is not yet refcounted, let's do it
- */
-static inline void skb_dst_force(struct sk_buff *skb)
-{
- if (skb_dst_is_noref(skb)) {
- WARN_ON(!rcu_read_lock_held());
- skb->_skb_refdst &= ~SKB_DST_NOREF;
- dst_clone(skb_dst(skb));
- }
-}
-
/**
* dst_hold_safe - Take a reference on a dst if possible
* @dst: pointer to dst entry
}
/**
- * skb_dst_force_safe - makes sure skb dst is refcounted
+ * skb_dst_force - makes sure skb dst is refcounted
* @skb: buffer
*
* If dst is not yet refcounted and not destroyed, grab a ref on it.
*/
-static inline void skb_dst_force_safe(struct sk_buff *skb)
+static inline void skb_dst_force(struct sk_buff *skb)
{
if (skb_dst_is_noref(skb)) {
struct dst_entry *dst = skb_dst(skb);
+ WARN_ON(!rcu_read_lock_held());
if (!dst_hold_safe(dst))
dst = NULL;
rcu_read_lock();
err = ip_route_input_noref(skb, dst, src, tos, devin);
if (!err) {
- skb_dst_force_safe(skb);
+ skb_dst_force(skb);
if (!skb_dst(skb))
err = -EINVAL;
}
static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb)
{
/* dont let skb dst not refcounted, we are going to leave rcu lock */
- skb_dst_force_safe(skb);
+ skb_dst_force(skb);
if (!sk->sk_backlog.tail)
sk->sk_backlog.head = skb;
int min_tso_segs);
void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
int nonagle);
-bool tcp_may_send_now(struct sock *sk);
int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs);
int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs);
void tcp_retransmit_timer(struct sock *sk);
IB_TM_CAP_RC = 1 << 0,
};
-struct ib_xrq_caps {
+struct ib_tm_caps {
/* Max size of RNDV header */
u32 max_rndv_hdr_size;
/* Max number of entries in tag matching list */
struct ib_rss_caps rss_caps;
u32 max_wq_type_rq;
u32 raw_packet_caps; /* Use ib_raw_packet_caps enum */
- struct ib_xrq_caps xrq_caps;
+ struct ib_tm_caps tm_caps;
};
enum ib_mtu {
u32 lkey;
u32 rkey;
u64 iova;
- u32 length;
+ u64 length;
unsigned int page_size;
bool need_inval;
union {
* %ethtool_link_mode_bit_indices for the link modes, and other
* link features that the link partner advertised through
* autonegotiation; 0 if unknown or not applicable. Read-only.
+ * @transceiver: Used to distinguish different possible PHY types,
+ * reported consistently by PHYLIB. Read-only.
*
* If autonegotiation is disabled, the speed and @duplex represent the
* fixed link mode and are writable if the driver supports multiple
__u8 eth_tp_mdix;
__u8 eth_tp_mdix_ctrl;
__s8 link_mode_masks_nwords;
- __u32 reserved[8];
+ __u8 transceiver;
+ __u8 reserved1[3];
+ __u32 reserved[7];
__u32 link_mode_masks[0];
/* layout of link_mode_masks fields:
* __u32 map_supported[link_mode_masks_nwords];
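
The new transceiver field is carved out of the existing reserved space, so the uapi struct size is unchanged. A standalone check of the arithmetic (layout mirrored, not the real header):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
            /* old tail: __u32 reserved[8] = 32 bytes
             * new tail: __u8 transceiver + __u8 reserved1[3] + __u32 reserved[7] */
            assert(sizeof(uint8_t) + 3 * sizeof(uint8_t) + 7 * sizeof(uint32_t)
                   == 8 * sizeof(uint32_t));
            return 0;
    }
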
#ifndef KFD_IOCTL_H_INCLUDED
#define KFD_IOCTL_H_INCLUDED
-#include <linux/types.h>
+#include <drm/drm.h>
#include <linux/ioctl.h>
#define KFD_IOCTL_MAJOR_VERSION 1
#define KFD_IOCTL_MINOR_VERSION 1
struct kfd_ioctl_get_version_args {
- uint32_t major_version; /* from KFD */
- uint32_t minor_version; /* from KFD */
+ __u32 major_version; /* from KFD */
+ __u32 minor_version; /* from KFD */
};
/* For kfd_ioctl_create_queue_args.queue_type. */
#define KFD_MAX_QUEUE_PRIORITY 15
struct kfd_ioctl_create_queue_args {
- uint64_t ring_base_address; /* to KFD */
- uint64_t write_pointer_address; /* from KFD */
- uint64_t read_pointer_address; /* from KFD */
- uint64_t doorbell_offset; /* from KFD */
-
- uint32_t ring_size; /* to KFD */
- uint32_t gpu_id; /* to KFD */
- uint32_t queue_type; /* to KFD */
- uint32_t queue_percentage; /* to KFD */
- uint32_t queue_priority; /* to KFD */
- uint32_t queue_id; /* from KFD */
-
- uint64_t eop_buffer_address; /* to KFD */
- uint64_t eop_buffer_size; /* to KFD */
- uint64_t ctx_save_restore_address; /* to KFD */
- uint64_t ctx_save_restore_size; /* to KFD */
+ __u64 ring_base_address; /* to KFD */
+ __u64 write_pointer_address; /* from KFD */
+ __u64 read_pointer_address; /* from KFD */
+ __u64 doorbell_offset; /* from KFD */
+
+ __u32 ring_size; /* to KFD */
+ __u32 gpu_id; /* to KFD */
+ __u32 queue_type; /* to KFD */
+ __u32 queue_percentage; /* to KFD */
+ __u32 queue_priority; /* to KFD */
+ __u32 queue_id; /* from KFD */
+
+ __u64 eop_buffer_address; /* to KFD */
+ __u64 eop_buffer_size; /* to KFD */
+ __u64 ctx_save_restore_address; /* to KFD */
+ __u64 ctx_save_restore_size; /* to KFD */
};
struct kfd_ioctl_destroy_queue_args {
- uint32_t queue_id; /* to KFD */
- uint32_t pad;
+ __u32 queue_id; /* to KFD */
+ __u32 pad;
};
struct kfd_ioctl_update_queue_args {
- uint64_t ring_base_address; /* to KFD */
+ __u64 ring_base_address; /* to KFD */
- uint32_t queue_id; /* to KFD */
- uint32_t ring_size; /* to KFD */
- uint32_t queue_percentage; /* to KFD */
- uint32_t queue_priority; /* to KFD */
+ __u32 queue_id; /* to KFD */
+ __u32 ring_size; /* to KFD */
+ __u32 queue_percentage; /* to KFD */
+ __u32 queue_priority; /* to KFD */
};
/* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */
#define KFD_IOC_CACHE_POLICY_NONCOHERENT 1
struct kfd_ioctl_set_memory_policy_args {
- uint64_t alternate_aperture_base; /* to KFD */
- uint64_t alternate_aperture_size; /* to KFD */
+ __u64 alternate_aperture_base; /* to KFD */
+ __u64 alternate_aperture_size; /* to KFD */
- uint32_t gpu_id; /* to KFD */
- uint32_t default_policy; /* to KFD */
- uint32_t alternate_policy; /* to KFD */
- uint32_t pad;
+ __u32 gpu_id; /* to KFD */
+ __u32 default_policy; /* to KFD */
+ __u32 alternate_policy; /* to KFD */
+ __u32 pad;
};
/*
*/
struct kfd_ioctl_get_clock_counters_args {
- uint64_t gpu_clock_counter; /* from KFD */
- uint64_t cpu_clock_counter; /* from KFD */
- uint64_t system_clock_counter; /* from KFD */
- uint64_t system_clock_freq; /* from KFD */
+ __u64 gpu_clock_counter; /* from KFD */
+ __u64 cpu_clock_counter; /* from KFD */
+ __u64 system_clock_counter; /* from KFD */
+ __u64 system_clock_freq; /* from KFD */
- uint32_t gpu_id; /* to KFD */
- uint32_t pad;
+ __u32 gpu_id; /* to KFD */
+ __u32 pad;
};
#define NUM_OF_SUPPORTED_GPUS 7
struct kfd_process_device_apertures {
- uint64_t lds_base; /* from KFD */
- uint64_t lds_limit; /* from KFD */
- uint64_t scratch_base; /* from KFD */
- uint64_t scratch_limit; /* from KFD */
- uint64_t gpuvm_base; /* from KFD */
- uint64_t gpuvm_limit; /* from KFD */
- uint32_t gpu_id; /* from KFD */
- uint32_t pad;
+ __u64 lds_base; /* from KFD */
+ __u64 lds_limit; /* from KFD */
+ __u64 scratch_base; /* from KFD */
+ __u64 scratch_limit; /* from KFD */
+ __u64 gpuvm_base; /* from KFD */
+ __u64 gpuvm_limit; /* from KFD */
+ __u32 gpu_id; /* from KFD */
+ __u32 pad;
};
struct kfd_ioctl_get_process_apertures_args {
process_apertures[NUM_OF_SUPPORTED_GPUS];/* from KFD */
/* from KFD, should be in the range [1 - NUM_OF_SUPPORTED_GPUS] */
- uint32_t num_of_nodes;
- uint32_t pad;
+ __u32 num_of_nodes;
+ __u32 pad;
};
#define MAX_ALLOWED_NUM_POINTS 100
#define MAX_ALLOWED_WAC_BUFF_SIZE 128
struct kfd_ioctl_dbg_register_args {
- uint32_t gpu_id; /* to KFD */
- uint32_t pad;
+ __u32 gpu_id; /* to KFD */
+ __u32 pad;
};
struct kfd_ioctl_dbg_unregister_args {
- uint32_t gpu_id; /* to KFD */
- uint32_t pad;
+ __u32 gpu_id; /* to KFD */
+ __u32 pad;
};
struct kfd_ioctl_dbg_address_watch_args {
- uint64_t content_ptr; /* a pointer to the actual content */
- uint32_t gpu_id; /* to KFD */
- uint32_t buf_size_in_bytes; /*including gpu_id and buf_size */
+ __u64 content_ptr; /* a pointer to the actual content */
+ __u32 gpu_id; /* to KFD */
+ __u32 buf_size_in_bytes; /*including gpu_id and buf_size */
};
struct kfd_ioctl_dbg_wave_control_args {
- uint64_t content_ptr; /* a pointer to the actual content */
- uint32_t gpu_id; /* to KFD */
- uint32_t buf_size_in_bytes; /*including gpu_id and buf_size */
+ __u64 content_ptr; /* a pointer to the actual content */
+ __u32 gpu_id; /* to KFD */
+ __u32 buf_size_in_bytes; /*including gpu_id and buf_size */
};
/* Matching HSA_EVENTTYPE */
#define KFD_SIGNAL_EVENT_LIMIT 256
struct kfd_ioctl_create_event_args {
- uint64_t event_page_offset; /* from KFD */
- uint32_t event_trigger_data; /* from KFD - signal events only */
- uint32_t event_type; /* to KFD */
- uint32_t auto_reset; /* to KFD */
- uint32_t node_id; /* to KFD - only valid for certain
+ __u64 event_page_offset; /* from KFD */
+ __u32 event_trigger_data; /* from KFD - signal events only */
+ __u32 event_type; /* to KFD */
+ __u32 auto_reset; /* to KFD */
+ __u32 node_id; /* to KFD - only valid for certain
event types */
- uint32_t event_id; /* from KFD */
- uint32_t event_slot_index; /* from KFD */
+ __u32 event_id; /* from KFD */
+ __u32 event_slot_index; /* from KFD */
};
struct kfd_ioctl_destroy_event_args {
- uint32_t event_id; /* to KFD */
- uint32_t pad;
+ __u32 event_id; /* to KFD */
+ __u32 pad;
};
struct kfd_ioctl_set_event_args {
- uint32_t event_id; /* to KFD */
- uint32_t pad;
+ __u32 event_id; /* to KFD */
+ __u32 pad;
};
struct kfd_ioctl_reset_event_args {
- uint32_t event_id; /* to KFD */
- uint32_t pad;
+ __u32 event_id; /* to KFD */
+ __u32 pad;
};
struct kfd_memory_exception_failure {
- uint32_t NotPresent; /* Page not present or supervisor privilege */
- uint32_t ReadOnly; /* Write access to a read-only page */
- uint32_t NoExecute; /* Execute access to a page marked NX */
- uint32_t pad;
+ __u32 NotPresent; /* Page not present or supervisor privilege */
+ __u32 ReadOnly; /* Write access to a read-only page */
+ __u32 NoExecute; /* Execute access to a page marked NX */
+ __u32 pad;
};
/* memory exception data*/
struct kfd_hsa_memory_exception_data {
struct kfd_memory_exception_failure failure;
- uint64_t va;
- uint32_t gpu_id;
- uint32_t pad;
+ __u64 va;
+ __u32 gpu_id;
+ __u32 pad;
};
/* Event data*/
union {
struct kfd_hsa_memory_exception_data memory_exception_data;
}; /* From KFD */
- uint64_t kfd_event_data_ext; /* pointer to an extension structure
+ __u64 kfd_event_data_ext; /* pointer to an extension structure
for future exception types */
- uint32_t event_id; /* to KFD */
- uint32_t pad;
+ __u32 event_id; /* to KFD */
+ __u32 pad;
};
struct kfd_ioctl_wait_events_args {
- uint64_t events_ptr; /* pointed to struct
+ __u64 events_ptr; /* pointed to struct
kfd_event_data array, to KFD */
- uint32_t num_events; /* to KFD */
- uint32_t wait_for_all; /* to KFD */
- uint32_t timeout; /* to KFD */
- uint32_t wait_result; /* from KFD */
+ __u32 num_events; /* to KFD */
+ __u32 wait_for_all; /* to KFD */
+ __u32 timeout; /* to KFD */
+ __u32 wait_result; /* from KFD */
};
struct kfd_ioctl_set_scratch_backing_va_args {
#define SECCOMP_MODE_FILTER 2 /* uses user-supplied filter. */
/* Valid operations for seccomp syscall. */
-#define SECCOMP_SET_MODE_STRICT 0
-#define SECCOMP_SET_MODE_FILTER 1
+#define SECCOMP_SET_MODE_STRICT 0
+#define SECCOMP_SET_MODE_FILTER 1
+#define SECCOMP_GET_ACTION_AVAIL 2
/* Valid flags for SECCOMP_SET_MODE_FILTER */
#define SECCOMP_FILTER_FLAG_TSYNC 1
+#define SECCOMP_FILTER_FLAG_LOG 2
/*
* All BPF programs must return a 32-bit value.
* The bottom 16-bits are for optional return data.
- * The upper 16-bits are ordered from least permissive values to most.
+ * The upper 16-bits are ordered from least permissive values to most,
+ * as a signed value (so 0x80000000 is negative).
*
* The ordering ensures that a min_t() over composed return values always
* selects the least permissive choice.
*/
-#define SECCOMP_RET_KILL 0x00000000U /* kill the task immediately */
-#define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */
-#define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */
-#define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */
-#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */
+#define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */
+#define SECCOMP_RET_KILL_THREAD 0x00000000U /* kill the thread */
+#define SECCOMP_RET_KILL SECCOMP_RET_KILL_THREAD
+#define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */
+#define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */
+#define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */
+#define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */
+#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */
/* Masks for the return value sections. */
+#define SECCOMP_RET_ACTION_FULL 0xffff0000U
#define SECCOMP_RET_ACTION 0x7fff0000U
#define SECCOMP_RET_DATA 0x0000ffffU
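
The new ordering only works because actions are compared as signed 32-bit values (the kernel masks with SECCOMP_RET_ACTION_FULL and casts to s32), which makes SECCOMP_RET_KILL_PROCESS sort below SECCOMP_RET_KILL_THREAD. A standalone demonstration of that composition rule (simplified: the masking step is omitted):

    #include <assert.h>
    #include <stdint.h>

    #define RET_KILL_PROCESS 0x80000000U
    #define RET_KILL_THREAD  0x00000000U
    #define RET_ALLOW        0x7fff0000U

    /* least permissive action wins; "least" is evaluated as s32 */
    static uint32_t compose(uint32_t a, uint32_t b)
    {
            return ((int32_t)a < (int32_t)b) ? a : b;
    }

    int main(void)
    {
            assert(compose(RET_ALLOW, RET_KILL_THREAD) == RET_KILL_THREAD);
            assert(compose(RET_KILL_THREAD, RET_KILL_PROCESS) == RET_KILL_PROCESS);
            return 0;
    }
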
struct ib_uverbs_rss_caps rss_caps;
__u32 max_wq_type_rq;
__u32 raw_packet_caps;
- struct ib_uverbs_tm_caps xrq_caps;
+ struct ib_uverbs_tm_caps tm_caps;
};
struct ib_uverbs_query_port {
BUG();
}
-/* TODO: this shouldn't be here but it is because the frontend drivers
- * are using it (its rolled in headers) even though we won't hit the code path.
- * So for right now just punt with this.
- */
-static inline pte_t *lookup_address(unsigned long address, unsigned int *level)
-{
- BUG();
- return NULL;
-}
-
extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
struct gnttab_map_grant_ref *kmap_ops,
struct page **pages, unsigned int count);
info.shm_swp = ip->shm_swp;
info.swap_attempts = ip->swap_attempts;
info.swap_successes = ip->swap_successes;
- return copy_to_user(up, &info, sizeof(info));
+ return copy_to_user(uip, &info, sizeof(info));
}
static int copy_compat_shmid_to_user(void __user *buf, struct shmid64_ds *in,
err = shmctl_stat(ns, shmid, cmd, &sem64);
if (err < 0)
return err;
- if (copy_compat_shmid_to_user(&sem64, uptr, version))
+ if (copy_compat_shmid_to_user(uptr, &sem64, version))
err = -EFAULT;
return err;
static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
{
struct bpf_dtab *dtab;
+ int err = -EINVAL;
u64 cost;
- int err;
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
if (err)
goto free_dtab;
+ err = -ENOMEM;
+
/* A per cpu bitfield with a bit per possible net device */
dtab->flush_needed = __alloc_percpu(dev_map_bitmap_size(attr),
__alignof__(unsigned long));
free_dtab:
free_percpu(dtab->flush_needed);
kfree(dtab);
- return ERR_PTR(-ENOMEM);
+ return ERR_PTR(err);
}
static void dev_map_free(struct bpf_map *map)
static void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
{
+ unsigned long flags;
+
if (do_idr_lock)
- spin_lock_bh(&map_idr_lock);
+ spin_lock_irqsave(&map_idr_lock, flags);
else
__acquire(&map_idr_lock);
idr_remove(&map_idr, map->id);
if (do_idr_lock)
- spin_unlock_bh(&map_idr_lock);
+ spin_unlock_irqrestore(&map_idr_lock, flags);
else
__release(&map_idr_lock);
}
}
if (insn->imm == BPF_FUNC_redirect_map) {
- u64 addr = (unsigned long)prog;
+ /* Note, we cannot use prog directly as imm as subsequent
+ * rewrites would still change the prog pointer. The only
+ * stable address we can use is aux, which also works with
+ * prog clones during blinding.
+ */
+ u64 addr = (unsigned long)prog->aux;
struct bpf_insn r4_ld[] = {
BPF_LD_IMM64(BPF_REG_4, addr),
*insn,
}
}
event->tp_event->prog = prog;
+ event->tp_event->bpf_prog_owner = event;
return 0;
}
return;
prog = event->tp_event->prog;
- if (prog) {
+ if (prog && event->tp_event->bpf_prog_owner == event) {
event->tp_event->prog = NULL;
bpf_prog_put(prog);
}
int __kernel_text_address(unsigned long addr)
{
- if (core_kernel_text(addr))
- return 1;
- if (is_module_text_address(addr))
- return 1;
- if (is_ftrace_trampoline(addr))
- return 1;
- if (is_kprobe_optinsn_slot(addr) || is_kprobe_insn_slot(addr))
- return 1;
- if (is_bpf_text_address(addr))
+ if (kernel_text_address(addr))
return 1;
/*
* There might be init symbols in saved stacktraces.
int kernel_text_address(unsigned long addr)
{
+ bool no_rcu;
+ int ret = 1;
+
if (core_kernel_text(addr))
return 1;
+
+ /*
+ * If a stack dump happens while RCU is not watching, then
+ * RCU needs to be told to start watching again. This can
+ * happen either via tracing that triggers a stack trace, a
+ * WARN() issued while coming back from idle, or a CPU being
+ * brought online or offline.
+ *
+ * is_module_text_address() as well as the kprobe slots
+ * and is_bpf_text_address() require RCU to be watching.
+ */
+ no_rcu = !rcu_is_watching();
+
+ /* Treat this like an NMI as it can happen anywhere */
+ if (no_rcu)
+ rcu_nmi_enter();
+
if (is_module_text_address(addr))
- return 1;
+ goto out;
if (is_ftrace_trampoline(addr))
- return 1;
+ goto out;
if (is_kprobe_optinsn_slot(addr) || is_kprobe_insn_slot(addr))
- return 1;
+ goto out;
if (is_bpf_text_address(addr))
- return 1;
- return 0;
+ goto out;
+ ret = 0;
+out:
+ if (no_rcu)
+ rcu_nmi_exit();
+
+ return ret;
}
/*
irqd_clr_managed_shutdown(d);
- if (cpumask_any_and(aff, cpu_online_mask) > nr_cpu_ids) {
+ if (cpumask_any_and(aff, cpu_online_mask) >= nr_cpu_ids) {
/*
* Catch code which fiddles with enable_irq() on a managed
* and potentially shutdown IRQ. Chained interrupt
RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_exit() invoked with irqs enabled!!!");
rdtp = this_cpu_ptr(&rcu_dynticks);
+
+ /* Page faults can happen in NMI handlers, so check... */
+ if (READ_ONCE(rdtp->dynticks_nmi_nesting))
+ return;
+
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
rdtp->dynticks_nesting < 1);
if (rdtp->dynticks_nesting <= 1) {
RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_enter() invoked with irqs enabled!!!");
rdtp = this_cpu_ptr(&rcu_dynticks);
+
+ /* Page faults can happen in NMI handlers, so check... */
+ if (READ_ONCE(rdtp->dynticks_nmi_nesting))
+ return;
+
oldval = rdtp->dynticks_nesting;
rdtp->dynticks_nesting++;
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
#include <linux/audit.h>
#include <linux/compat.h>
#include <linux/coredump.h>
+#include <linux/kmemleak.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/seccomp.h>
#include <linux/slab.h>
#include <linux/syscalls.h>
+#include <linux/sysctl.h>
#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
#include <asm/syscall.h>
* get/put helpers should be used when accessing an instance
* outside of a lifetime-guarded section. In general, this
* is only needed for handling filters shared across tasks.
+ * @log: true if all actions except for SECCOMP_RET_ALLOW should be logged
* @prev: points to a previously installed, or inherited, filter
* @prog: the BPF program to evaluate
*
*/
struct seccomp_filter {
refcount_t usage;
+ bool log;
struct seccomp_filter *prev;
struct bpf_prog *prog;
};
/**
* seccomp_run_filters - evaluates all seccomp filters against @sd
* @sd: optional seccomp data to be passed to filters
+ * @match: stores the struct seccomp_filter that produced the return value,
+ * unless the filters returned SECCOMP_RET_ALLOW, in which case it is
+ * left unchanged.
*
* Returns valid seccomp BPF response codes.
*/
-static u32 seccomp_run_filters(const struct seccomp_data *sd)
+#define ACTION_ONLY(ret) ((s32)((ret) & (SECCOMP_RET_ACTION_FULL)))
+static u32 seccomp_run_filters(const struct seccomp_data *sd,
+ struct seccomp_filter **match)
{
struct seccomp_data sd_local;
u32 ret = SECCOMP_RET_ALLOW;
/* Ensure unexpected behavior doesn't result in failing open. */
if (unlikely(WARN_ON(f == NULL)))
- return SECCOMP_RET_KILL;
+ return SECCOMP_RET_KILL_PROCESS;
if (!sd) {
populate_seccomp_data(&sd_local);
for (; f; f = f->prev) {
u32 cur_ret = BPF_PROG_RUN(f->prog, sd);
- if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
+ if (ACTION_ONLY(cur_ret) < ACTION_ONLY(ret)) {
ret = cur_ret;
+ *match = f;
+ }
}
return ret;
}
return ret;
}
+ /* Set log flag, if present. */
+ if (flags & SECCOMP_FILTER_FLAG_LOG)
+ filter->log = true;
+
/*
* If there is an existing filter, make it the prev and don't drop its
* task reference.
return 0;
}
+void __get_seccomp_filter(struct seccomp_filter *filter)
+{
+ /* Reference count is bounded by the number of total processes. */
+ refcount_inc(&filter->usage);
+}
+
/* get_seccomp_filter - increments the reference count of the filter on @tsk */
void get_seccomp_filter(struct task_struct *tsk)
{
struct seccomp_filter *orig = tsk->seccomp.filter;
if (!orig)
return;
- /* Reference count is bounded by the number of total processes. */
- refcount_inc(&orig->usage);
+ __get_seccomp_filter(orig);
}
static inline void seccomp_filter_free(struct seccomp_filter *filter)
}
}
-/* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */
-void put_seccomp_filter(struct task_struct *tsk)
+static void __put_seccomp_filter(struct seccomp_filter *orig)
{
- struct seccomp_filter *orig = tsk->seccomp.filter;
/* Clean up single-reference branches iteratively. */
while (orig && refcount_dec_and_test(&orig->usage)) {
struct seccomp_filter *freeme = orig;
}
}
+/* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */
+void put_seccomp_filter(struct task_struct *tsk)
+{
+ __put_seccomp_filter(tsk->seccomp.filter);
+}
+
static void seccomp_init_siginfo(siginfo_t *info, int syscall, int reason)
{
memset(info, 0, sizeof(*info));
}
#endif /* CONFIG_SECCOMP_FILTER */
+/* For use with seccomp_actions_logged */
+#define SECCOMP_LOG_KILL_PROCESS (1 << 0)
+#define SECCOMP_LOG_KILL_THREAD (1 << 1)
+#define SECCOMP_LOG_TRAP (1 << 2)
+#define SECCOMP_LOG_ERRNO (1 << 3)
+#define SECCOMP_LOG_TRACE (1 << 4)
+#define SECCOMP_LOG_LOG (1 << 5)
+#define SECCOMP_LOG_ALLOW (1 << 6)
+
+static u32 seccomp_actions_logged = SECCOMP_LOG_KILL_PROCESS |
+ SECCOMP_LOG_KILL_THREAD |
+ SECCOMP_LOG_TRAP |
+ SECCOMP_LOG_ERRNO |
+ SECCOMP_LOG_TRACE |
+ SECCOMP_LOG_LOG;
+
+static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
+ bool requested)
+{
+ bool log = false;
+
+ switch (action) {
+ case SECCOMP_RET_ALLOW:
+ break;
+ case SECCOMP_RET_TRAP:
+ log = requested && seccomp_actions_logged & SECCOMP_LOG_TRAP;
+ break;
+ case SECCOMP_RET_ERRNO:
+ log = requested && seccomp_actions_logged & SECCOMP_LOG_ERRNO;
+ break;
+ case SECCOMP_RET_TRACE:
+ log = requested && seccomp_actions_logged & SECCOMP_LOG_TRACE;
+ break;
+ case SECCOMP_RET_LOG:
+ log = seccomp_actions_logged & SECCOMP_LOG_LOG;
+ break;
+ case SECCOMP_RET_KILL_THREAD:
+ log = seccomp_actions_logged & SECCOMP_LOG_KILL_THREAD;
+ break;
+ case SECCOMP_RET_KILL_PROCESS:
+ default:
+ log = seccomp_actions_logged & SECCOMP_LOG_KILL_PROCESS;
+ }
+
+ /*
+ * Force an audit message to be emitted when the action is RET_KILL_*
+ * or RET_LOG, or when the filter's FILTER_FLAG_LOG bit was set, and
+ * the admin has allowed that action to be logged.
+ */
+ if (log)
+ return __audit_seccomp(syscall, signr, action);
+
+ /*
+ * Let the audit subsystem decide if the action should be audited based
+ * on whether the current task itself is being audited.
+ */
+ return audit_seccomp(syscall, signr, action);
+}
+
/*
* Secure computing mode 1 allows only read/write/exit/sigreturn.
* To be fully secure this must be combined with rlimit
#ifdef SECCOMP_DEBUG
dump_stack();
#endif
- audit_seccomp(this_syscall, SIGKILL, SECCOMP_RET_KILL);
+ seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL_THREAD, true);
do_exit(SIGKILL);
}
const bool recheck_after_trace)
{
u32 filter_ret, action;
+ struct seccomp_filter *match = NULL;
int data;
/*
*/
rmb();
- filter_ret = seccomp_run_filters(sd);
+ filter_ret = seccomp_run_filters(sd, &match);
data = filter_ret & SECCOMP_RET_DATA;
- action = filter_ret & SECCOMP_RET_ACTION;
+ action = filter_ret & SECCOMP_RET_ACTION_FULL;
switch (action) {
case SECCOMP_RET_ERRNO:
return 0;
+ case SECCOMP_RET_LOG:
+ seccomp_log(this_syscall, 0, action, true);
+ return 0;
+
case SECCOMP_RET_ALLOW:
+ /*
+ * Note that the "match" filter will always be NULL for
+ * this action since SECCOMP_RET_ALLOW is the starting
+ * state in seccomp_run_filters().
+ */
return 0;
- case SECCOMP_RET_KILL:
+ case SECCOMP_RET_KILL_THREAD:
+ case SECCOMP_RET_KILL_PROCESS:
default:
- audit_seccomp(this_syscall, SIGSYS, action);
+ seccomp_log(this_syscall, SIGSYS, action, true);
/* Dump core only if this is the last remaining thread. */
- if (get_nr_threads(current) == 1) {
+ if (action == SECCOMP_RET_KILL_PROCESS ||
+ get_nr_threads(current) == 1) {
siginfo_t info;
/* Show the original registers in the dump. */
seccomp_init_siginfo(&info, this_syscall, data);
do_coredump(&info);
}
- do_exit(SIGSYS);
+ if (action == SECCOMP_RET_KILL_PROCESS)
+ do_group_exit(SIGSYS);
+ else
+ do_exit(SIGSYS);
}
unreachable();
skip:
- audit_seccomp(this_syscall, 0, action);
+ seccomp_log(this_syscall, 0, action, match ? match->log : false);
return -1;
}
#else
}
#endif
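For context, a minimal userspace sketch of installing a filter that uses the new action. It assumes uapi headers that define SECCOMP_RET_KILL_PROCESS and a libc without a seccomp() wrapper, hence the raw syscall; the syscall being denied is arbitrary:

    #include <stddef.h>
    #include <sys/prctl.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <linux/filter.h>
    #include <linux/seccomp.h>

    static int deny_getpid_kill_process(void)
    {
            struct sock_filter filter[] = {
                    BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
                             offsetof(struct seccomp_data, nr)),
                    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_getpid, 0, 1),
                    BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
                    BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
            };
            struct sock_fprog prog = {
                    .len = sizeof(filter) / sizeof(filter[0]),
                    .filter = filter,
            };

            if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0))
                    return -1;
            /* Passing SECCOMP_FILTER_FLAG_LOG instead of 0 would also set
             * the new per-filter log bit introduced above. */
            return syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER, 0, &prog);
    }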
+static long seccomp_get_action_avail(const char __user *uaction)
+{
+ u32 action;
+
+ if (copy_from_user(&action, uaction, sizeof(action)))
+ return -EFAULT;
+
+ switch (action) {
+ case SECCOMP_RET_KILL_PROCESS:
+ case SECCOMP_RET_KILL_THREAD:
+ case SECCOMP_RET_TRAP:
+ case SECCOMP_RET_ERRNO:
+ case SECCOMP_RET_TRACE:
+ case SECCOMP_RET_LOG:
+ case SECCOMP_RET_ALLOW:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
/* Common entry point for both prctl and syscall. */
static long do_seccomp(unsigned int op, unsigned int flags,
const char __user *uargs)
return seccomp_set_mode_strict();
case SECCOMP_SET_MODE_FILTER:
return seccomp_set_mode_filter(flags, uargs);
+ case SECCOMP_GET_ACTION_AVAIL:
+ if (flags != 0)
+ return -EINVAL;
+
+ return seccomp_get_action_avail(uargs);
default:
return -EINVAL;
}
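A sketch of probing for action support from userspace with the new operation (again assuming updated uapi headers; as the code above shows, the kernel answers 0 for supported actions and -EOPNOTSUPP otherwise):

    #include <stdint.h>
    #include <stdio.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <linux/seccomp.h>

    static int action_available(uint32_t action)
    {
            return syscall(__NR_seccomp, SECCOMP_GET_ACTION_AVAIL, 0,
                           &action) == 0;
    }

    int main(void)
    {
            printf("RET_LOG:          %d\n", action_available(SECCOMP_RET_LOG));
            printf("RET_KILL_PROCESS: %d\n",
                   action_available(SECCOMP_RET_KILL_PROCESS));
            return 0;
    }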
if (!data)
goto out;
- get_seccomp_filter(task);
+ __get_seccomp_filter(filter);
spin_unlock_irq(&task->sighand->siglock);
if (copy_to_user(data, fprog->filter, bpf_classic_proglen(fprog)))
ret = -EFAULT;
- put_seccomp_filter(task);
+ __put_seccomp_filter(filter);
return ret;
out:
return ret;
}
#endif
+
+#ifdef CONFIG_SYSCTL
+
+/* Human readable action names for friendly sysctl interaction */
+#define SECCOMP_RET_KILL_PROCESS_NAME "kill_process"
+#define SECCOMP_RET_KILL_THREAD_NAME "kill_thread"
+#define SECCOMP_RET_TRAP_NAME "trap"
+#define SECCOMP_RET_ERRNO_NAME "errno"
+#define SECCOMP_RET_TRACE_NAME "trace"
+#define SECCOMP_RET_LOG_NAME "log"
+#define SECCOMP_RET_ALLOW_NAME "allow"
+
+static const char seccomp_actions_avail[] =
+ SECCOMP_RET_KILL_PROCESS_NAME " "
+ SECCOMP_RET_KILL_THREAD_NAME " "
+ SECCOMP_RET_TRAP_NAME " "
+ SECCOMP_RET_ERRNO_NAME " "
+ SECCOMP_RET_TRACE_NAME " "
+ SECCOMP_RET_LOG_NAME " "
+ SECCOMP_RET_ALLOW_NAME;
+
+struct seccomp_log_name {
+ u32 log;
+ const char *name;
+};
+
+static const struct seccomp_log_name seccomp_log_names[] = {
+ { SECCOMP_LOG_KILL_PROCESS, SECCOMP_RET_KILL_PROCESS_NAME },
+ { SECCOMP_LOG_KILL_THREAD, SECCOMP_RET_KILL_THREAD_NAME },
+ { SECCOMP_LOG_TRAP, SECCOMP_RET_TRAP_NAME },
+ { SECCOMP_LOG_ERRNO, SECCOMP_RET_ERRNO_NAME },
+ { SECCOMP_LOG_TRACE, SECCOMP_RET_TRACE_NAME },
+ { SECCOMP_LOG_LOG, SECCOMP_RET_LOG_NAME },
+ { SECCOMP_LOG_ALLOW, SECCOMP_RET_ALLOW_NAME },
+ { }
+};
+
+static bool seccomp_names_from_actions_logged(char *names, size_t size,
+ u32 actions_logged)
+{
+ const struct seccomp_log_name *cur;
+ bool append_space = false;
+
+ for (cur = seccomp_log_names; cur->name && size; cur++) {
+ ssize_t ret;
+
+ if (!(actions_logged & cur->log))
+ continue;
+
+ if (append_space) {
+ ret = strscpy(names, " ", size);
+ if (ret < 0)
+ return false;
+
+ names += ret;
+ size -= ret;
+ } else
+ append_space = true;
+
+ ret = strscpy(names, cur->name, size);
+ if (ret < 0)
+ return false;
+
+ names += ret;
+ size -= ret;
+ }
+
+ return true;
+}
+
+static bool seccomp_action_logged_from_name(u32 *action_logged,
+ const char *name)
+{
+ const struct seccomp_log_name *cur;
+
+ for (cur = seccomp_log_names; cur->name; cur++) {
+ if (!strcmp(cur->name, name)) {
+ *action_logged = cur->log;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static bool seccomp_actions_logged_from_names(u32 *actions_logged, char *names)
+{
+ char *name;
+
+ *actions_logged = 0;
+ while ((name = strsep(&names, " ")) && *name) {
+ u32 action_logged = 0;
+
+ if (!seccomp_action_logged_from_name(&action_logged, name))
+ return false;
+
+ *actions_logged |= action_logged;
+ }
+
+ return true;
+}
+
+static int seccomp_actions_logged_handler(struct ctl_table *ro_table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ char names[sizeof(seccomp_actions_avail)];
+ struct ctl_table table;
+ int ret;
+
+ if (write && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ memset(names, 0, sizeof(names));
+
+ if (!write) {
+ if (!seccomp_names_from_actions_logged(names, sizeof(names),
+ seccomp_actions_logged))
+ return -EINVAL;
+ }
+
+ table = *ro_table;
+ table.data = names;
+ table.maxlen = sizeof(names);
+ ret = proc_dostring(&table, write, buffer, lenp, ppos);
+ if (ret)
+ return ret;
+
+ if (write) {
+ u32 actions_logged;
+
+ if (!seccomp_actions_logged_from_names(&actions_logged,
+ table.data))
+ return -EINVAL;
+
+ if (actions_logged & SECCOMP_LOG_ALLOW)
+ return -EINVAL;
+
+ seccomp_actions_logged = actions_logged;
+ }
+
+ return 0;
+}
+
+static struct ctl_path seccomp_sysctl_path[] = {
+ { .procname = "kernel", },
+ { .procname = "seccomp", },
+ { }
+};
+
+static struct ctl_table seccomp_sysctl_table[] = {
+ {
+ .procname = "actions_avail",
+ .data = (void *) &seccomp_actions_avail,
+ .maxlen = sizeof(seccomp_actions_avail),
+ .mode = 0444,
+ .proc_handler = proc_dostring,
+ },
+ {
+ .procname = "actions_logged",
+ .mode = 0644,
+ .proc_handler = seccomp_actions_logged_handler,
+ },
+ { }
+};
+
+static int __init seccomp_sysctl_init(void)
+{
+ struct ctl_table_header *hdr;
+
+ hdr = register_sysctl_paths(seccomp_sysctl_path, seccomp_sysctl_table);
+ if (!hdr)
+ pr_warn("seccomp: sysctl registration failed\n");
+ else
+ kmemleak_not_leak(hdr);
+
+ return 0;
+}
+
+device_initcall(seccomp_sysctl_init)
+
+#endif /* CONFIG_SYSCTL */
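The resulting files appear under /proc/sys/kernel/seccomp/. A small sketch of reading the read-only actions_avail file (actions_logged accepts a space-separated subset of these names, written as root):

    #include <stdio.h>

    int main(void)
    {
            char buf[128];
            FILE *f = fopen("/proc/sys/kernel/seccomp/actions_avail", "r");

            if (!f)
                    return 1;
            if (fgets(buf, sizeof(buf), f))
                    /* e.g. "kill_process kill_thread trap errno trace log allow" */
                    printf("available: %s", buf);
            fclose(f);
            return 0;
    }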
}
EXPORT_SYMBOL_GPL(blk_trace_startstop);
+/*
+ * When reading or writing the blktrace sysfs files, the references to the
+ * opened sysfs or device files should prevent the underlying block device
+ * from being removed. So no further delete protection is really needed.
+ */
+
/**
* blk_trace_ioctl: - handle the ioctls associated with tracing
* @bdev: the block device
if (!q)
return -ENXIO;
- mutex_lock(&bdev->bd_mutex);
+ mutex_lock(&q->blk_trace_mutex);
switch (cmd) {
case BLKTRACESETUP:
break;
}
- mutex_unlock(&bdev->bd_mutex);
+ mutex_unlock(&q->blk_trace_mutex);
return ret;
}
if (q == NULL)
goto out_bdput;
- mutex_lock(&bdev->bd_mutex);
+ mutex_lock(&q->blk_trace_mutex);
if (attr == &dev_attr_enable) {
ret = sprintf(buf, "%u\n", !!q->blk_trace);
ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba);
out_unlock_bdev:
- mutex_unlock(&bdev->bd_mutex);
+ mutex_unlock(&q->blk_trace_mutex);
out_bdput:
bdput(bdev);
out:
if (q == NULL)
goto out_bdput;
- mutex_lock(&bdev->bd_mutex);
+ mutex_lock(&q->blk_trace_mutex);
if (attr == &dev_attr_enable) {
if (value)
}
out_unlock_bdev:
- mutex_unlock(&bdev->bd_mutex);
+ mutex_unlock(&q->blk_trace_mutex);
out_bdput:
bdput(bdev);
out:
/* If this file was open for write, then erase contents */
if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
int cpu = tracing_get_cpu(inode);
+ struct trace_buffer *trace_buf = &tr->trace_buffer;
+
+#ifdef CONFIG_TRACER_MAX_TRACE
+ if (tr->current_trace->print_max)
+ trace_buf = &tr->max_buffer;
+#endif
if (cpu == RING_BUFFER_ALL_CPUS)
- tracing_reset_online_cpus(&tr->trace_buffer);
+ tracing_reset_online_cpus(trace_buf);
else
- tracing_reset(&tr->trace_buffer, cpu);
+ tracing_reset(trace_buf, cpu);
}
if (file->f_mode & FMODE_READ) {
if (t == tr->current_trace)
goto out;
+ /* Some tracers won't work on kernel command line */
+ if (system_state < SYSTEM_RUNNING && t->noboot) {
+ pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
+ t->name);
+ goto out;
+ }
+
/* Some tracers are only allowed for the top level buffer */
if (!trace_ok_for_array(t, tr)) {
ret = -EINVAL;
*
* iter->pos will be 0 if we haven't read anything.
*/
- if (!tracing_is_on() && iter->pos)
+ if (!tracer_tracing_is_on(iter->tr) && iter->pos)
break;
mutex_unlock(&iter->mutex);
#ifdef CONFIG_TRACER_MAX_TRACE
bool use_max_tr;
#endif
+ /* True if tracer cannot be enabled in kernel param */
+ bool noboot;
};
.close = mmio_close,
.read = mmio_read,
.print_line = mmio_print_line,
+ .noboot = true,
};
__init static int init_mmio_trace(void)
if (in_nmi())
return;
- /*
- * There's a slight chance that we are tracing inside the
- * RCU infrastructure, and rcu_irq_enter() will not work
- * as expected.
- */
- if (unlikely(rcu_irq_enter_disabled()))
- return;
-
local_irq_save(flags);
arch_spin_lock(&stack_trace_max_lock);
- /*
- * RCU may not be watching, make it see us.
- * The stack trace code uses rcu_sched.
- */
- rcu_irq_enter();
-
/* In case another CPU set the tracer_frame on us */
if (unlikely(!frame_size))
this_size -= tracer_frame;
}
out:
- rcu_irq_exit();
arch_spin_unlock(&stack_trace_max_lock);
local_irq_restore(flags);
}
range 0 8192
default 0 if KASAN
default 2048 if GCC_PLUGIN_LATENT_ENTROPY
- default 1024 if !64BIT
+ default 1280 if (!64BIT && PARISC)
+ default 1024 if (!64BIT && !PARISC)
default 2048 if 64BIT
help
Tell gcc to warn at build time for stack frames larger than this.
static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
{
- size_t v = n + offset;
- if (likely(n <= v && v <= (PAGE_SIZE << compound_order(page))))
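+ /* @page may be a tail page; measure the bound from its compound head */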
+ struct page *head = compound_head(page);
+ size_t v = n + offset + page_address(page) - page_address(head);
+
+ if (likely(n <= v && v <= (PAGE_SIZE << compound_order(head))))
return true;
WARN_ON(1);
return false;
* rhashtable_walk_start - Start a hash table walk
* @iter: Hash table iterator
*
- * Start a hash table walk. Note that we take the RCU lock in all
- * cases including when we return an error. So you must always call
- * rhashtable_walk_stop to clean up.
+ * Start a hash table walk at the current iterator position. Note that we take
+ * the RCU lock in all cases including when we return an error. So you must
+ * always call rhashtable_walk_stop to clean up.
*
* Returns zero if successful.
*
* rhashtable_walk_stop - Finish a hash table walk
* @iter: Hash table iterator
*
- * Finish a hash table walk.
+ * Finish a hash table walk. Does not reset the iterator to the start of the
+ * hash table.
*/
void rhashtable_walk_stop(struct rhashtable_iter *iter)
__releases(RCU)
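For reference, a sketch of the documented walk contract. The element type my_obj and the table are hypothetical; rhashtable_walk_next() can return ERR_PTR(-EAGAIN) across a resize, and callers must not sleep between walk_start and walk_stop:

    #include <linux/err.h>
    #include <linux/rhashtable.h>

    struct my_obj {
            u32 key;
            struct rhash_head node;
    };

    static void walk_all(struct rhashtable *my_table)
    {
            struct rhashtable_iter iter;
            struct my_obj *obj;

            rhashtable_walk_enter(my_table, &iter);
            rhashtable_walk_start(&iter);           /* takes the RCU lock */

            while ((obj = rhashtable_walk_next(&iter)) != NULL) {
                    if (IS_ERR(obj)) {
                            if (PTR_ERR(obj) == -EAGAIN)
                                    continue;       /* resize; may see dups */
                            break;
                    }
                    /* use obj; no sleeping until rhashtable_walk_stop() */
            }

            rhashtable_walk_stop(&iter);            /* drops the RCU lock */
            rhashtable_walk_exit(&iter);
    }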
* we're writing. Either one is a pretty crazy thing to do,
* so we don't support it 100%. If this invalidation
* fails, tough, the write still worked...
+ *
+ * Most of the time we do not need this since dio_complete() will do
+ * the invalidation for us. However there are some file systems that
+ * do not end up with dio_complete() being called, so let's not break
+ * them by removing it completely.
*/
- invalidate_inode_pages2_range(mapping,
- pos >> PAGE_SHIFT, end);
+ if (mapping->nrpages)
+ invalidate_inode_pages2_range(mapping,
+ pos >> PAGE_SHIFT, end);
if (written > 0) {
pos += written;
Provide extensive information about internal Bluetooth states
in debugfs.
-config BT_LEGACY_IOCTL
- bool "Enable legacy ioctl interfaces"
- depends on BT && BT_BREDR
- default y
- help
- Enable support for legacy ioctl interfaces. This is only needed
- for old and deprecated applications using direct ioctl calls for
- controller management. Since Linux 3.4 all configuration and
- setup is done via mgmt interface and this is no longer needed.
-
source "drivers/bluetooth/Kconfig"
return 0;
}
-#ifdef CONFIG_BT_LEGACY_IOCTL
static int hci_sock_blacklist_add(struct hci_dev *hdev, void __user *arg)
{
bdaddr_t bdaddr;
release_sock(sk);
return err;
}
-#endif
static int hci_sock_bind(struct socket *sock, struct sockaddr *addr,
int addr_len)
.getname = hci_sock_getname,
.sendmsg = hci_sock_sendmsg,
.recvmsg = hci_sock_recvmsg,
-#ifdef CONFIG_BT_LEGACY_IOCTL
.ioctl = hci_sock_ioctl,
-#else
- .ioctl = sock_no_ioctl,
-#endif
.poll = datagram_poll,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
pg = lookup_pg_mapping(&osdmap->pg_upmap_items, pgid);
if (pg) {
- for (i = 0; i < raw->size; i++) {
- for (j = 0; j < pg->pg_upmap_items.len; j++) {
- int from = pg->pg_upmap_items.from_to[j][0];
- int to = pg->pg_upmap_items.from_to[j][1];
-
- if (from == raw->osds[i]) {
- if (!(to != CRUSH_ITEM_NONE &&
- to < osdmap->max_osd &&
- osdmap->osd_weight[to] == 0))
- raw->osds[i] = to;
+ /*
+ * Note: this approach does not allow a bidirectional swap,
+ * e.g., [[1,2],[2,1]] applied to [0,1,2] -> [0,2,1].
+ */
+ for (i = 0; i < pg->pg_upmap_items.len; i++) {
+ int from = pg->pg_upmap_items.from_to[i][0];
+ int to = pg->pg_upmap_items.from_to[i][1];
+ int pos = -1;
+ bool exists = false;
+
+ /* make sure replacement doesn't already appear */
+ for (j = 0; j < raw->size; j++) {
+ int osd = raw->osds[j];
+
+ if (osd == to) {
+ exists = true;
break;
}
+ /* ignore mapping if target is marked out */
+ if (osd == from && pos < 0 &&
+ !(to != CRUSH_ITEM_NONE &&
+ to < osdmap->max_osd &&
+ osdmap->osd_weight[to] == 0)) {
+ pos = j;
+ }
}
+ if (!exists && pos >= 0)
+ raw->osds[pos] = to;
}
}
}
ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, ucmlen);
}
+ /*
+ * check that the length of the messages copied in matches
+ * what we computed in the first loop
+ */
+ if ((char *)kcmsg - (char *)kcmsg_base != kcmlen)
+ goto Einval;
+
/* Ok, looks like we made it. Hook it up and return success. */
kmsg->msg_control = kcmsg_base;
kmsg->msg_controllen = kcmlen;
goto again;
}
out_unlock:
- if (pt_prev)
- pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
+ if (pt_prev) {
+ if (!skb_orphan_frags_rx(skb2, GFP_ATOMIC))
+ pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
+ else
+ kfree_skb(skb2);
+ }
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(dev_queue_xmit_nit);
__skb_pull(skb, off);
else if (off < 0)
__skb_push(skb, -off);
+ skb->mac_header += off;
switch (act) {
case XDP_REDIRECT:
= link_ksettings->base.eth_tp_mdix;
legacy_settings->eth_tp_mdix_ctrl
= link_ksettings->base.eth_tp_mdix_ctrl;
+ legacy_settings->transceiver
+ = link_ksettings->base.transceiver;
return retval;
}
u32 flags;
struct bpf_map *map;
struct bpf_map *map_to_flush;
- const struct bpf_prog *map_owner;
+ unsigned long map_owner;
};
static DEFINE_PER_CPU(struct redirect_info, redirect_info);
}
EXPORT_SYMBOL_GPL(xdp_do_flush_map);
+static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog,
+ unsigned long aux)
+{
+ return (unsigned long)xdp_prog->aux != aux;
+}
+
static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
struct bpf_prog *xdp_prog)
{
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
- const struct bpf_prog *map_owner = ri->map_owner;
+ unsigned long map_owner = ri->map_owner;
struct bpf_map *map = ri->map;
struct net_device *fwd = NULL;
u32 index = ri->ifindex;
ri->ifindex = 0;
ri->map = NULL;
- ri->map_owner = NULL;
+ ri->map_owner = 0;
- if (unlikely(map_owner != xdp_prog)) {
+ if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
err = -EFAULT;
map = NULL;
goto err;
struct bpf_prog *xdp_prog)
{
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
- const struct bpf_prog *map_owner = ri->map_owner;
+ unsigned long map_owner = ri->map_owner;
struct bpf_map *map = ri->map;
struct net_device *fwd = NULL;
u32 index = ri->ifindex;
ri->ifindex = 0;
ri->map = NULL;
- ri->map_owner = NULL;
+ ri->map_owner = 0;
if (map) {
- if (unlikely(map_owner != xdp_prog)) {
+ if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
err = -EFAULT;
map = NULL;
goto err;
ri->ifindex = ifindex;
ri->flags = flags;
ri->map = NULL;
- ri->map_owner = NULL;
+ ri->map_owner = 0;
return XDP_REDIRECT;
}
};
BPF_CALL_4(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags,
- const struct bpf_prog *, map_owner)
+ unsigned long, map_owner)
{
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
#if IS_ENABLED(CONFIG_IPV6)
if (tb->fast_sk_family == AF_INET6)
return ipv6_rcv_saddr_equal(&tb->fast_v6_rcv_saddr,
- &sk->sk_v6_rcv_saddr,
+ inet6_rcv_saddr(sk),
tb->fast_rcv_saddr,
sk->sk_rcv_saddr,
tb->fast_ipv6_only,
goto fail_unlock;
}
success:
- if (!hlist_empty(&tb->owners)) {
+ if (hlist_empty(&tb->owners)) {
tb->fastreuse = reuse;
if (sk->sk_reuseport) {
tb->fastreuseport = FASTREUSEPORT_ANY;
tb->fastuid = uid;
tb->fast_rcv_saddr = sk->sk_rcv_saddr;
tb->fast_ipv6_only = ipv6_only_sock(sk);
+ tb->fast_sk_family = sk->sk_family;
#if IS_ENABLED(CONFIG_IPV6)
tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
#endif
tb->fastuid = uid;
tb->fast_rcv_saddr = sk->sk_rcv_saddr;
tb->fast_ipv6_only = ipv6_only_sock(sk);
+ tb->fast_sk_family = sk->sk_family;
#if IS_ENABLED(CONFIG_IPV6)
tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
#endif
return !after(end_seq, tcp_wnd_end(tp));
}
-/* This checks if the data bearing packet SKB (usually tcp_send_head(sk))
- * should be put on the wire right now. If so, it returns the number of
- * packets allowed by the congestion window.
- */
-static unsigned int tcp_snd_test(const struct sock *sk, struct sk_buff *skb,
- unsigned int cur_mss, int nonagle)
-{
- const struct tcp_sock *tp = tcp_sk(sk);
- unsigned int cwnd_quota;
-
- tcp_init_tso_segs(skb, cur_mss);
-
- if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
- return 0;
-
- cwnd_quota = tcp_cwnd_test(tp, skb);
- if (cwnd_quota && !tcp_snd_wnd_test(tp, skb, cur_mss))
- cwnd_quota = 0;
-
- return cwnd_quota;
-}
-
-/* Test if sending is allowed right now. */
-bool tcp_may_send_now(struct sock *sk)
-{
- const struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *skb = tcp_send_head(sk);
-
- return skb &&
- tcp_snd_test(sk, skb, tcp_current_mss(sk),
- (tcp_skb_is_last(sk, skb) ?
- tp->nonagle : TCP_NAGLE_PUSH));
-}
-
/* Trim TSO SKB to LEN bytes, put the remaining data into a new packet
* which is put after SKB on the list. It is very much like
* tcp_fragment() except that it may make several kinds of assumptions
goto done;
}
+ /* data was not sent, this is our new send_head */
+ sk->sk_send_head = syn_data;
+ tp->packets_out -= tcp_skb_pcount(syn_data);
+
fallback:
/* Send a regular SYN with Fast Open cookie request option */
if (fo->cookie.len > 0)
*/
tp->snd_nxt = tp->write_seq;
tp->pushed_seq = tp->write_seq;
+ buff = tcp_send_head(sk);
+ if (unlikely(buff)) {
+ tp->snd_nxt = TCP_SKB_CB(buff)->seq;
+ tp->pushed_seq = TCP_SKB_CB(buff)->seq;
+ }
TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
/* Timer for repeating the SYN until an answer. */
return 0;
}
-static inline bool ipv6_use_optimistic_addr(struct inet6_dev *idev)
+static bool ipv6_use_optimistic_addr(struct net *net,
+ struct inet6_dev *idev)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
- return idev && idev->cnf.optimistic_dad && idev->cnf.use_optimistic;
+ if (!idev)
+ return false;
+ if (!net->ipv6.devconf_all->optimistic_dad && !idev->cnf.optimistic_dad)
+ return false;
+ if (!net->ipv6.devconf_all->use_optimistic && !idev->cnf.use_optimistic)
+ return false;
+
+ return true;
#else
return false;
#endif
/* Rule 3: Avoid deprecated and optimistic addresses */
u8 avoid = IFA_F_DEPRECATED;
- if (!ipv6_use_optimistic_addr(score->ifa->idev))
+ if (!ipv6_use_optimistic_addr(net, score->ifa->idev))
avoid |= IFA_F_OPTIMISTIC;
ret = ipv6_saddr_preferred(score->addr_type) ||
!(score->ifa->flags & avoid);
int max_addresses = in6_dev->cnf.max_addresses;
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
- if (in6_dev->cnf.optimistic_dad &&
+ if ((net->ipv6.devconf_all->optimistic_dad ||
+ in6_dev->cnf.optimistic_dad) &&
!net->ipv6.devconf_all->forwarding && sllao)
addr_flags |= IFA_F_OPTIMISTIC;
#endif
u32 addr_flags = flags | IFA_F_PERMANENT;
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
- if (idev->cnf.optimistic_dad &&
+ if ((dev_net(idev->dev)->ipv6.devconf_all->optimistic_dad ||
+ idev->cnf.optimistic_dad) &&
!dev_net(idev->dev)->ipv6.devconf_all->forwarding)
addr_flags |= IFA_F_OPTIMISTIC;
#endif
goto out;
if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
+ dev_net(dev)->ipv6.devconf_all->accept_dad < 1 ||
idev->cnf.accept_dad < 1 ||
!(ifp->flags&IFA_F_TENTATIVE) ||
ifp->flags & IFA_F_NODAD) {
*/
if (ifp->flags & IFA_F_OPTIMISTIC) {
ip6_ins_rt(ifp->rt);
- if (ipv6_use_optimistic_addr(idev)) {
+ if (ipv6_use_optimistic_addr(dev_net(dev), idev)) {
/* Because optimistic nodes can use this address,
* notify listeners. If DAD fails, RTM_DELADDR is sent.
*/
action = DAD_ABORT;
ifp->state = INET6_IFADDR_STATE_POSTDAD;
- if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6 &&
+ if ((dev_net(idev->dev)->ipv6.devconf_all->accept_dad > 1 ||
+ idev->cnf.accept_dad > 1) &&
+ !idev->cnf.disable_ipv6 &&
!(ifp->flags & IFA_F_STABLE_PRIVACY)) {
struct in6_addr addr;
/* Don't send DELADDR notification for TENTATIVE address,
* since NEWADDR notification is sent only after removing
- * TENTATIVE flag.
+ * TENTATIVE flag, if DAD has not failed.
*/
- if (ifa->flags & IFA_F_TENTATIVE && event == RTM_DELADDR)
+ if (ifa->flags & IFA_F_TENTATIVE && !(ifa->flags & IFA_F_DADFAILED) &&
+ event == RTM_DELADDR)
return;
skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC);
}
static int ip6gre_header(struct sk_buff *skb, struct net_device *dev,
- unsigned short type,
- const void *daddr, const void *saddr, unsigned int len)
+ unsigned short type, const void *daddr,
+ const void *saddr, unsigned int len)
{
struct ip6_tnl *t = netdev_priv(dev);
- struct ipv6hdr *ipv6h = skb_push(skb, t->hlen);
- __be16 *p = (__be16 *)(ipv6h+1);
+ struct ipv6hdr *ipv6h;
+ __be16 *p;
- ip6_flow_hdr(ipv6h, 0,
- ip6_make_flowlabel(dev_net(dev), skb,
- t->fl.u.ip6.flowlabel, true,
- &t->fl.u.ip6));
+ ipv6h = skb_push(skb, t->hlen + sizeof(*ipv6h));
+ ip6_flow_hdr(ipv6h, 0, ip6_make_flowlabel(dev_net(dev), skb,
+ t->fl.u.ip6.flowlabel,
+ true, &t->fl.u.ip6));
ipv6h->hop_limit = t->parms.hop_limit;
ipv6h->nexthdr = NEXTHDR_GRE;
ipv6h->saddr = t->parms.laddr;
ipv6h->daddr = t->parms.raddr;
- p[0] = t->parms.o_flags;
- p[1] = htons(type);
+ p = (__be16 *)(ipv6h + 1);
+ p[0] = t->parms.o_flags;
+ p[1] = htons(type);
/*
* Set the source hardware address.
{
int err;
+ if (!ipv6_mod_enabled())
+ return -EOPNOTSUPP;
+
err = register_pernet_device(&ip6_tnl_net_ops);
if (err < 0)
goto out_pernet;
*/
offset = skb_transport_offset(skb);
skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
+ csum = skb->csum;
skb->ip_summed = CHECKSUM_NONE;
static int
mtype_head(struct ip_set *set, struct sk_buff *skb)
{
- const struct htype *h = set->data;
+ struct htype *h = set->data;
const struct htable *t;
struct nlattr *nested;
size_t memsize;
u8 htable_bits;
+ /* If any members have expired, set->elements will be wrong;
+ * the mtype_expire function will update it with the right count.
+ * We do not hold set->lock here, so grab it first.
+ * set->elements can still be incorrect for a huge set, because
+ * elements might time out while the listing is in progress.
+ */
+ if (SET_WITH_TIMEOUT(set)) {
+ spin_lock_bh(&set->lock);
+ mtype_expire(set, h);
+ spin_unlock_bh(&set->lock);
+ }
+
rcu_read_lock_bh();
t = rcu_dereference_bh_nfnl(h->table);
memsize = mtype_ahash_memsize(h, t) + set->ext_size;
srchash = hash_by_src(net,
&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
- lock = &nf_nat_locks[srchash % ARRAY_SIZE(nf_nat_locks)];
+ lock = &nf_nat_locks[srchash % CONNTRACK_LOCKS];
spin_lock_bh(lock);
hlist_add_head_rcu(&ct->nat_bysource,
&nf_nat_bysource[srchash]);
unsigned int h;
h = hash_by_src(nf_ct_net(ct), &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
- spin_lock_bh(&nf_nat_locks[h % ARRAY_SIZE(nf_nat_locks)]);
+ spin_lock_bh(&nf_nat_locks[h % CONNTRACK_LOCKS]);
hlist_del_rcu(&ct->nat_bysource);
- spin_unlock_bh(&nf_nat_locks[h % ARRAY_SIZE(nf_nat_locks)]);
+ spin_unlock_bh(&nf_nat_locks[h % CONNTRACK_LOCKS]);
}
static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
/* Leave them the same for the moment. */
nf_nat_htable_size = nf_conntrack_htable_size;
- if (nf_nat_htable_size < ARRAY_SIZE(nf_nat_locks))
- nf_nat_htable_size = ARRAY_SIZE(nf_nat_locks);
+ if (nf_nat_htable_size < CONNTRACK_LOCKS)
+ nf_nat_htable_size = CONNTRACK_LOCKS;
nf_nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0);
if (!nf_nat_bysource)
return ret;
}
- for (i = 0; i < ARRAY_SIZE(nf_nat_locks); i++)
+ for (i = 0; i < CONNTRACK_LOCKS; i++)
spin_lock_init(&nf_nat_locks[i]);
nf_ct_helper_expectfn_register(&follow_master_nat);
mutex_lock(&fanout_mutex);
- err = -EINVAL;
- if (!po->running)
- goto out;
-
err = -EALREADY;
if (po->fanout)
goto out;
list_add(&match->list, &fanout_list);
}
err = -EINVAL;
- if (match->type == type &&
+
+ spin_lock(&po->bind_lock);
+ if (po->running &&
+ match->type == type &&
match->prot_hook.type == po->prot_hook.type &&
match->prot_hook.dev == po->prot_hook.dev) {
err = -ENOSPC;
err = 0;
}
}
+ spin_unlock(&po->bind_lock);
+
+ if (err && !refcount_read(&match->sk_ref)) {
+ list_del(&match->list);
+ kfree(match);
+ }
+
out:
if (err && rollover) {
kfree(rollover);
if (!tc_flags_valid(fnew->flags)) {
err = -EINVAL;
- goto errout;
+ goto errout_idr;
}
}
err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr);
if (err)
- goto errout;
+ goto errout_idr;
err = fl_check_assign_mask(head, &mask);
if (err)
- goto errout;
+ goto errout_idr;
if (!tc_skip_sw(fnew->flags)) {
if (!fold && fl_lookup(head, &fnew->mkey)) {
err = -EEXIST;
- goto errout;
+ goto errout_idr;
}
err = rhashtable_insert_fast(&head->ht, &fnew->ht_node,
head->ht_params);
if (err)
- goto errout;
+ goto errout_idr;
}
if (!tc_skip_hw(fnew->flags)) {
&mask.key,
fnew);
if (err)
- goto errout;
+ goto errout_idr;
}
if (!tc_in_hw(fnew->flags))
kfree(tb);
return 0;
+errout_idr:
+ if (fnew->handle)
+ idr_remove_ext(&head->handle_idr, fnew->handle);
errout:
tcf_exts_destroy(&fnew->exts);
kfree(fnew);
if (tc_skip_sw(head->flags))
return -1;
+ *res = head->res;
return tcf_exts_exec(skb, &head->exts, res);
}
qdisc->gso_skb = NULL;
}
qdisc->q.qlen = 0;
+ qdisc->qstats.backlog = 0;
}
EXPORT_SYMBOL(qdisc_reset);
}
if (cl != NULL) {
+ int old_flags;
+
if (parentid) {
if (cl->cl_parent &&
cl->cl_parent->cl_common.classid != parentid)
}
sch_tree_lock(sch);
+ old_flags = cl->cl_flags;
+
if (rsc != NULL)
hfsc_change_rsc(cl, rsc, cur_time);
if (fsc != NULL)
hfsc_change_usc(cl, usc, cur_time);
if (cl->qdisc->q.qlen != 0) {
- if (cl->cl_flags & HFSC_RSC)
- update_ed(cl, qdisc_peek_len(cl->qdisc));
- if (cl->cl_flags & HFSC_FSC)
- update_vf(cl, 0, cur_time);
+ int len = qdisc_peek_len(cl->qdisc);
+
+ if (cl->cl_flags & HFSC_RSC) {
+ if (old_flags & HFSC_RSC)
+ update_ed(cl, len);
+ else
+ init_ed(cl, len);
+ }
+
+ if (cl->cl_flags & HFSC_FSC) {
+ if (old_flags & HFSC_FSC)
+ update_vf(cl, 0, cur_time);
+ else
+ init_vf(cl, len);
+ }
}
sch_tree_unlock(sch);
__be32 *subnet, u8 *prefix_len)
{
struct dst_entry *dst = sk_dst_get(clcsock->sk);
+ struct in_device *in_dev;
struct sockaddr_in addr;
int rc = -ENOENT;
int len;
/* get address to which the internal TCP socket is bound */
kernel_getsockname(clcsock, (struct sockaddr *)&addr, &len);
/* analyze IPv4 specific data of net_device belonging to TCP socket */
- for_ifa(dst->dev->ip_ptr) {
- if (ifa->ifa_address != addr.sin_addr.s_addr)
+ rcu_read_lock();
+ in_dev = __in_dev_get_rcu(dst->dev);
+ for_ifa(in_dev) {
+ if (!inet_ifa_match(addr.sin_addr.s_addr, ifa))
continue;
*prefix_len = inet_mask_len(ifa->ifa_mask);
*subnet = ifa->ifa_address & ifa->ifa_mask;
rc = 0;
break;
- } endfor_ifa(dst->dev->ip_ptr);
+ } endfor_ifa(in_dev);
+ rcu_read_unlock();
out_rel:
dst_release(dst);
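The same pattern in isolation, for reference: an in_device obtained with __in_dev_get_rcu() is only valid inside the rcu_read_lock() section, and for_ifa()/endfor_ifa() iterate its IPv4 addresses (the function name is made up):

    #include <linux/inetdevice.h>
    #include <linux/rcupdate.h>

    static __be32 first_ipv4_addr(struct net_device *dev)
    {
            struct in_device *in_dev;
            __be32 addr = 0;

            rcu_read_lock();
            in_dev = __in_dev_get_rcu(dev);
            if (in_dev) {
                    for_ifa(in_dev) {
                            addr = ifa->ifa_address;
                            break;
                    } endfor_ifa(in_dev);
            }
            rcu_read_unlock();
            return addr;
    }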
/* RDMA setup failed, switch back to TCP */
smc->use_fallback = true;
if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
- rc = smc_clc_send_decline(smc, reason_code, 0);
+ rc = smc_clc_send_decline(smc, reason_code);
if (rc < sizeof(struct smc_clc_msg_decline))
goto out_err;
}
rc = local_contact;
if (rc == -ENOMEM)
reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
- else if (rc == -ENOLINK)
- reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */
goto decline_rdma;
}
link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
smc_conn_free(&new_smc->conn);
new_smc->use_fallback = true;
if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
- rc = smc_clc_send_decline(new_smc, reason_code, 0);
+ rc = smc_clc_send_decline(new_smc, reason_code);
if (rc < sizeof(struct smc_clc_msg_decline))
goto out_err;
}
atomic_t sndbuf_space; /* remaining space in sndbuf */
u16 tx_cdc_seq; /* sequence # for CDC send */
spinlock_t send_lock; /* protect wr_sends */
- struct work_struct tx_work; /* retry of smc_cdc_msg_send */
+ struct delayed_work tx_work; /* retry of smc_cdc_msg_send */
struct smc_host_cdc_msg local_rx_ctrl; /* filled during event_handl.
* .prod cf. TCP rcv_nxt
}
if (clcm->type == SMC_CLC_DECLINE) {
reason_code = SMC_CLC_DECL_REPLY;
- if (ntohl(((struct smc_clc_msg_decline *)buf)->peer_diagnosis)
- == SMC_CLC_DECL_SYNCERR)
+ if (((struct smc_clc_msg_decline *)buf)->hdr.flag) {
smc->conn.lgr->sync_err = true;
+ smc_lgr_terminate(smc->conn.lgr);
+ }
}
out:
}
/* send CLC DECLINE message across internal TCP socket */
-int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info,
- u8 out_of_sync)
+int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)
{
struct smc_clc_msg_decline dclc;
struct msghdr msg;
dclc.hdr.type = SMC_CLC_DECLINE;
dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline));
dclc.hdr.version = SMC_CLC_V1;
- dclc.hdr.flag = out_of_sync ? 1 : 0;
+ dclc.hdr.flag = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? 1 : 0;
memcpy(dclc.id_for_peer, local_systemid, sizeof(local_systemid));
dclc.peer_diagnosis = htonl(peer_diag_info);
memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
u8 expected_type);
-int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info,
- u8 out_of_sync);
+int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info);
int smc_clc_send_proposal(struct smc_sock *smc, struct smc_ib_device *smcibdev,
u8 ibport);
int smc_clc_send_confirm(struct smc_sock *smc);
{
struct smc_cdc_conn_state_flags *txflags =
&smc->conn.local_tx_ctrl.conn_state_flags;
- long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT;
struct smc_connection *conn = &smc->conn;
struct sock *sk = &smc->sk;
int old_state;
+ long timeout;
int rc = 0;
- if (sock_flag(sk, SOCK_LINGER) &&
- !(current->flags & PF_EXITING))
- timeout = sk->sk_lingertime;
+ timeout = current->flags & PF_EXITING ?
+ 0 : sock_flag(sk, SOCK_LINGER) ?
+ sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;
again:
old_state = sk->sk_state;
case SMC_ACTIVE:
smc_close_stream_wait(smc, timeout);
release_sock(sk);
- cancel_work_sync(&conn->tx_work);
+ cancel_delayed_work_sync(&conn->tx_work);
lock_sock(sk);
if (sk->sk_state == SMC_ACTIVE) {
/* send close request */
if (!smc_cdc_rxed_any_close(conn))
smc_close_stream_wait(smc, timeout);
release_sock(sk);
- cancel_work_sync(&conn->tx_work);
+ cancel_delayed_work_sync(&conn->tx_work);
lock_sock(sk);
if (sk->sk_err != ECONNABORTED) {
/* confirm close from peer */
/* peer sending PeerConnectionClosed will cause transition */
break;
case SMC_PROCESSABORT:
- cancel_work_sync(&conn->tx_work);
+ release_sock(sk);
+ cancel_delayed_work_sync(&conn->tx_work);
+ lock_sock(sk);
smc_close_abort(conn);
sk->sk_state = SMC_CLOSED;
smc_close_wait_tx_pends(smc);
int smc_close_shutdown_write(struct smc_sock *smc)
{
struct smc_connection *conn = &smc->conn;
- long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT;
struct sock *sk = &smc->sk;
int old_state;
+ long timeout;
int rc = 0;
- if (sock_flag(sk, SOCK_LINGER))
- timeout = sk->sk_lingertime;
+ timeout = current->flags & PF_EXITING ?
+ 0 : sock_flag(sk, SOCK_LINGER) ?
+ sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;
again:
old_state = sk->sk_state;
case SMC_ACTIVE:
smc_close_stream_wait(smc, timeout);
release_sock(sk);
- cancel_work_sync(&conn->tx_work);
+ cancel_delayed_work_sync(&conn->tx_work);
lock_sock(sk);
/* send close wr request */
rc = smc_close_wr(conn);
if (!smc_cdc_rxed_any_close(conn))
smc_close_stream_wait(smc, timeout);
release_sock(sk);
- cancel_work_sync(&conn->tx_work);
+ cancel_delayed_work_sync(&conn->tx_work);
lock_sock(sk);
/* confirm close from peer */
rc = smc_close_wr(conn);
#include "smc_cdc.h"
#include "smc_close.h"
-#define SMC_LGR_NUM_INCR 256
-#define SMC_LGR_FREE_DELAY (600 * HZ)
+#define SMC_LGR_NUM_INCR 256
+#define SMC_LGR_FREE_DELAY_SERV (600 * HZ)
+#define SMC_LGR_FREE_DELAY_CLNT (SMC_LGR_FREE_DELAY_SERV + 10)
static u32 smc_lgr_num; /* unique link group number */
__smc_lgr_unregister_conn(conn);
}
write_unlock_bh(&lgr->conns_lock);
- if (reduced && !lgr->conns_num)
- schedule_delayed_work(&lgr->free_work, SMC_LGR_FREE_DELAY);
+ if (!reduced || lgr->conns_num)
+ return;
+ /* Client link group creation always follows server link group
+ * creation. For the client, use a somewhat higher removal delay,
+ * otherwise there is a risk of out-of-sync link groups.
+ */
+ mod_delayed_work(system_wq, &lgr->free_work,
+ lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT :
+ SMC_LGR_FREE_DELAY_SERV);
}
static void smc_lgr_free_work(struct work_struct *work)
ndev = smcibdev->ibdev->get_netdev(smcibdev->ibdev, ibport);
if (ndev) {
memcpy(&smcibdev->mac, ndev->dev_addr, ETH_ALEN);
+ dev_put(ndev);
} else if (!rc) {
memcpy(&smcibdev->mac[ibport - 1][0],
&smcibdev->gid[ibport - 1].raw[8], 3);
sizeof(new_pnetelem->ndev->name)) ||
smc_pnet_same_ibname(pnetelem,
new_pnetelem->smcibdev->ibdev->name,
- new_pnetelem->ib_port))
+ new_pnetelem->ib_port)) {
+ dev_put(pnetelem->ndev);
goto found;
+ }
}
list_add_tail(&new_pnetelem->list, &smc_pnettable.pnetlist);
rc = 0;
read_done = sock_intr_errno(timeo);
break;
}
+ if (!timeo)
+ return -EAGAIN;
}
if (!atomic_read(&conn->bytes_to_rcv)) {
#include "smc_cdc.h"
#include "smc_tx.h"
+#define SMC_TX_WORK_DELAY HZ
+
/***************************** sndbuf producer *******************************/
/* callback implementation for sk.sk_write_space()
goto out_unlock;
}
rc = 0;
- schedule_work(&conn->tx_work);
+ schedule_delayed_work(&conn->tx_work,
+ SMC_TX_WORK_DELAY);
}
goto out_unlock;
}
*/
static void smc_tx_work(struct work_struct *work)
{
- struct smc_connection *conn = container_of(work,
+ struct smc_connection *conn = container_of(to_delayed_work(work),
struct smc_connection,
tx_work);
struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
if (!rc)
rc = smc_cdc_msg_send(conn, wr_buf, pend);
if (rc < 0) {
- schedule_work(&conn->tx_work);
+ schedule_delayed_work(&conn->tx_work,
+ SMC_TX_WORK_DELAY);
return;
}
smc_curs_write(&conn->rx_curs_confirmed,
void smc_tx_init(struct smc_sock *smc)
{
smc->sk.sk_write_space = smc_tx_write_space;
- INIT_WORK(&smc->conn.tx_work, smc_tx_work);
+ INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
spin_lock_init(&smc->conn.send_lock);
}
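The conversion above follows the standard delayed-work pattern; a self-contained sketch (struct foo is hypothetical, and the one-second delay mirrors SMC_TX_WORK_DELAY):

    #include <linux/workqueue.h>

    struct foo {
            struct delayed_work work;
    };

    static void foo_work(struct work_struct *work)
    {
            struct foo *f = container_of(to_delayed_work(work),
                                         struct foo, work);
            /* ... on a transient failure, reschedule f->work ... */
            (void)f;
    }

    static void foo_init(struct foo *f)
    {
            INIT_DELAYED_WORK(&f->work, foo_work);
            schedule_delayed_work(&f->work, HZ);    /* run ~1s from now */
    }

    static void foo_teardown(struct foo *f)
    {
            /* as in smc_close above: drop locks the work item needs
             * before cancel_delayed_work_sync() to avoid deadlock */
            cancel_delayed_work_sync(&f->work);
    }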
int rc;
ib_req_notify_cq(link->smcibdev->roce_cq_send,
- IB_CQ_SOLICITED_MASK | IB_CQ_REPORT_MISSED_EVENTS);
+ IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
pend = container_of(priv, struct smc_wr_tx_pend, priv);
rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx],
&failed_wr);
if (unlikely(n != mw->mw_nents))
goto out_mapmr_err;
- dprintk("RPC: %s: Using frmr %p to map %u segments (%u bytes)\n",
+ dprintk("RPC: %s: Using frmr %p to map %u segments (%llu bytes)\n",
__func__, frmr, mw->mw_nents, mr->length);
key = (u8)(mr->rkey & 0x000000FF);
if (err)
return err;
+ if (!setup.chandef.chan)
+ return -EINVAL;
+
err = validate_beacon_tx_rate(rdev, setup.chandef.chan->band,
&setup.beacon_rate);
if (err)
if (err)
return err;
+ if (!tb[NL80211_REKEY_DATA_REPLAY_CTR] || !tb[NL80211_REKEY_DATA_KEK] ||
+ !tb[NL80211_REKEY_DATA_KCK])
+ return -EINVAL;
if (nla_len(tb[NL80211_REKEY_DATA_REPLAY_CTR]) != NL80211_REPLAY_CTR_LEN)
return -ERANGE;
if (nla_len(tb[NL80211_REKEY_DATA_KEK]) != NL80211_KEK_LEN)
compile_to_dts() {
dtx="$1"
+ dtc_include="$2"
if [ -d "${dtx}" ] ; then
# ----- input is DTS (source)
if ( cpp ${cpp_flags} -x assembler-with-cpp ${dtx} \
- | ${DTC} -I dts ) ; then
+ | ${DTC} ${dtc_include} -I dts ) ; then
return
fi
cpp_flags="\
-nostdinc \
- -I${srctree}/arch/${ARCH}/boot/dts \
-I${srctree}/scripts/dtc/include-prefixes \
- -I${srctree}/drivers/of/testcase-data \
-undef -D__DTS__"
-dtc_flags="\
- -i ${srctree}/arch/${ARCH}/boot/dts/ \
- -i ${srctree}/kernel/dts \
- ${dtx_path_1_dtc_include} \
- ${dtx_path_2_dtc_include}"
-
-DTC="${DTC} ${dtc_flags} -O dts -qq -f ${dtc_sort} -o -"
+DTC="\
+ ${DTC} \
+ -i ${srctree}/scripts/dtc/include-prefixes \
+ -O dts -qq -f ${dtc_sort} -o -"
# ----- do the diff or decompile
if (( ${cmd_diff} )) ; then
diff ${diff_flags} --label "${dtx_file_1}" --label "${dtx_file_2}" \
- <(compile_to_dts "${dtx_file_1}") \
- <(compile_to_dts "${dtx_file_2}")
+ <(compile_to_dts "${dtx_file_1}" "${dtx_path_1_dtc_include}") \
+ <(compile_to_dts "${dtx_file_2}" "${dtx_path_2_dtc_include}")
else
- compile_to_dts "${dtx_file_1}"
+ compile_to_dts "${dtx_file_1}" "${dtx_path_1_dtc_include}"
fi
+++ /dev/null
-if [ ! -f .version ]
-then
- echo 1
-else
- expr 0`cat .version` + 1
-fi
$(MAKE) clean
$(CONFIG_SHELL) $(MKSPEC) >$(objtree)/kernel.spec
$(call cmd,src_tar,$(KERNELPATH),kernel.spec)
- $(CONFIG_SHELL) $(srctree)/scripts/mkversion > $(objtree)/.tmp_version
- mv -f $(objtree)/.tmp_version $(objtree)/.version
rpmbuild $(RPMOPTS) --target $(UTS_MACHINE) -ta $(KERNELPATH).tar.gz
rm $(KERNELPATH).tar.gz kernel.spec
binrpm-pkg: FORCE
$(MAKE) KBUILD_SRC=
$(CONFIG_SHELL) $(MKSPEC) prebuilt > $(objtree)/binkernel.spec
- $(CONFIG_SHELL) $(srctree)/scripts/mkversion > $(objtree)/.tmp_version
- mv -f $(objtree)/.tmp_version $(objtree)/.version
-
rpmbuild $(RPMOPTS) --define "_builddir $(objtree)" --target \
$(UTS_MACHINE) -bb $(objtree)/binkernel.spec
rm binkernel.spec
fi
sourcename=$KDEB_SOURCENAME
tmpdir="$objtree/debian/tmp"
-fwdir="$objtree/debian/fwtmp"
kernel_headers_dir="$objtree/debian/hdrtmp"
libc_headers_dir="$objtree/debian/headertmp"
dbg_dir="$objtree/debian/dbgtmp"
packagename=linux-image-$version
-fwpackagename=linux-firmware-image-$version
kernel_headers_packagename=linux-headers-$version
libc_headers_packagename=linux-libc-dev
dbg_packagename=$packagename-dbg
BUILD_DEBUG="$(grep -s '^CONFIG_DEBUG_INFO=y' $KCONFIG_CONFIG || true)"
# Setup the directory structure
-rm -rf "$tmpdir" "$fwdir" "$kernel_headers_dir" "$libc_headers_dir" "$dbg_dir" $objtree/debian/files
+rm -rf "$tmpdir" "$kernel_headers_dir" "$libc_headers_dir" "$dbg_dir" $objtree/debian/files
mkdir -m 755 -p "$tmpdir/DEBIAN"
mkdir -p "$tmpdir/lib" "$tmpdir/boot"
-mkdir -p "$fwdir/lib/firmware/$version/"
mkdir -p "$kernel_headers_dir/lib/modules/$version/"
# Build and install the kernel
cat <<EOF >> debian/control
Package: $packagename
-Suggests: $fwpackagename
Architecture: any
Description: Linux kernel, version $version
This package contains the Linux kernel, modules and corresponding other
This is useful for people who need to build external modules
EOF
-# Do we have firmware? Move it out of the way and build it into a package.
-if [ -e "$tmpdir/lib/firmware" ]; then
- mv "$tmpdir/lib/firmware"/* "$fwdir/lib/firmware/$version/"
- rmdir "$tmpdir/lib/firmware"
-
- cat <<EOF >> debian/control
-
-Package: $fwpackagename
-Architecture: all
-Description: Linux kernel firmware, version $version
- This package contains firmware from the Linux kernel, version $version.
-EOF
-
- create_package "$fwpackagename" "$fwdir"
-fi
-
cat <<EOF >> debian/control
Package: $libc_headers_packagename
echo "Name: kernel"
echo "Summary: The Linux Kernel"
echo "Version: $__KERNELRELEASE"
-# we need to determine the NEXT version number so that uname and
-# rpm -q will agree
-echo "Release: `. $srctree/scripts/mkversion`"
+echo "Release: $(cat .version 2>/dev/null || echo 1)"
echo "License: GPL"
echo "Group: System Environment/Kernel"
echo "Vendor: The Linux Community"
echo "%build"
if ! $PREBUILT; then
-echo "make clean && make %{?_smp_mflags}"
+echo "make clean && make %{?_smp_mflags} KBUILD_BUILD_VERSION=%{release}"
echo ""
fi
echo "%else"
echo 'mkdir -p $RPM_BUILD_ROOT/boot $RPM_BUILD_ROOT/lib/modules'
echo "%endif"
-echo 'mkdir -p $RPM_BUILD_ROOT'"/lib/firmware/$KERNELRELEASE"
-echo 'INSTALL_MOD_PATH=$RPM_BUILD_ROOT make %{?_smp_mflags} KBUILD_SRC= mod-fw= modules_install'
-echo 'INSTALL_FW_PATH=$RPM_BUILD_ROOT'"/lib/firmware/$KERNELRELEASE"
-echo 'make INSTALL_FW_PATH=$INSTALL_FW_PATH' firmware_install
+echo 'INSTALL_MOD_PATH=$RPM_BUILD_ROOT make %{?_smp_mflags} KBUILD_SRC= modules_install'
echo "%ifarch ia64"
echo 'cp $KBUILD_IMAGE $RPM_BUILD_ROOT'"/boot/efi/vmlinuz-$KERNELRELEASE"
echo 'ln -s '"efi/vmlinuz-$KERNELRELEASE" '$RPM_BUILD_ROOT'"/boot/"
echo 'rm -f $RPM_BUILD_ROOT'"/lib/modules/$KERNELRELEASE/build"
echo 'rm -f $RPM_BUILD_ROOT'"/lib/modules/$KERNELRELEASE/source"
echo "mkdir -p "'$RPM_BUILD_ROOT'"/usr/src/kernels/$KERNELRELEASE"
-echo "EXCLUDES=\"$RCS_TAR_IGNORE --exclude .tmp_versions --exclude=*vmlinux* --exclude=*.o --exclude=*.ko --exclude=*.cmd --exclude=Documentation --exclude=firmware --exclude .config.old --exclude .missing-syscalls.d\""
+echo "EXCLUDES=\"$RCS_TAR_IGNORE --exclude .tmp_versions --exclude=*vmlinux* --exclude=*.o --exclude=*.ko --exclude=*.cmd --exclude=Documentation --exclude .config.old --exclude .missing-syscalls.d\""
echo "tar "'$EXCLUDES'" -cf- . | (cd "'$RPM_BUILD_ROOT'"/usr/src/kernels/$KERNELRELEASE;tar xvf -)"
echo 'cd $RPM_BUILD_ROOT'"/lib/modules/$KERNELRELEASE"
echo "ln -sf /usr/src/kernels/$KERNELRELEASE build"
echo "/lib/modules/$KERNELRELEASE"
echo "%exclude /lib/modules/$KERNELRELEASE/build"
echo "%exclude /lib/modules/$KERNELRELEASE/source"
-echo "/lib/firmware/$KERNELRELEASE"
echo "/boot/*"
echo ""
echo "%files headers"
#
# Generated include files
#
+net_names.h
capability_names.h
rlim_names.h
apparmor-y := apparmorfs.o audit.o capability.o context.o ipc.o lib.o match.o \
path.o domain.o policy.o policy_unpack.o procattr.o lsm.o \
- resource.o secid.o file.o policy_ns.o label.o
+ resource.o secid.o file.o policy_ns.o label.o mount.o net.o
apparmor-$(CONFIG_SECURITY_APPARMOR_HASH) += crypto.o
-clean-files := capability_names.h rlim_names.h
+clean-files := capability_names.h rlim_names.h net_names.h
+# Build a lower case string table of address family names
+# Transform lines from
+# #define AF_LOCAL 1 /* POSIX name for AF_UNIX */
+# #define AF_INET 2 /* Internet IP Protocol */
+# to
+# [1] = "local",
+# [2] = "inet",
+#
+# and build the securityfs entries for the mapping.
+# Transforms lines from
+# #define AF_INET 2 /* Internet IP Protocol */
+# to
+# #define AA_SFS_AF_MASK "local inet"
+quiet_cmd_make-af = GEN $@
+cmd_make-af = echo "static const char *address_family_names[] = {" > $@ ;\
+ sed $< >>$@ -r -n -e "/AF_MAX/d" -e "/AF_LOCAL/d" -e "/AF_ROUTE/d" -e \
+ 's/^\#define[ \t]+AF_([A-Z0-9_]+)[ \t]+([0-9]+)(.*)/[\2] = "\L\1",/p';\
+ echo "};" >> $@ ;\
+ printf '%s' '\#define AA_SFS_AF_MASK "' >> $@ ;\
+ sed -r -n -e "/AF_MAX/d" -e "/AF_LOCAL/d" -e "/AF_ROUTE/d" -e \
+ 's/^\#define[ \t]+AF_([A-Z0-9_]+)[ \t]+([0-9]+)(.*)/\L\1/p'\
+ $< | tr '\n' ' ' | sed -e 's/ $$/"\n/' >> $@
+
+# Build a lower case string table of sock type names
+# Transform lines from
+# SOCK_STREAM = 1,
+# to
+# [1] = "stream",
+quiet_cmd_make-sock = GEN $@
+cmd_make-sock = echo "static const char *sock_type_names[] = {" >> $@ ;\
+ sed $^ >>$@ -r -n \
+ -e 's/^\tSOCK_([A-Z0-9_]+)[\t]+=[ \t]+([0-9]+)(.*)/[\2] = "\L\1",/p';\
+ echo "};" >> $@
# Build a lower case string table of capability names
# Transforms lines from
tr '\n' ' ' | sed -e 's/ $$/"\n/' >> $@
$(obj)/capability.o : $(obj)/capability_names.h
+$(obj)/net.o : $(obj)/net_names.h
$(obj)/resource.o : $(obj)/rlim_names.h
$(obj)/capability_names.h : $(srctree)/include/uapi/linux/capability.h \
$(src)/Makefile
$(obj)/rlim_names.h : $(srctree)/include/uapi/asm-generic/resource.h \
$(src)/Makefile
$(call cmd,make-rlim)
+$(obj)/net_names.h : $(srctree)/include/linux/socket.h \
+ $(srctree)/include/linux/net.h \
+ $(src)/Makefile
+ $(call cmd,make-af)
+ $(call cmd,make-sock)
#include "include/audit.h"
#include "include/context.h"
#include "include/crypto.h"
+#include "include/ipc.h"
#include "include/policy_ns.h"
#include "include/label.h"
#include "include/policy.h"
inode_lock(dir);
dentry = lookup_one_len(name, parent, strlen(name));
- if (IS_ERR(dentry))
+ if (IS_ERR(dentry)) {
+ error = PTR_ERR(dentry);
goto fail_lock;
+ }
if (d_really_is_positive(dentry)) {
error = -EEXIST;
{
int i;
+ AA_BUG(!old);
+ AA_BUG(!new);
+ AA_BUG(!mutex_is_locked(&profiles_ns(old)->lock));
+
for (i = 0; i < AAFS_PROF_SIZEOF; i++) {
new->dents[i] = old->dents[i];
if (new->dents[i])
struct dentry *dent = NULL, *dir;
int error;
+ AA_BUG(!profile);
+ AA_BUG(!mutex_is_locked(&profiles_ns(profile)->lock));
+
if (!parent) {
struct aa_profile *p;
p = aa_deref_parent(profile);
if (!ns)
return;
+ AA_BUG(!mutex_is_locked(&ns->lock));
list_for_each_entry(child, &ns->base.profiles, base.list)
__aafs_profile_rmdir(child);
{
struct aa_ns *parent, *next;
+ AA_BUG(!root);
+ AA_BUG(!ns);
+ AA_BUG(ns != root && !mutex_is_locked(&ns->parent->lock));
+
/* is next namespace a child */
if (!list_empty(&ns->sub_ns)) {
next = list_first_entry(&ns->sub_ns, typeof(*ns), base.list);
static struct aa_profile *__first_profile(struct aa_ns *root,
struct aa_ns *ns)
{
+ AA_BUG(!root);
+ AA_BUG(ns && !mutex_is_locked(&ns->lock));
+
for (; ns; ns = __next_ns(root, ns)) {
if (!list_empty(&ns->base.profiles))
return list_first_entry(&ns->base.profiles,
struct aa_profile *parent;
struct aa_ns *ns = p->ns;
+ AA_BUG(!mutex_is_locked(&profiles_ns(p)->lock));
+
/* is next profile a child */
if (!list_empty(&p->base.profiles))
return list_first_entry(&p->base.profiles, typeof(*p),
{ }
};
+static struct aa_sfs_entry aa_sfs_entry_signal[] = {
+ AA_SFS_FILE_STRING("mask", AA_SFS_SIG_MASK),
+ { }
+};
+
static struct aa_sfs_entry aa_sfs_entry_domain[] = {
AA_SFS_FILE_BOOLEAN("change_hat", 1),
AA_SFS_FILE_BOOLEAN("change_hatv", 1),
{ }
};
+static struct aa_sfs_entry aa_sfs_entry_mount[] = {
+ AA_SFS_FILE_STRING("mask", "mount umount pivot_root"),
+ { }
+};
+
static struct aa_sfs_entry aa_sfs_entry_ns[] = {
AA_SFS_FILE_BOOLEAN("profile", 1),
- AA_SFS_FILE_BOOLEAN("pivot_root", 1),
+ AA_SFS_FILE_BOOLEAN("pivot_root", 0),
{ }
};
AA_SFS_DIR("policy", aa_sfs_entry_policy),
AA_SFS_DIR("domain", aa_sfs_entry_domain),
AA_SFS_DIR("file", aa_sfs_entry_file),
+ AA_SFS_DIR("network", aa_sfs_entry_network),
+ AA_SFS_DIR("mount", aa_sfs_entry_mount),
AA_SFS_DIR("namespaces", aa_sfs_entry_ns),
AA_SFS_FILE_U64("capability", VFS_CAP_FLAGS_MASK),
AA_SFS_DIR("rlimit", aa_sfs_entry_rlimit),
AA_SFS_DIR("caps", aa_sfs_entry_caps),
AA_SFS_DIR("ptrace", aa_sfs_entry_ptrace),
+ AA_SFS_DIR("signal", aa_sfs_entry_signal),
AA_SFS_DIR("query", aa_sfs_entry_query),
{ }
};
static struct aa_sfs_entry aa_sfs_entry_apparmor[] = {
- AA_SFS_FILE_FOPS(".access", 0640, &aa_sfs_access),
+ AA_SFS_FILE_FOPS(".access", 0666, &aa_sfs_access),
AA_SFS_FILE_FOPS(".stacked", 0444, &seq_ns_stacked_fops),
AA_SFS_FILE_FOPS(".ns_stacked", 0444, &seq_ns_nsstacked_fops),
- AA_SFS_FILE_FOPS(".ns_level", 0666, &seq_ns_level_fops),
- AA_SFS_FILE_FOPS(".ns_name", 0640, &seq_ns_name_fops),
- AA_SFS_FILE_FOPS("profiles", 0440, &aa_sfs_profiles_fops),
+ AA_SFS_FILE_FOPS(".ns_level", 0444, &seq_ns_level_fops),
+ AA_SFS_FILE_FOPS(".ns_name", 0444, &seq_ns_name_fops),
+ AA_SFS_FILE_FOPS("profiles", 0444, &aa_sfs_profiles_fops),
AA_SFS_DIR("features", aa_sfs_entry_features),
{ }
};
*
* Returns: refcounted label, or NULL on failure (MAYBE NULL)
*/
-static struct aa_label *x_table_lookup(struct aa_profile *profile, u32 xindex,
- const char **name)
+struct aa_label *x_table_lookup(struct aa_profile *profile, u32 xindex,
+ const char **name)
{
struct aa_label *label = NULL;
u32 xtype = xindex & AA_X_TYPE_MASK;
#include "include/context.h"
#include "include/file.h"
#include "include/match.h"
+#include "include/net.h"
#include "include/path.h"
#include "include/policy.h"
#include "include/label.h"
return error;
}
+static int __file_sock_perm(const char *op, struct aa_label *label,
+ struct aa_label *flabel, struct file *file,
+ u32 request, u32 denied)
+{
+ struct socket *sock = (struct socket *) file->private_data;
+ int error;
+
+ AA_BUG(!sock);
+
+ /* revalidation due to label out of date. No revocation at this time */
+ if (!denied && aa_label_is_subset(flabel, label))
+ return 0;
+
+ /* TODO: improve to skip profiles cached in flabel */
+ error = aa_sock_file_perm(label, op, request, sock);
+ if (denied) {
+ /* TODO: improve to skip profiles checked above */
+		/* check every profile in the file label so each check is cached */
+ last_error(error, aa_sock_file_perm(flabel, op, request, sock));
+ }
+ if (!error)
+ update_file_ctx(file_ctx(file), label, request);
+
+ return error;
+}
+
/**
* aa_file_perm - do permission revalidation check & audit for @file
* @op: operation being checked
error = __file_path_perm(op, label, flabel, file, request,
denied);
+ else if (S_ISSOCK(file_inode(file)->i_mode))
+ error = __file_sock_perm(op, label, flabel, file, request,
+ denied);
done:
rcu_read_unlock();
#define AA_CLASS_NET 4
#define AA_CLASS_RLIMITS 5
#define AA_CLASS_DOMAIN 6
+#define AA_CLASS_MOUNT 7
#define AA_CLASS_PTRACE 9
+#define AA_CLASS_SIGNAL 10
#define AA_CLASS_LABEL 16
#define AA_CLASS_LAST AA_CLASS_LABEL
#define OP_FMPROT "file_mprotect"
#define OP_INHERIT "file_inherit"
+#define OP_PIVOTROOT "pivotroot"
+#define OP_MOUNT "mount"
+#define OP_UMOUNT "umount"
+
#define OP_CREATE "create"
#define OP_POST_CREATE "post_create"
#define OP_BIND "bind"
#define OP_SHUTDOWN "socket_shutdown"
#define OP_PTRACE "ptrace"
+#define OP_SIGNAL "signal"
#define OP_EXEC "exec"
/* these entries require a custom callback fn */
struct {
struct aa_label *peer;
- struct {
- const char *target;
- kuid_t ouid;
- } fs;
+ union {
+ struct {
+ kuid_t ouid;
+ const char *target;
+ } fs;
+ struct {
+ int type, protocol;
+ struct sock *peer_sk;
+ void *addr;
+ int addrlen;
+ } net;
+ int signal;
+ struct {
+ int rlim;
+ unsigned long max;
+ } rlim;
+ };
};
struct {
- const char *name;
- long pos;
+ struct aa_profile *profile;
const char *ns;
+ long pos;
} iface;
struct {
- int rlim;
- unsigned long max;
- } rlim;
+ const char *src_name;
+ const char *type;
+ const char *trans;
+ const char *data;
+ unsigned long flags;
+ } mnt;
};
};
#include <linux/binfmts.h>
#include <linux/types.h>
+#include "label.h"
+
#ifndef __AA_DOMAIN_H
#define __AA_DOMAIN_H
#define AA_CHANGE_ONEXEC 4
#define AA_CHANGE_STACK 8
+struct aa_label *x_table_lookup(struct aa_profile *profile, u32 xindex,
+ const char **name);
+
int apparmor_bprm_set_creds(struct linux_binprm *bprm);
void aa_free_domain_entries(struct aa_domain *domain);
#define AA_PTRACE_PERM_MASK (AA_PTRACE_READ | AA_PTRACE_TRACE | \
AA_MAY_BE_READ | AA_MAY_BE_TRACED)
+#define AA_SIGNAL_PERM_MASK (MAY_READ | MAY_WRITE)
+
+#define AA_SFS_SIG_MASK "hup int quit ill trap abrt bus fpe kill usr1 " \
+ "segv usr2 pipe alrm term stkflt chld cont stop stp ttin ttou urg " \
+ "xcpu xfsz vtalrm prof winch io pwr sys emt lost"
int aa_may_ptrace(struct aa_label *tracer, struct aa_label *tracee,
u32 request);
+int aa_may_signal(struct aa_label *sender, struct aa_label *target, int sig);
#endif /* __AA_IPC_H */
#define FLAG_SHOW_MODE 1
#define FLAG_VIEW_SUBNS 2
#define FLAG_HIDDEN_UNCONFINED 4
+#define FLAG_ABS_ROOT 8
int aa_label_snxprint(char *str, size_t size, struct aa_ns *view,
struct aa_label *label, int flags);
int aa_label_asxprint(char **strp, struct aa_ns *ns, struct aa_label *label,
--- /dev/null
+/*
+ * AppArmor security module
+ *
+ * This file contains AppArmor file mediation function definitions.
+ *
+ * Copyright 2017 Canonical Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+#ifndef __AA_MOUNT_H
+#define __AA_MOUNT_H
+
+#include <linux/fs.h>
+#include <linux/path.h>
+
+#include "domain.h"
+#include "policy.h"
+
+/* mount perms */
+#define AA_MAY_PIVOTROOT 0x01
+#define AA_MAY_MOUNT 0x02
+#define AA_MAY_UMOUNT 0x04
+#define AA_AUDIT_DATA 0x40
+#define AA_MNT_CONT_MATCH 0x40
+
+#define AA_MS_IGNORE_MASK (MS_KERNMOUNT | MS_NOSEC | MS_ACTIVE | MS_BORN)
+
+int aa_remount(struct aa_label *label, const struct path *path,
+ unsigned long flags, void *data);
+
+int aa_bind_mount(struct aa_label *label, const struct path *path,
+ const char *old_name, unsigned long flags);
+
+
+int aa_mount_change_type(struct aa_label *label, const struct path *path,
+ unsigned long flags);
+
+int aa_move_mount(struct aa_label *label, const struct path *path,
+ const char *old_name);
+
+int aa_new_mount(struct aa_label *label, const char *dev_name,
+ const struct path *path, const char *type, unsigned long flags,
+ void *data);
+
+int aa_umount(struct aa_label *label, struct vfsmount *mnt, int flags);
+
+int aa_pivotroot(struct aa_label *label, const struct path *old_path,
+ const struct path *new_path);
+
+#endif /* __AA_MOUNT_H */
--- /dev/null
+/*
+ * AppArmor security module
+ *
+ * This file contains AppArmor network mediation definitions.
+ *
+ * Copyright (C) 1998-2008 Novell/SUSE
+ * Copyright 2009-2017 Canonical Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+#ifndef __AA_NET_H
+#define __AA_NET_H
+
+#include <net/sock.h>
+#include <linux/path.h>
+
+#include "apparmorfs.h"
+#include "label.h"
+#include "perms.h"
+#include "policy.h"
+
+#define AA_MAY_SEND AA_MAY_WRITE
+#define AA_MAY_RECEIVE AA_MAY_READ
+
+#define AA_MAY_SHUTDOWN AA_MAY_DELETE
+
+#define AA_MAY_CONNECT AA_MAY_OPEN
+#define AA_MAY_ACCEPT 0x00100000
+
+#define AA_MAY_BIND 0x00200000
+#define AA_MAY_LISTEN 0x00400000
+
+#define AA_MAY_SETOPT 0x01000000
+#define AA_MAY_GETOPT 0x02000000
+
+#define NET_PERMS_MASK (AA_MAY_SEND | AA_MAY_RECEIVE | AA_MAY_CREATE | \
+ AA_MAY_SHUTDOWN | AA_MAY_BIND | AA_MAY_LISTEN | \
+ AA_MAY_CONNECT | AA_MAY_ACCEPT | AA_MAY_SETATTR | \
+ AA_MAY_GETATTR | AA_MAY_SETOPT | AA_MAY_GETOPT)
+
+#define NET_FS_PERMS (AA_MAY_SEND | AA_MAY_RECEIVE | AA_MAY_CREATE | \
+ AA_MAY_SHUTDOWN | AA_MAY_CONNECT | AA_MAY_RENAME |\
+ AA_MAY_SETATTR | AA_MAY_GETATTR | AA_MAY_CHMOD | \
+ AA_MAY_CHOWN | AA_MAY_CHGRP | AA_MAY_LOCK | \
+ AA_MAY_MPROT)
+
+#define NET_PEER_MASK (AA_MAY_SEND | AA_MAY_RECEIVE | AA_MAY_CONNECT | \
+ AA_MAY_ACCEPT)
+struct aa_sk_ctx {
+ struct aa_label *label;
+ struct aa_label *peer;
+ struct path path;
+};
+
+#define SK_CTX(X) ((X)->sk_security)
+#define SOCK_ctx(X) SOCK_INODE(X)->i_security
+#define DEFINE_AUDIT_NET(NAME, OP, SK, F, T, P) \
+ struct lsm_network_audit NAME ## _net = { .sk = (SK), \
+ .family = (F)}; \
+ DEFINE_AUDIT_DATA(NAME, \
+ ((SK) && (F) != AF_UNIX) ? LSM_AUDIT_DATA_NET : \
+ LSM_AUDIT_DATA_NONE, \
+ OP); \
+ NAME.u.net = &(NAME ## _net); \
+ aad(&NAME)->net.type = (T); \
+ aad(&NAME)->net.protocol = (P)
+
+#define DEFINE_AUDIT_SK(NAME, OP, SK) \
+ DEFINE_AUDIT_NET(NAME, OP, SK, (SK)->sk_family, (SK)->sk_type, \
+ (SK)->sk_protocol)
+
+/* struct aa_net - network confinement data
+ * @allow: basic network families permissions
+ * @audit: which network permissions to force audit
+ * @quiet: which network permissions to quiet rejects
+ */
+struct aa_net {
+ u16 allow[AF_MAX];
+ u16 audit[AF_MAX];
+ u16 quiet[AF_MAX];
+};
+
+
+extern struct aa_sfs_entry aa_sfs_entry_network[];
+
+void audit_net_cb(struct audit_buffer *ab, void *va);
+int aa_profile_af_perm(struct aa_profile *profile, struct common_audit_data *sa,
+ u32 request, u16 family, int type);
+int aa_af_perm(struct aa_label *label, const char *op, u32 request, u16 family,
+ int type, int protocol);
+static inline int aa_profile_af_sk_perm(struct aa_profile *profile,
+ struct common_audit_data *sa,
+ u32 request,
+ struct sock *sk)
+{
+ return aa_profile_af_perm(profile, sa, request, sk->sk_family,
+ sk->sk_type);
+}
+int aa_sk_perm(const char *op, u32 request, struct sock *sk);
+
+int aa_sock_file_perm(struct aa_label *label, const char *op, u32 request,
+ struct socket *sock);
+
+
+static inline void aa_free_net_rules(struct aa_net *new)
+{
+ /* NOP */
+}
+
+#endif /* __AA_NET_H */
void aa_perm_mask_to_str(char *str, const char *chrs, u32 mask);
-void aa_audit_perm_names(struct audit_buffer *ab, const char **names, u32 mask);
+void aa_audit_perm_names(struct audit_buffer *ab, const char * const *names,
+ u32 mask);
void aa_audit_perm_mask(struct audit_buffer *ab, u32 mask, const char *chrs,
- u32 chrsmask, const char **names, u32 namesmask);
+ u32 chrsmask, const char * const *names, u32 namesmask);
void aa_apply_modes_to_perms(struct aa_profile *profile,
struct aa_perms *perms);
void aa_compute_perms(struct aa_dfa *dfa, unsigned int state,
#include "file.h"
#include "lib.h"
#include "label.h"
+#include "net.h"
#include "perms.h"
#include "resource.h"
* @policy: general match rules governing policy
* @file: The set of rules governing basic file access and domain transitions
* @caps: capabilities for the profile
+ * @net: network controls for the profile
* @rlimits: rlimits for the profile
*
* @dents: dentries for the profiles file entries in apparmorfs
struct aa_policydb policy;
struct aa_file_rules file;
struct aa_caps caps;
+ struct aa_net net;
struct aa_rlimit rlimits;
struct aa_loaddata *rawdata;
return 0;
}
+static inline unsigned int PROFILE_MEDIATES_AF(struct aa_profile *profile,
+ u16 AF) {
+ unsigned int state = PROFILE_MEDIATES(profile, AA_CLASS_NET);
+ u16 be_af = cpu_to_be16(AF);
+
+ if (!state)
+ return 0;
+ return aa_dfa_match_len(profile->policy.dfa, state, (char *) &be_af, 2);
+}
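+/*
+ * Illustrative use: gate network mediation on the address family.
+ * A return of 0 means the profile has no rules for that family:
+ *
+ *	unsigned int state = PROFILE_MEDIATES_AF(profile, AF_INET);
+ *	if (!state)
+ *		return 0;
+ */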
+
/**
* aa_get_profile - increment refcount on profile @p
* @p: profile (MAYBE NULL)
--- /dev/null
+#include <linux/signal.h>
+
+#define SIGUNKNOWN 0
+#define MAXMAPPED_SIG 35
+/* provide a mapping of arch signal to internal signal # for mediation.
+ * Signals that are always aliases (SIGCLD for SIGCHLD, SIGPOLL for
+ * SIGIO) map to the same entry; those that may or may not exist on a
+ * given arch get a separate entry.
+ */
+static const int sig_map[MAXMAPPED_SIG] = {
+ [0] = MAXMAPPED_SIG, /* existence test */
+ [SIGHUP] = 1,
+ [SIGINT] = 2,
+ [SIGQUIT] = 3,
+ [SIGILL] = 4,
+ [SIGTRAP] = 5, /* -, 5, - */
+ [SIGABRT] = 6, /* SIGIOT: -, 6, - */
+ [SIGBUS] = 7, /* 10, 7, 10 */
+ [SIGFPE] = 8,
+ [SIGKILL] = 9,
+ [SIGUSR1] = 10, /* 30, 10, 16 */
+ [SIGSEGV] = 11,
+ [SIGUSR2] = 12, /* 31, 12, 17 */
+ [SIGPIPE] = 13,
+ [SIGALRM] = 14,
+ [SIGTERM] = 15,
+#ifdef SIGSTKFLT
+ [SIGSTKFLT] = 16, /* -, 16, - */
+#endif
+ [SIGCHLD] = 17, /* 20, 17, 18. SIGCHLD -, -, 18 */
+ [SIGCONT] = 18, /* 19, 18, 25 */
+ [SIGSTOP] = 19, /* 17, 19, 23 */
+ [SIGTSTP] = 20, /* 18, 20, 24 */
+ [SIGTTIN] = 21, /* 21, 21, 26 */
+ [SIGTTOU] = 22, /* 22, 22, 27 */
+ [SIGURG] = 23, /* 16, 23, 21 */
+ [SIGXCPU] = 24, /* 24, 24, 30 */
+ [SIGXFSZ] = 25, /* 25, 25, 31 */
+ [SIGVTALRM] = 26, /* 26, 26, 28 */
+ [SIGPROF] = 27, /* 27, 27, 29 */
+ [SIGWINCH] = 28, /* 28, 28, 20 */
+ [SIGIO] = 29, /* SIGPOLL: 23, 29, 22 */
+ [SIGPWR] = 30, /* 29, 30, 19. SIGINFO 29, -, - */
+#ifdef SIGSYS
+ [SIGSYS] = 31, /* 12, 31, 12. often SIG LOST/UNUSED */
+#endif
+#ifdef SIGEMT
+ [SIGEMT] = 32, /* 7, - , 7 */
+#endif
+#if defined(SIGLOST) && SIGPWR != SIGLOST /* sparc */
+ [SIGLOST] = 33, /* unused on Linux */
+#endif
+#if defined(SIGUNUSED) && \
+ defined(SIGLOST) && defined(SIGSYS) && SIGLOST != SIGSYS
+ [SIGUNUSED] = 34, /* -, 31, - */
+#endif
+};
+
+/* this table is ordered post sig_map[sig] mapping */
+static const char *const sig_names[MAXMAPPED_SIG + 1] = {
+ "unknown",
+ "hup",
+ "int",
+ "quit",
+ "ill",
+ "trap",
+ "abrt",
+ "bus",
+ "fpe",
+ "kill",
+ "usr1",
+ "segv",
+ "usr2",
+ "pipe",
+ "alrm",
+ "term",
+ "stkflt",
+ "chld",
+ "cont",
+ "stop",
+ "stp",
+ "ttin",
+ "ttou",
+ "urg",
+ "xcpu",
+ "xfsz",
+ "vtalrm",
+ "prof",
+ "winch",
+ "io",
+ "pwr",
+ "sys",
+ "emt",
+ "lost",
+ "unused",
+
+ "exists", /* always last existence test mapped to MAXMAPPED_SIG */
+};
+
#include "include/context.h"
#include "include/policy.h"
#include "include/ipc.h"
+#include "include/sig_names.h"
/**
* audit_ptrace_mask - convert mask to permission string
}
+static inline int map_signal_num(int sig)
+{
+ if (sig > SIGRTMAX)
+ return SIGUNKNOWN;
+ else if (sig >= SIGRTMIN)
+ return sig - SIGRTMIN + 128; /* rt sigs mapped to 128 */
+ else if (sig <= MAXMAPPED_SIG)
+ return sig_map[sig];
+ return SIGUNKNOWN;
+}
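+/*
+ * Illustrative mapping (given the table above):
+ *	map_signal_num(SIGKILL)      == 9
+ *	map_signal_num(SIGRTMIN + 2) == 130  (rt signals start at 128)
+ *	map_signal_num(SIGRTMAX + 1) == SIGUNKNOWN
+ */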
+
+/**
+ * audit_signal_mask - convert mask to permission string
+ * @ab: audit buffer to write the string to (NOT NULL)
+ * @mask: permission mask to convert
+ */
+static void audit_signal_mask(struct audit_buffer *ab, u32 mask)
+{
+ if (mask & MAY_READ)
+ audit_log_string(ab, "receive");
+ if (mask & MAY_WRITE)
+ audit_log_string(ab, "send");
+}
+
+/**
+ * audit_signal_cb - call back for signal specific audit fields
+ * @ab: audit_buffer (NOT NULL)
+ * @va: audit struct to audit values of (NOT NULL)
+ */
+static void audit_signal_cb(struct audit_buffer *ab, void *va)
+{
+ struct common_audit_data *sa = va;
+
+ if (aad(sa)->request & AA_SIGNAL_PERM_MASK) {
+ audit_log_format(ab, " requested_mask=");
+ audit_signal_mask(ab, aad(sa)->request);
+ if (aad(sa)->denied & AA_SIGNAL_PERM_MASK) {
+ audit_log_format(ab, " denied_mask=");
+ audit_signal_mask(ab, aad(sa)->denied);
+ }
+ }
+ if (aad(sa)->signal <= MAXMAPPED_SIG)
+ audit_log_format(ab, " signal=%s", sig_names[aad(sa)->signal]);
+ else
+ audit_log_format(ab, " signal=rtmin+%d",
+ aad(sa)->signal - 128);
+ audit_log_format(ab, " peer=");
+ aa_label_xaudit(ab, labels_ns(aad(sa)->label), aad(sa)->peer,
+ FLAGS_NONE, GFP_ATOMIC);
+}
+
+/* TODO: update to handle compound name&name2, conditionals */
+static void profile_match_signal(struct aa_profile *profile, const char *label,
+ int signal, struct aa_perms *perms)
+{
+ unsigned int state;
+
+ /* TODO: secondary cache check <profile, profile, perm> */
+ state = aa_dfa_next(profile->policy.dfa,
+ profile->policy.start[AA_CLASS_SIGNAL],
+ signal);
+ state = aa_dfa_match(profile->policy.dfa, state, label);
+ aa_compute_perms(profile->policy.dfa, state, perms);
+}
+
+static int profile_signal_perm(struct aa_profile *profile,
+ struct aa_profile *peer, u32 request,
+ struct common_audit_data *sa)
+{
+ struct aa_perms perms;
+
+ if (profile_unconfined(profile) ||
+ !PROFILE_MEDIATES(profile, AA_CLASS_SIGNAL))
+ return 0;
+
+ aad(sa)->peer = &peer->label;
+ profile_match_signal(profile, peer->base.hname, aad(sa)->signal,
+ &perms);
+ aa_apply_modes_to_perms(profile, &perms);
+ return aa_check_perms(profile, &perms, request, sa, audit_signal_cb);
+}
+
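+/* signal delivery requires the sender have "send" permission to the
+ * target and the target have "receive" permission from the sender;
+ * the check is done as a cross product over the two labels
+ */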
+static int aa_signal_cross_perm(struct aa_profile *sender,
+ struct aa_profile *target,
+ struct common_audit_data *sa)
+{
+ return xcheck(profile_signal_perm(sender, target, MAY_WRITE, sa),
+ profile_signal_perm(target, sender, MAY_READ, sa));
+}
+
+int aa_may_signal(struct aa_label *sender, struct aa_label *target, int sig)
+{
+ DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, OP_SIGNAL);
+
+ aad(&sa)->signal = map_signal_num(sig);
+ return xcheck_labels_profiles(sender, target, aa_signal_cross_perm,
+ &sa);
+}
/* p->label will not updated any more as p is dead */
aa_put_label(rcu_dereference_protected(proxy->label, true));
memset(proxy, 0, sizeof(*proxy));
- proxy->label = (struct aa_label *) PROXY_POISON;
+ RCU_INIT_POINTER(proxy->label, (struct aa_label *)PROXY_POISON);
kfree(proxy);
}
}
* cached label name is present and visible
 * @label->hname only exists if label is namespace hierarchical
*/
-static inline bool use_label_hname(struct aa_ns *ns, struct aa_label *label)
+static inline bool use_label_hname(struct aa_ns *ns, struct aa_label *label,
+ int flags)
{
- if (label->hname && labels_ns(label) == ns)
+ if (label->hname && (!ns || labels_ns(label) == ns) &&
+ !(flags & ~FLAG_SHOW_MODE))
return true;
return false;
view = profiles_ns(profile);
if (view != profile->ns &&
- (!prev_ns || (prev_ns && *prev_ns != profile->ns))) {
+ (!prev_ns || (*prev_ns != profile->ns))) {
if (prev_ns)
*prev_ns = profile->ns;
ns_name = aa_ns_name(view, profile->ns,
AA_BUG(!str && size != 0);
AA_BUG(!label);
- if (!ns)
+ if (flags & FLAG_ABS_ROOT) {
+ ns = root_ns;
+ len = snprintf(str, size, "=");
+ update_for_len(total, len, size, str);
+ } else if (!ns) {
ns = labels_ns(label);
+ }
label_for_each(i, label, profile) {
if (aa_ns_visible(ns, profile->ns, flags & FLAG_VIEW_SUBNS)) {
AA_BUG(!ab);
AA_BUG(!label);
- if (!ns)
- ns = labels_ns(label);
-
- if (!use_label_hname(ns, label) || display_mode(ns, label, flags)) {
+ if (!use_label_hname(ns, label, flags) ||
+ display_mode(ns, label, flags)) {
len = aa_label_asxprint(&name, ns, label, flags, gfp);
if (len == -1) {
AA_DEBUG("label print error");
AA_BUG(!f);
AA_BUG(!label);
- if (!ns)
- ns = labels_ns(label);
-
- if (!use_label_hname(ns, label)) {
+ if (!use_label_hname(ns, label, flags)) {
char *str;
int len;
{
AA_BUG(!label);
- if (!ns)
- ns = labels_ns(label);
-
- if (!use_label_hname(ns, label)) {
+ if (!use_label_hname(ns, label, flags)) {
char *str;
int len;
if (*str == '&')
str++;
}
+ if (*str == '=')
+ base = &root_ns->unconfined->label;
+
error = vec_setup(profile, vec, len, gfp);
if (error)
return ERR_PTR(error);
*str = '\0';
}
-void aa_audit_perm_names(struct audit_buffer *ab, const char **names, u32 mask)
+void aa_audit_perm_names(struct audit_buffer *ab, const char * const *names,
+ u32 mask)
{
const char *fmt = "%s";
unsigned int i, perm = 1;
}
void aa_audit_perm_mask(struct audit_buffer *ab, u32 mask, const char *chrs,
- u32 chrsmask, const char **names, u32 namesmask)
+ u32 chrsmask, const char * const *names, u32 namesmask)
{
char str[33];
#include "include/context.h"
#include "include/file.h"
#include "include/ipc.h"
+#include "include/net.h"
#include "include/path.h"
#include "include/label.h"
#include "include/policy.h"
#include "include/policy_ns.h"
#include "include/procattr.h"
+#include "include/mount.h"
/* Flag indicating whether initialization completed */
int apparmor_initialized;
!(vma->vm_flags & VM_SHARED) ? MAP_PRIVATE : 0);
}
+static int apparmor_sb_mount(const char *dev_name, const struct path *path,
+ const char *type, unsigned long flags, void *data)
+{
+ struct aa_label *label;
+ int error = 0;
+
+ /* Discard magic */
+ if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
+ flags &= ~MS_MGC_MSK;
+
+ flags &= ~AA_MS_IGNORE_MASK;
+
+ label = __begin_current_label_crit_section();
+ if (!unconfined(label)) {
+ if (flags & MS_REMOUNT)
+ error = aa_remount(label, path, flags, data);
+ else if (flags & MS_BIND)
+ error = aa_bind_mount(label, path, dev_name, flags);
+ else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE |
+ MS_UNBINDABLE))
+ error = aa_mount_change_type(label, path, flags);
+ else if (flags & MS_MOVE)
+ error = aa_move_mount(label, path, dev_name);
+ else
+ error = aa_new_mount(label, dev_name, path, type,
+ flags, data);
+ }
+ __end_current_label_crit_section(label);
+
+ return error;
+}
+
+static int apparmor_sb_umount(struct vfsmount *mnt, int flags)
+{
+ struct aa_label *label;
+ int error = 0;
+
+ label = __begin_current_label_crit_section();
+ if (!unconfined(label))
+ error = aa_umount(label, mnt, flags);
+ __end_current_label_crit_section(label);
+
+ return error;
+}
+
+static int apparmor_sb_pivotroot(const struct path *old_path,
+ const struct path *new_path)
+{
+ struct aa_label *label;
+ int error = 0;
+
+ label = aa_get_current_label();
+ if (!unconfined(label))
+ error = aa_pivotroot(label, old_path, new_path);
+ aa_put_label(label);
+
+ return error;
+}
+
static int apparmor_getprocattr(struct task_struct *task, char *name,
char **value)
{
return error;
}
+static int apparmor_task_kill(struct task_struct *target, struct siginfo *info,
+ int sig, u32 secid)
+{
+ struct aa_label *cl, *tl;
+ int error;
+
+ if (secid)
+ /* TODO: after secid to label mapping is done.
+ * Dealing with USB IO specific behavior
+ */
+ return 0;
+ cl = __begin_current_label_crit_section();
+ tl = aa_get_task_label(target);
+ error = aa_may_signal(cl, tl, sig);
+ aa_put_label(tl);
+ __end_current_label_crit_section(cl);
+
+ return error;
+}
+
+/**
+ * apparmor_sk_alloc_security - allocate and attach the sk_security field
+ */
+static int apparmor_sk_alloc_security(struct sock *sk, int family, gfp_t flags)
+{
+ struct aa_sk_ctx *ctx;
+
+ ctx = kzalloc(sizeof(*ctx), flags);
+ if (!ctx)
+ return -ENOMEM;
+
+ SK_CTX(sk) = ctx;
+
+ return 0;
+}
+
+/**
+ * apparmor_sk_free_security - free the sk_security field
+ */
+static void apparmor_sk_free_security(struct sock *sk)
+{
+ struct aa_sk_ctx *ctx = SK_CTX(sk);
+
+ SK_CTX(sk) = NULL;
+ aa_put_label(ctx->label);
+ aa_put_label(ctx->peer);
+ path_put(&ctx->path);
+ kfree(ctx);
+}
+
+/**
+ * apparmor_sk_clone_security - clone the sk_security field
+ */
+static void apparmor_sk_clone_security(const struct sock *sk,
+ struct sock *newsk)
+{
+ struct aa_sk_ctx *ctx = SK_CTX(sk);
+ struct aa_sk_ctx *new = SK_CTX(newsk);
+
+ new->label = aa_get_label(ctx->label);
+ new->peer = aa_get_label(ctx->peer);
+ new->path = ctx->path;
+ path_get(&new->path);
+}
+
+static int aa_sock_create_perm(struct aa_label *label, int family, int type,
+ int protocol)
+{
+ AA_BUG(!label);
+ AA_BUG(in_interrupt());
+
+ return aa_af_perm(label, OP_CREATE, AA_MAY_CREATE, family, type,
+ protocol);
+}
+
+
+/**
+ * apparmor_socket_create - check perms before creating a new socket
+ */
+static int apparmor_socket_create(int family, int type, int protocol, int kern)
+{
+ struct aa_label *label;
+ int error = 0;
+
+ label = begin_current_label_crit_section();
+ if (!(kern || unconfined(label)))
+ error = aa_sock_create_perm(label, family, type, protocol);
+ end_current_label_crit_section(label);
+
+ return error;
+}
+
+/**
+ * apparmor_socket_post_create - setup the per-socket security struct
+ *
+ * Note:
+ * - kernel sockets currently labeled unconfined but we may want to
+ * move to a special kernel label
+ * - socket may not have sk here if created with sock_create_lite or
+ * sock_alloc. These should be accept cases which will be handled in
+ * sock_graft.
+ */
+static int apparmor_socket_post_create(struct socket *sock, int family,
+ int type, int protocol, int kern)
+{
+ struct aa_label *label;
+
+ if (kern) {
+ struct aa_ns *ns = aa_get_current_ns();
+
+ label = aa_get_label(ns_unconfined(ns));
+ aa_put_ns(ns);
+ } else
+ label = aa_get_current_label();
+
+ if (sock->sk) {
+ struct aa_sk_ctx *ctx = SK_CTX(sock->sk);
+
+ aa_put_label(ctx->label);
+ ctx->label = aa_get_label(label);
+ }
+ aa_put_label(label);
+
+ return 0;
+}
+
+/**
+ * apparmor_socket_bind - check perms before bind addr to socket
+ */
+static int apparmor_socket_bind(struct socket *sock,
+ struct sockaddr *address, int addrlen)
+{
+ AA_BUG(!sock);
+ AA_BUG(!sock->sk);
+ AA_BUG(!address);
+ AA_BUG(in_interrupt());
+
+ return aa_sk_perm(OP_BIND, AA_MAY_BIND, sock->sk);
+}
+
+/**
+ * apparmor_socket_connect - check perms before connecting @sock to @address
+ */
+static int apparmor_socket_connect(struct socket *sock,
+ struct sockaddr *address, int addrlen)
+{
+ AA_BUG(!sock);
+ AA_BUG(!sock->sk);
+ AA_BUG(!address);
+ AA_BUG(in_interrupt());
+
+ return aa_sk_perm(OP_CONNECT, AA_MAY_CONNECT, sock->sk);
+}
+
+/**
+ * apparmor_socket_listen - check perms before allowing listen
+ */
+static int apparmor_socket_listen(struct socket *sock, int backlog)
+{
+ AA_BUG(!sock);
+ AA_BUG(!sock->sk);
+ AA_BUG(in_interrupt());
+
+ return aa_sk_perm(OP_LISTEN, AA_MAY_LISTEN, sock->sk);
+}
+
+/**
+ * apparmor_socket_accept - check perms before accepting a new connection.
+ *
+ * Note: while @newsock is created and has some information, the accept
+ * has not been done.
+ */
+static int apparmor_socket_accept(struct socket *sock, struct socket *newsock)
+{
+ AA_BUG(!sock);
+ AA_BUG(!sock->sk);
+ AA_BUG(!newsock);
+ AA_BUG(in_interrupt());
+
+ return aa_sk_perm(OP_ACCEPT, AA_MAY_ACCEPT, sock->sk);
+}
+
+static int aa_sock_msg_perm(const char *op, u32 request, struct socket *sock,
+ struct msghdr *msg, int size)
+{
+ AA_BUG(!sock);
+ AA_BUG(!sock->sk);
+ AA_BUG(!msg);
+ AA_BUG(in_interrupt());
+
+ return aa_sk_perm(op, request, sock->sk);
+}
+
+/**
+ * apparmor_socket_sendmsg - check perms before sending msg to another socket
+ */
+static int apparmor_socket_sendmsg(struct socket *sock,
+ struct msghdr *msg, int size)
+{
+ return aa_sock_msg_perm(OP_SENDMSG, AA_MAY_SEND, sock, msg, size);
+}
+
+/**
+ * apparmor_socket_recvmsg - check perms before receiving a message
+ */
+static int apparmor_socket_recvmsg(struct socket *sock,
+ struct msghdr *msg, int size, int flags)
+{
+ return aa_sock_msg_perm(OP_RECVMSG, AA_MAY_RECEIVE, sock, msg, size);
+}
+
+/* revalidation, get/set attr, shutdown */
+static int aa_sock_perm(const char *op, u32 request, struct socket *sock)
+{
+ AA_BUG(!sock);
+ AA_BUG(!sock->sk);
+ AA_BUG(in_interrupt());
+
+ return aa_sk_perm(op, request, sock->sk);
+}
+
+/**
+ * apparmor_socket_getsockname - check perms before getting the local address
+ */
+static int apparmor_socket_getsockname(struct socket *sock)
+{
+ return aa_sock_perm(OP_GETSOCKNAME, AA_MAY_GETATTR, sock);
+}
+
+/**
+ * apparmor_socket_getpeername - check perms before getting remote address
+ */
+static int apparmor_socket_getpeername(struct socket *sock)
+{
+ return aa_sock_perm(OP_GETPEERNAME, AA_MAY_GETATTR, sock);
+}
+
+/* revalidation, get/set attr, opt */
+static int aa_sock_opt_perm(const char *op, u32 request, struct socket *sock,
+ int level, int optname)
+{
+ AA_BUG(!sock);
+ AA_BUG(!sock->sk);
+ AA_BUG(in_interrupt());
+
+ return aa_sk_perm(op, request, sock->sk);
+}
+
+/**
+ * apparmor_socket_getsockopt - check perms before getting socket options
+ */
+static int apparmor_socket_getsockopt(struct socket *sock, int level,
+ int optname)
+{
+ return aa_sock_opt_perm(OP_GETSOCKOPT, AA_MAY_GETOPT, sock,
+ level, optname);
+}
+
+/**
+ * apparmor_socket_setsockopt - check perms before setting socket options
+ */
+static int apparmor_socket_setsockopt(struct socket *sock, int level,
+ int optname)
+{
+ return aa_sock_opt_perm(OP_SETSOCKOPT, AA_MAY_SETOPT, sock,
+ level, optname);
+}
+
+/**
+ * apparmor_socket_shutdown - check perms before shutting down @sock conn
+ */
+static int apparmor_socket_shutdown(struct socket *sock, int how)
+{
+ return aa_sock_perm(OP_SHUTDOWN, AA_MAY_SHUTDOWN, sock);
+}
+
+/**
+ * apparmor_socket_sock_rcv_skb - check perms before associating skb to sk
+ *
+ * Note: cannot sleep; may be called with locks held
+ *
+ * we don't want protocol specific handling in __skb_recv_datagram() to
+ * deny an incoming connection; socket_sock_rcv_skb() is the hook for that
+ */
+static int apparmor_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+ return 0;
+}
+
+
+static struct aa_label *sk_peer_label(struct sock *sk)
+{
+ struct aa_sk_ctx *ctx = SK_CTX(sk);
+
+ if (ctx->peer)
+ return ctx->peer;
+
+ return ERR_PTR(-ENOPROTOOPT);
+}
+
+/**
+ * apparmor_socket_getpeersec_stream - get security context of peer
+ *
+ * Note: for TCP this is only valid if using ipsec or cipso on the lan
+ */
+static int apparmor_socket_getpeersec_stream(struct socket *sock,
+ char __user *optval,
+ int __user *optlen,
+ unsigned int len)
+{
+ char *name;
+ int slen, error = 0;
+ struct aa_label *label;
+ struct aa_label *peer;
+
+ label = begin_current_label_crit_section();
+ peer = sk_peer_label(sock->sk);
+ if (IS_ERR(peer)) {
+ error = PTR_ERR(peer);
+ goto done;
+ }
+ slen = aa_label_asxprint(&name, labels_ns(label), peer,
+ FLAG_SHOW_MODE | FLAG_VIEW_SUBNS |
+ FLAG_HIDDEN_UNCONFINED, GFP_KERNEL);
+ /* don't include terminating \0 in slen, it breaks some apps */
+ if (slen < 0) {
+ error = -ENOMEM;
+ } else {
+ if (slen > len) {
+ error = -ERANGE;
+ } else if (copy_to_user(optval, name, slen)) {
+ error = -EFAULT;
+ goto out;
+ }
+ if (put_user(slen, optlen))
+ error = -EFAULT;
+out:
+ kfree(name);
+
+ }
+
+done:
+ end_current_label_crit_section(label);
+
+ return error;
+}
+
+/**
+ * apparmor_socket_getpeersec_dgram - get security label of packet
+ * @sock: the peer socket
+ * @skb: packet data
+ * @secid: pointer to where to put the secid of the packet
+ *
+ * Sets the netlabel socket state on sk from parent
+ */
+static int apparmor_socket_getpeersec_dgram(struct socket *sock,
+ struct sk_buff *skb, u32 *secid)
+
+{
+ /* TODO: requires secid support */
+ return -ENOPROTOOPT;
+}
+
+/**
+ * apparmor_sock_graft - Initialize newly created socket
+ * @sk: child sock
+ * @parent: parent socket
+ *
+ * Note: we could set this off of SOCK_CTX(parent), but that requires
+ * tracking the inode; instead just set the sk security information off
+ * of the current (creating) process label.
+ * Labeling of the sk for the accept case should probably be sock based
+ * instead of task based, because of the case where an implicitly
+ * labeled socket is shared by different tasks.
+ */
+static void apparmor_sock_graft(struct sock *sk, struct socket *parent)
+{
+ struct aa_sk_ctx *ctx = SK_CTX(sk);
+
+ if (!ctx->label)
+ ctx->label = aa_get_current_label();
+}
+
static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = {
LSM_HOOK_INIT(ptrace_access_check, apparmor_ptrace_access_check),
LSM_HOOK_INIT(ptrace_traceme, apparmor_ptrace_traceme),
LSM_HOOK_INIT(capget, apparmor_capget),
LSM_HOOK_INIT(capable, apparmor_capable),
+ LSM_HOOK_INIT(sb_mount, apparmor_sb_mount),
+ LSM_HOOK_INIT(sb_umount, apparmor_sb_umount),
+ LSM_HOOK_INIT(sb_pivotroot, apparmor_sb_pivotroot),
+
LSM_HOOK_INIT(path_link, apparmor_path_link),
LSM_HOOK_INIT(path_unlink, apparmor_path_unlink),
LSM_HOOK_INIT(path_symlink, apparmor_path_symlink),
LSM_HOOK_INIT(getprocattr, apparmor_getprocattr),
LSM_HOOK_INIT(setprocattr, apparmor_setprocattr),
+ LSM_HOOK_INIT(sk_alloc_security, apparmor_sk_alloc_security),
+ LSM_HOOK_INIT(sk_free_security, apparmor_sk_free_security),
+ LSM_HOOK_INIT(sk_clone_security, apparmor_sk_clone_security),
+
+ LSM_HOOK_INIT(socket_create, apparmor_socket_create),
+ LSM_HOOK_INIT(socket_post_create, apparmor_socket_post_create),
+ LSM_HOOK_INIT(socket_bind, apparmor_socket_bind),
+ LSM_HOOK_INIT(socket_connect, apparmor_socket_connect),
+ LSM_HOOK_INIT(socket_listen, apparmor_socket_listen),
+ LSM_HOOK_INIT(socket_accept, apparmor_socket_accept),
+ LSM_HOOK_INIT(socket_sendmsg, apparmor_socket_sendmsg),
+ LSM_HOOK_INIT(socket_recvmsg, apparmor_socket_recvmsg),
+ LSM_HOOK_INIT(socket_getsockname, apparmor_socket_getsockname),
+ LSM_HOOK_INIT(socket_getpeername, apparmor_socket_getpeername),
+ LSM_HOOK_INIT(socket_getsockopt, apparmor_socket_getsockopt),
+ LSM_HOOK_INIT(socket_setsockopt, apparmor_socket_setsockopt),
+ LSM_HOOK_INIT(socket_shutdown, apparmor_socket_shutdown),
+ LSM_HOOK_INIT(socket_sock_rcv_skb, apparmor_socket_sock_rcv_skb),
+ LSM_HOOK_INIT(socket_getpeersec_stream,
+ apparmor_socket_getpeersec_stream),
+ LSM_HOOK_INIT(socket_getpeersec_dgram,
+ apparmor_socket_getpeersec_dgram),
+ LSM_HOOK_INIT(sock_graft, apparmor_sock_graft),
+
LSM_HOOK_INIT(cred_alloc_blank, apparmor_cred_alloc_blank),
LSM_HOOK_INIT(cred_free, apparmor_cred_free),
LSM_HOOK_INIT(cred_prepare, apparmor_cred_prepare),
LSM_HOOK_INIT(bprm_committed_creds, apparmor_bprm_committed_creds),
LSM_HOOK_INIT(task_setrlimit, apparmor_task_setrlimit),
+ LSM_HOOK_INIT(task_kill, apparmor_task_kill),
};
/*
--- /dev/null
+/*
+ * AppArmor security module
+ *
+ * This file contains AppArmor mediation of files
+ *
+ * Copyright (C) 1998-2008 Novell/SUSE
+ * Copyright 2009-2017 Canonical Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+
+#include "include/apparmor.h"
+#include "include/audit.h"
+#include "include/context.h"
+#include "include/domain.h"
+#include "include/file.h"
+#include "include/match.h"
+#include "include/mount.h"
+#include "include/path.h"
+#include "include/policy.h"
+
+
+static void audit_mnt_flags(struct audit_buffer *ab, unsigned long flags)
+{
+ if (flags & MS_RDONLY)
+ audit_log_format(ab, "ro");
+ else
+ audit_log_format(ab, "rw");
+ if (flags & MS_NOSUID)
+ audit_log_format(ab, ", nosuid");
+ if (flags & MS_NODEV)
+ audit_log_format(ab, ", nodev");
+ if (flags & MS_NOEXEC)
+ audit_log_format(ab, ", noexec");
+ if (flags & MS_SYNCHRONOUS)
+ audit_log_format(ab, ", sync");
+ if (flags & MS_REMOUNT)
+ audit_log_format(ab, ", remount");
+ if (flags & MS_MANDLOCK)
+ audit_log_format(ab, ", mand");
+ if (flags & MS_DIRSYNC)
+ audit_log_format(ab, ", dirsync");
+ if (flags & MS_NOATIME)
+ audit_log_format(ab, ", noatime");
+ if (flags & MS_NODIRATIME)
+ audit_log_format(ab, ", nodiratime");
+ if (flags & MS_BIND)
+ audit_log_format(ab, flags & MS_REC ? ", rbind" : ", bind");
+ if (flags & MS_MOVE)
+ audit_log_format(ab, ", move");
+ if (flags & MS_SILENT)
+ audit_log_format(ab, ", silent");
+ if (flags & MS_POSIXACL)
+ audit_log_format(ab, ", acl");
+ if (flags & MS_UNBINDABLE)
+ audit_log_format(ab, flags & MS_REC ? ", runbindable" :
+ ", unbindable");
+ if (flags & MS_PRIVATE)
+ audit_log_format(ab, flags & MS_REC ? ", rprivate" :
+ ", private");
+ if (flags & MS_SLAVE)
+ audit_log_format(ab, flags & MS_REC ? ", rslave" :
+ ", slave");
+ if (flags & MS_SHARED)
+ audit_log_format(ab, flags & MS_REC ? ", rshared" :
+ ", shared");
+ if (flags & MS_RELATIME)
+ audit_log_format(ab, ", relatime");
+ if (flags & MS_I_VERSION)
+ audit_log_format(ab, ", iversion");
+ if (flags & MS_STRICTATIME)
+ audit_log_format(ab, ", strictatime");
+ if (flags & MS_NOUSER)
+ audit_log_format(ab, ", nouser");
+}
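+/*
+ * e.g. (illustrative) MS_RDONLY | MS_NOSUID | MS_REC | MS_BIND is
+ * logged as: ro, nosuid, rbind
+ */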
+
+/**
+ * audit_cb - call back for mount specific audit fields
+ * @ab: audit_buffer (NOT NULL)
+ * @va: audit struct to audit values of (NOT NULL)
+ */
+static void audit_cb(struct audit_buffer *ab, void *va)
+{
+ struct common_audit_data *sa = va;
+
+ if (aad(sa)->mnt.type) {
+ audit_log_format(ab, " fstype=");
+ audit_log_untrustedstring(ab, aad(sa)->mnt.type);
+ }
+ if (aad(sa)->mnt.src_name) {
+ audit_log_format(ab, " srcname=");
+ audit_log_untrustedstring(ab, aad(sa)->mnt.src_name);
+ }
+ if (aad(sa)->mnt.trans) {
+ audit_log_format(ab, " trans=");
+ audit_log_untrustedstring(ab, aad(sa)->mnt.trans);
+ }
+ if (aad(sa)->mnt.flags) {
+ audit_log_format(ab, " flags=\"");
+ audit_mnt_flags(ab, aad(sa)->mnt.flags);
+ audit_log_format(ab, "\"");
+ }
+ if (aad(sa)->mnt.data) {
+ audit_log_format(ab, " options=");
+ audit_log_untrustedstring(ab, aad(sa)->mnt.data);
+ }
+}
+
+/**
+ * audit_mount - handle the auditing of mount operations
+ * @profile: the profile being enforced (NOT NULL)
+ * @op: operation being mediated (NOT NULL)
+ * @name: name of object being mediated (MAYBE NULL)
+ * @src_name: src_name of object being mediated (MAYBE_NULL)
+ * @type: type of filesystem (MAYBE_NULL)
+ * @trans: name of trans (MAYBE NULL)
+ * @flags: filesystem independent mount flags
+ * @data: filesystem mount flags
+ * @request: permissions requested
+ * @perms: the permissions computed for the request (NOT NULL)
+ * @info: extra information message (MAYBE NULL)
+ * @error: 0 if operation allowed else failure error code
+ *
+ * Returns: %0 or error on failure
+ */
+static int audit_mount(struct aa_profile *profile, const char *op,
+ const char *name, const char *src_name,
+ const char *type, const char *trans,
+ unsigned long flags, const void *data, u32 request,
+ struct aa_perms *perms, const char *info, int error)
+{
+ int audit_type = AUDIT_APPARMOR_AUTO;
+ DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, op);
+
+ if (likely(!error)) {
+ u32 mask = perms->audit;
+
+ if (unlikely(AUDIT_MODE(profile) == AUDIT_ALL))
+ mask = 0xffff;
+
+ /* mask off perms that are not being force audited */
+ request &= mask;
+
+ if (likely(!request))
+ return 0;
+ audit_type = AUDIT_APPARMOR_AUDIT;
+ } else {
+ /* only report permissions that were denied */
+ request = request & ~perms->allow;
+
+ if (request & perms->kill)
+ audit_type = AUDIT_APPARMOR_KILL;
+
+ /* quiet known rejects, assumes quiet and kill do not overlap */
+ if ((request & perms->quiet) &&
+ AUDIT_MODE(profile) != AUDIT_NOQUIET &&
+ AUDIT_MODE(profile) != AUDIT_ALL)
+ request &= ~perms->quiet;
+
+ if (!request)
+ return error;
+ }
+
+ aad(&sa)->name = name;
+ aad(&sa)->mnt.src_name = src_name;
+ aad(&sa)->mnt.type = type;
+ aad(&sa)->mnt.trans = trans;
+ aad(&sa)->mnt.flags = flags;
+ if (data && (perms->audit & AA_AUDIT_DATA))
+ aad(&sa)->mnt.data = data;
+ aad(&sa)->info = info;
+ aad(&sa)->error = error;
+
+ return aa_audit(audit_type, profile, &sa, audit_cb);
+}
+
+/**
+ * match_mnt_flags - Do an ordered match on mount flags
+ * @dfa: dfa to match against
+ * @state: state to start in
+ * @flags: mount flags to match against
+ *
+ * Mount flags are encoded as an ordered match. This is done instead of
+ * checking against a simple bitmask, to allow for logical operations
+ * on the flags.
+ *
+ * Returns: next state after flags match
+ */
+static unsigned int match_mnt_flags(struct aa_dfa *dfa, unsigned int state,
+ unsigned long flags)
+{
+ unsigned int i;
+
+ for (i = 0; i <= 31 ; ++i) {
+ if ((1 << i) & flags)
+ state = aa_dfa_next(dfa, state, i + 1);
+ }
+
+ return state;
+}
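+/*
+ * e.g. (illustrative) MS_RDONLY is bit 0 of @flags, so it is fed to
+ * the DFA as input value 1; MS_NOSUID (bit 1) as value 2; and so on.
+ */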
+
+/**
+ * compute_mnt_perms - compute the mount permissions associated with @state
+ * @dfa: dfa to match against (NOT NULL)
+ * @state: state match finished in
+ *
+ * Returns: mount permissions
+ */
+static struct aa_perms compute_mnt_perms(struct aa_dfa *dfa,
+ unsigned int state)
+{
+ struct aa_perms perms;
+
+ perms.kill = 0;
+ perms.allow = dfa_user_allow(dfa, state);
+ perms.audit = dfa_user_audit(dfa, state);
+ perms.quiet = dfa_user_quiet(dfa, state);
+ perms.xindex = dfa_user_xindex(dfa, state);
+
+ return perms;
+}
+
+static const char * const mnt_info_table[] = {
+ "match succeeded",
+ "failed mntpnt match",
+ "failed srcname match",
+ "failed type match",
+ "failed flags match",
+ "failed data match"
+};
+
+/*
+ * Returns 0 on success, else the element that the match failed in;
+ * this is the index into the mnt_info_table above
+ */
+static int do_match_mnt(struct aa_dfa *dfa, unsigned int start,
+ const char *mntpnt, const char *devname,
+ const char *type, unsigned long flags,
+ void *data, bool binary, struct aa_perms *perms)
+{
+ unsigned int state;
+
+ AA_BUG(!dfa);
+ AA_BUG(!perms);
+
+ state = aa_dfa_match(dfa, start, mntpnt);
+ state = aa_dfa_null_transition(dfa, state);
+ if (!state)
+ return 1;
+
+ if (devname)
+ state = aa_dfa_match(dfa, state, devname);
+ state = aa_dfa_null_transition(dfa, state);
+ if (!state)
+ return 2;
+
+ if (type)
+ state = aa_dfa_match(dfa, state, type);
+ state = aa_dfa_null_transition(dfa, state);
+ if (!state)
+ return 3;
+
+ state = match_mnt_flags(dfa, state, flags);
+ if (!state)
+ return 4;
+ *perms = compute_mnt_perms(dfa, state);
+ if (perms->allow & AA_MAY_MOUNT)
+ return 0;
+
+	/* only match data if not binary and the DFA flags that data is expected */
+ if (data && !binary && (perms->allow & AA_MNT_CONT_MATCH)) {
+ state = aa_dfa_null_transition(dfa, state);
+ if (!state)
+ return 4;
+
+ state = aa_dfa_match(dfa, state, data);
+ if (!state)
+ return 5;
+ *perms = compute_mnt_perms(dfa, state);
+ if (perms->allow & AA_MAY_MOUNT)
+ return 0;
+ }
+
+ /* failed at end of flags match */
+ return 4;
+}
+
+
+static int path_flags(struct aa_profile *profile, const struct path *path)
+{
+ AA_BUG(!profile);
+ AA_BUG(!path);
+
+ return profile->path_flags |
+ (S_ISDIR(path->dentry->d_inode->i_mode) ? PATH_IS_DIR : 0);
+}
+
+/**
+ * match_mnt_path_str - handle path matching for mount
+ * @profile: the confining profile
+ * @mntpath: for the mntpnt (NOT NULL)
+ * @buffer: buffer to be used to lookup mntpath
+ * @devname: string for the devname/src_name (MAY BE NULL OR ERRPTR)
+ * @type: string for the dev type (MAYBE NULL)
+ * @flags: mount flags to match
+ * @data: fs mount data (MAYBE NULL)
+ * @binary: whether @data is binary
+ * @devinfo: error str if (IS_ERR(@devname))
+ *
+ * Returns: 0 on success else error
+ */
+static int match_mnt_path_str(struct aa_profile *profile,
+ const struct path *mntpath, char *buffer,
+ const char *devname, const char *type,
+ unsigned long flags, void *data, bool binary,
+ const char *devinfo)
+{
+ struct aa_perms perms = { };
+ const char *mntpnt = NULL, *info = NULL;
+ int pos, error;
+
+ AA_BUG(!profile);
+ AA_BUG(!mntpath);
+ AA_BUG(!buffer);
+
+ error = aa_path_name(mntpath, path_flags(profile, mntpath), buffer,
+ &mntpnt, &info, profile->disconnected);
+ if (error)
+ goto audit;
+ if (IS_ERR(devname)) {
+ error = PTR_ERR(devname);
+ devname = NULL;
+ info = devinfo;
+ goto audit;
+ }
+
+ error = -EACCES;
+ pos = do_match_mnt(profile->policy.dfa,
+ profile->policy.start[AA_CLASS_MOUNT],
+ mntpnt, devname, type, flags, data, binary, &perms);
+ if (pos) {
+ info = mnt_info_table[pos];
+ goto audit;
+ }
+ error = 0;
+
+audit:
+ return audit_mount(profile, OP_MOUNT, mntpnt, devname, type, NULL,
+ flags, data, AA_MAY_MOUNT, &perms, info, error);
+}
+
+/**
+ * match_mnt - handle path matching for mount
+ * @profile: the confining profile
+ * @mntpath: for the mntpnt (NOT NULL)
+ * @buffer: buffer to be used to lookup mntpath
+ * @devpath: path devname/src_name (MAYBE NULL)
+ * @devbuffer: buffer to be used to lookup devname/src_name
+ * @type: string for the dev type (MAYBE NULL)
+ * @flags: mount flags to match
+ * @data: fs mount data (MAYBE NULL)
+ * @binary: whether @data is binary
+ *
+ * Returns: 0 on success else error
+ */
+static int match_mnt(struct aa_profile *profile, const struct path *path,
+ char *buffer, struct path *devpath, char *devbuffer,
+ const char *type, unsigned long flags, void *data,
+ bool binary)
+{
+ const char *devname = NULL, *info = NULL;
+ int error = -EACCES;
+
+ AA_BUG(!profile);
+ AA_BUG(devpath && !devbuffer);
+
+ if (devpath) {
+ error = aa_path_name(devpath, path_flags(profile, devpath),
+ devbuffer, &devname, &info,
+ profile->disconnected);
+ if (error)
+ devname = ERR_PTR(error);
+ }
+
+ return match_mnt_path_str(profile, path, buffer, devname, type, flags,
+ data, binary, info);
+}
+
+int aa_remount(struct aa_label *label, const struct path *path,
+ unsigned long flags, void *data)
+{
+ struct aa_profile *profile;
+ char *buffer = NULL;
+ bool binary;
+ int error;
+
+ AA_BUG(!label);
+ AA_BUG(!path);
+
+ binary = path->dentry->d_sb->s_type->fs_flags & FS_BINARY_MOUNTDATA;
+
+ get_buffers(buffer);
+ error = fn_for_each_confined(label, profile,
+ match_mnt(profile, path, buffer, NULL, NULL, NULL,
+ flags, data, binary));
+ put_buffers(buffer);
+
+ return error;
+}
+
+int aa_bind_mount(struct aa_label *label, const struct path *path,
+ const char *dev_name, unsigned long flags)
+{
+ struct aa_profile *profile;
+ char *buffer = NULL, *old_buffer = NULL;
+ struct path old_path;
+ int error;
+
+ AA_BUG(!label);
+ AA_BUG(!path);
+
+ if (!dev_name || !*dev_name)
+ return -EINVAL;
+
+ flags &= MS_REC | MS_BIND;
+
+ error = kern_path(dev_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path);
+ if (error)
+ return error;
+
+ get_buffers(buffer, old_buffer);
+ error = fn_for_each_confined(label, profile,
+ match_mnt(profile, path, buffer, &old_path, old_buffer,
+ NULL, flags, NULL, false));
+ put_buffers(buffer, old_buffer);
+ path_put(&old_path);
+
+ return error;
+}
+
+int aa_mount_change_type(struct aa_label *label, const struct path *path,
+ unsigned long flags)
+{
+ struct aa_profile *profile;
+ char *buffer = NULL;
+ int error;
+
+ AA_BUG(!label);
+ AA_BUG(!path);
+
+ /* These are the flags allowed by do_change_type() */
+ flags &= (MS_REC | MS_SILENT | MS_SHARED | MS_PRIVATE | MS_SLAVE |
+ MS_UNBINDABLE);
+
+ get_buffers(buffer);
+ error = fn_for_each_confined(label, profile,
+ match_mnt(profile, path, buffer, NULL, NULL, NULL,
+ flags, NULL, false));
+ put_buffers(buffer);
+
+ return error;
+}
+
+int aa_move_mount(struct aa_label *label, const struct path *path,
+ const char *orig_name)
+{
+ struct aa_profile *profile;
+ char *buffer = NULL, *old_buffer = NULL;
+ struct path old_path;
+ int error;
+
+ AA_BUG(!label);
+ AA_BUG(!path);
+
+ if (!orig_name || !*orig_name)
+ return -EINVAL;
+
+ error = kern_path(orig_name, LOOKUP_FOLLOW, &old_path);
+ if (error)
+ return error;
+
+ get_buffers(buffer, old_buffer);
+ error = fn_for_each_confined(label, profile,
+ match_mnt(profile, path, buffer, &old_path, old_buffer,
+ NULL, MS_MOVE, NULL, false));
+ put_buffers(buffer, old_buffer);
+ path_put(&old_path);
+
+ return error;
+}
+
+int aa_new_mount(struct aa_label *label, const char *dev_name,
+ const struct path *path, const char *type, unsigned long flags,
+ void *data)
+{
+ struct aa_profile *profile;
+ char *buffer = NULL, *dev_buffer = NULL;
+ bool binary = true;
+ int error;
+ int requires_dev = 0;
+ struct path tmp_path, *dev_path = NULL;
+
+ AA_BUG(!label);
+ AA_BUG(!path);
+
+ if (type) {
+ struct file_system_type *fstype;
+
+ fstype = get_fs_type(type);
+ if (!fstype)
+ return -ENODEV;
+ binary = fstype->fs_flags & FS_BINARY_MOUNTDATA;
+ requires_dev = fstype->fs_flags & FS_REQUIRES_DEV;
+ put_filesystem(fstype);
+
+ if (requires_dev) {
+ if (!dev_name || !*dev_name)
+ return -ENOENT;
+
+ error = kern_path(dev_name, LOOKUP_FOLLOW, &tmp_path);
+ if (error)
+ return error;
+ dev_path = &tmp_path;
+ }
+ }
+
+ get_buffers(buffer, dev_buffer);
+ if (dev_path) {
+ error = fn_for_each_confined(label, profile,
+ match_mnt(profile, path, buffer, dev_path, dev_buffer,
+ type, flags, data, binary));
+ } else {
+ error = fn_for_each_confined(label, profile,
+ match_mnt_path_str(profile, path, buffer, dev_name,
+ type, flags, data, binary, NULL));
+ }
+ put_buffers(buffer, dev_buffer);
+ if (dev_path)
+ path_put(dev_path);
+
+ return error;
+}
+
+static int profile_umount(struct aa_profile *profile, struct path *path,
+ char *buffer)
+{
+ struct aa_perms perms = { };
+ const char *name = NULL, *info = NULL;
+ unsigned int state;
+ int error;
+
+ AA_BUG(!profile);
+ AA_BUG(!path);
+
+ error = aa_path_name(path, path_flags(profile, path), buffer, &name,
+ &info, profile->disconnected);
+ if (error)
+ goto audit;
+
+ state = aa_dfa_match(profile->policy.dfa,
+ profile->policy.start[AA_CLASS_MOUNT],
+ name);
+ perms = compute_mnt_perms(profile->policy.dfa, state);
+ if (AA_MAY_UMOUNT & ~perms.allow)
+ error = -EACCES;
+
+audit:
+ return audit_mount(profile, OP_UMOUNT, name, NULL, NULL, NULL, 0, NULL,
+ AA_MAY_UMOUNT, &perms, info, error);
+}
+
+int aa_umount(struct aa_label *label, struct vfsmount *mnt, int flags)
+{
+ struct aa_profile *profile;
+ char *buffer = NULL;
+ int error;
+ struct path path = { .mnt = mnt, .dentry = mnt->mnt_root };
+
+ AA_BUG(!label);
+ AA_BUG(!mnt);
+
+ get_buffers(buffer);
+ error = fn_for_each_confined(label, profile,
+ profile_umount(profile, &path, buffer));
+ put_buffers(buffer);
+
+ return error;
+}
+
+/* helper fn for transition on pivotroot
+ *
+ * Returns: label for transition or ERR_PTR. Does not return NULL
+ */
+static struct aa_label *build_pivotroot(struct aa_profile *profile,
+ const struct path *new_path,
+ char *new_buffer,
+ const struct path *old_path,
+ char *old_buffer)
+{
+ const char *old_name, *new_name = NULL, *info = NULL;
+ const char *trans_name = NULL;
+ struct aa_perms perms = { };
+ unsigned int state;
+ int error;
+
+ AA_BUG(!profile);
+ AA_BUG(!new_path);
+ AA_BUG(!old_path);
+
+ if (profile_unconfined(profile))
+ return aa_get_newest_label(&profile->label);
+
+ error = aa_path_name(old_path, path_flags(profile, old_path),
+ old_buffer, &old_name, &info,
+ profile->disconnected);
+ if (error)
+ goto audit;
+ error = aa_path_name(new_path, path_flags(profile, new_path),
+ new_buffer, &new_name, &info,
+ profile->disconnected);
+ if (error)
+ goto audit;
+
+ error = -EACCES;
+ state = aa_dfa_match(profile->policy.dfa,
+ profile->policy.start[AA_CLASS_MOUNT],
+ new_name);
+ state = aa_dfa_null_transition(profile->policy.dfa, state);
+ state = aa_dfa_match(profile->policy.dfa, state, old_name);
+ perms = compute_mnt_perms(profile->policy.dfa, state);
+
+ if (AA_MAY_PIVOTROOT & perms.allow)
+ error = 0;
+
+audit:
+ error = audit_mount(profile, OP_PIVOTROOT, new_name, old_name,
+ NULL, trans_name, 0, NULL, AA_MAY_PIVOTROOT,
+ &perms, info, error);
+ if (error)
+ return ERR_PTR(error);
+
+ return aa_get_newest_label(&profile->label);
+}
+
+int aa_pivotroot(struct aa_label *label, const struct path *old_path,
+ const struct path *new_path)
+{
+ struct aa_profile *profile;
+ struct aa_label *target = NULL;
+ char *old_buffer = NULL, *new_buffer = NULL, *info = NULL;
+ int error;
+
+ AA_BUG(!label);
+ AA_BUG(!old_path);
+ AA_BUG(!new_path);
+
+ get_buffers(old_buffer, new_buffer);
+ target = fn_label_build(label, profile, GFP_ATOMIC,
+ build_pivotroot(profile, new_path, new_buffer,
+ old_path, old_buffer));
+ if (!target) {
+ info = "label build failed";
+ error = -ENOMEM;
+ goto fail;
+ } else if (!IS_ERR(target)) {
+ error = aa_replace_current_label(target);
+ if (error) {
+ /* TODO: audit target */
+ aa_put_label(target);
+ goto out;
+ }
+ } else
+ /* already audited error */
+ error = PTR_ERR(target);
+out:
+ put_buffers(old_buffer, new_buffer);
+
+ return error;
+
+fail:
+ /* TODO: add back in auditing of new_name and old_name */
+ error = fn_for_each(label, profile,
+ audit_mount(profile, OP_PIVOTROOT, NULL /*new_name */,
+ NULL /* old_name */,
+ NULL, NULL,
+ 0, NULL, AA_MAY_PIVOTROOT, &nullperms, info,
+ error));
+ goto out;
+}
--- /dev/null
+/*
+ * AppArmor security module
+ *
+ * This file contains AppArmor network mediation
+ *
+ * Copyright (C) 1998-2008 Novell/SUSE
+ * Copyright 2009-2017 Canonical Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+#include "include/apparmor.h"
+#include "include/audit.h"
+#include "include/context.h"
+#include "include/label.h"
+#include "include/net.h"
+#include "include/policy.h"
+
+#include "net_names.h"
+
+
+struct aa_sfs_entry aa_sfs_entry_network[] = {
+ AA_SFS_FILE_STRING("af_mask", AA_SFS_AF_MASK),
+ { }
+};
+
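+/* one name per permission bit, least significant bit first; "unknown"
+ * marks bits with no assigned network permission
+ */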
+static const char * const net_mask_names[] = {
+ "unknown",
+ "send",
+ "receive",
+ "unknown",
+
+ "create",
+ "shutdown",
+ "connect",
+ "unknown",
+
+ "setattr",
+ "getattr",
+ "setcred",
+ "getcred",
+
+ "chmod",
+ "chown",
+ "chgrp",
+ "lock",
+
+ "mmap",
+ "mprot",
+ "unknown",
+ "unknown",
+
+ "accept",
+ "bind",
+ "listen",
+ "unknown",
+
+ "setopt",
+ "getopt",
+ "unknown",
+ "unknown",
+
+ "unknown",
+ "unknown",
+ "unknown",
+ "unknown",
+};
+
+
+/* audit callback for net specific fields */
+void audit_net_cb(struct audit_buffer *ab, void *va)
+{
+ struct common_audit_data *sa = va;
+
+ audit_log_format(ab, " family=");
+ if (address_family_names[sa->u.net->family])
+ audit_log_string(ab, address_family_names[sa->u.net->family]);
+ else
+ audit_log_format(ab, "\"unknown(%d)\"", sa->u.net->family);
+ audit_log_format(ab, " sock_type=");
+ if (sock_type_names[aad(sa)->net.type])
+ audit_log_string(ab, sock_type_names[aad(sa)->net.type]);
+ else
+ audit_log_format(ab, "\"unknown(%d)\"", aad(sa)->net.type);
+ audit_log_format(ab, " protocol=%d", aad(sa)->net.protocol);
+
+ if (aad(sa)->request & NET_PERMS_MASK) {
+ audit_log_format(ab, " requested_mask=");
+ aa_audit_perm_mask(ab, aad(sa)->request, NULL, 0,
+ net_mask_names, NET_PERMS_MASK);
+
+ if (aad(sa)->denied & NET_PERMS_MASK) {
+ audit_log_format(ab, " denied_mask=");
+ aa_audit_perm_mask(ab, aad(sa)->denied, NULL, 0,
+ net_mask_names, NET_PERMS_MASK);
+ }
+ }
+ if (aad(sa)->peer) {
+ audit_log_format(ab, " peer=");
+ aa_label_xaudit(ab, labels_ns(aad(sa)->label), aad(sa)->peer,
+ FLAGS_NONE, GFP_ATOMIC);
+ }
+}
+
+
+/* Generic af perm */
+int aa_profile_af_perm(struct aa_profile *profile, struct common_audit_data *sa,
+ u32 request, u16 family, int type)
+{
+ struct aa_perms perms = { };
+
+ AA_BUG(family >= AF_MAX);
+ AA_BUG(type < 0 || type >= SOCK_MAX);
+
+ if (profile_unconfined(profile))
+ return 0;
+
+ perms.allow = (profile->net.allow[family] & (1 << type)) ?
+ ALL_PERMS_MASK : 0;
+ perms.audit = (profile->net.audit[family] & (1 << type)) ?
+ ALL_PERMS_MASK : 0;
+ perms.quiet = (profile->net.quiet[family] & (1 << type)) ?
+ ALL_PERMS_MASK : 0;
+ aa_apply_modes_to_perms(profile, &perms);
+
+ return aa_check_perms(profile, &perms, request, sa, audit_net_cb);
+}
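+/*
+ * Illustrative: the per-family u16 masks are indexed by socket type,
+ * so profile->net.allow[AF_INET] & (1 << SOCK_STREAM) being set grants
+ * the full permission mask for inet stream sockets.
+ */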
+
+int aa_af_perm(struct aa_label *label, const char *op, u32 request, u16 family,
+ int type, int protocol)
+{
+ struct aa_profile *profile;
+ DEFINE_AUDIT_NET(sa, op, NULL, family, type, protocol);
+
+ return fn_for_each_confined(label, profile,
+ aa_profile_af_perm(profile, &sa, request, family,
+ type));
+}
+
+static int aa_label_sk_perm(struct aa_label *label, const char *op, u32 request,
+ struct sock *sk)
+{
+ struct aa_profile *profile;
+ DEFINE_AUDIT_SK(sa, op, sk);
+
+ AA_BUG(!label);
+ AA_BUG(!sk);
+
+ if (unconfined(label))
+ return 0;
+
+ return fn_for_each_confined(label, profile,
+ aa_profile_af_sk_perm(profile, &sa, request, sk));
+}
+
+int aa_sk_perm(const char *op, u32 request, struct sock *sk)
+{
+ struct aa_label *label;
+ int error;
+
+ AA_BUG(!sk);
+ AA_BUG(in_interrupt());
+
+ /* TODO: switch to begin_current_label ???? */
+ label = begin_current_label_crit_section();
+ error = aa_label_sk_perm(label, op, request, sk);
+ end_current_label_crit_section(label);
+
+ return error;
+}
+
+
+int aa_sock_file_perm(struct aa_label *label, const char *op, u32 request,
+ struct socket *sock)
+{
+ AA_BUG(!label);
+ AA_BUG(!sock);
+ AA_BUG(!sock->sk);
+
+ return aa_label_sk_perm(label, op, request, sock->sk);
+}
return NULL;
}
-/**
- * aa_new_null_profile - create or find a null-X learning profile
- * @parent: profile that caused this profile to be created (NOT NULL)
- * @hat: true if the null- learning profile is a hat
- * @base: name to base the null profile off of
- * @gfp: type of allocation
- *
- * Find/Create a null- complain mode profile used in learning mode. The
- * name of the profile is unique and follows the format of parent//null-XXX.
- * where XXX is based on the @name or if that fails or is not supplied
- * a unique number
- *
- * null profiles are added to the profile list but the list does not
- * hold a count on them so that they are automatically released when
- * not in use.
- *
- * Returns: new refcounted profile else NULL on failure
- */
-struct aa_profile *aa_new_null_profile(struct aa_profile *parent, bool hat,
- const char *base, gfp_t gfp)
-{
- struct aa_profile *profile;
- char *name;
-
- AA_BUG(!parent);
-
- if (base) {
- name = kmalloc(strlen(parent->base.hname) + 8 + strlen(base),
- gfp);
- if (name) {
- sprintf(name, "%s//null-%s", parent->base.hname, base);
- goto name;
- }
- /* fall through to try shorter uniq */
- }
-
- name = kmalloc(strlen(parent->base.hname) + 2 + 7 + 8, gfp);
- if (!name)
- return NULL;
- sprintf(name, "%s//null-%x", parent->base.hname,
- atomic_inc_return(&parent->ns->uniq_null));
-
-name:
- /* lookup to see if this is a dup creation */
- profile = aa_find_child(parent, basename(name));
- if (profile)
- goto out;
-
- profile = aa_alloc_profile(name, NULL, gfp);
- if (!profile)
- goto fail;
-
- profile->mode = APPARMOR_COMPLAIN;
- profile->label.flags |= FLAG_NULL;
- if (hat)
- profile->label.flags |= FLAG_HAT;
- profile->path_flags = parent->path_flags;
-
- /* released on free_profile */
- rcu_assign_pointer(profile->parent, aa_get_profile(parent));
- profile->ns = aa_get_ns(parent->ns);
- profile->file.dfa = aa_get_dfa(nulldfa);
- profile->policy.dfa = aa_get_dfa(nulldfa);
-
- mutex_lock(&profile->ns->lock);
- __add_profile(&parent->base.profiles, profile);
- mutex_unlock(&profile->ns->lock);
-
- /* refcount released by caller */
-out:
- kfree(name);
-
- return profile;
-
-fail:
- aa_free_profile(profile);
- return NULL;
-}
-
/* TODO: profile accounting - setup in remove */
/**
return profile;
}
+/**
+ * aa_new_null_profile - create or find a null-X learning profile
+ * @parent: profile that caused this profile to be created (NOT NULL)
+ * @hat: true if the null- learning profile is a hat
+ * @base: name to base the null profile off of
+ * @gfp: type of allocation
+ *
+ * Find/Create a null- complain mode profile used in learning mode. The
+ * name of the profile is unique and follows the format parent//null-XXX,
+ * where XXX is based on @base or, if that fails or @base is not supplied,
+ * a unique number.
+ *
+ * null profiles are added to the profile list but the list does not
+ * hold a count on them so that they are automatically released when
+ * not in use.
+ *
+ * Returns: new refcounted profile else NULL on failure
+ */
+struct aa_profile *aa_new_null_profile(struct aa_profile *parent, bool hat,
+ const char *base, gfp_t gfp)
+{
+ struct aa_profile *p, *profile;
+ const char *bname;
+ char *name;
+
+ AA_BUG(!parent);
+
+ if (base) {
+ name = kmalloc(strlen(parent->base.hname) + 8 + strlen(base),
+ gfp);
+ if (name) {
+ sprintf(name, "%s//null-%s", parent->base.hname, base);
+ goto name;
+ }
+ /* fall through to try shorter uniq */
+ }
+
+ name = kmalloc(strlen(parent->base.hname) + 2 + 7 + 8, gfp);
+ if (!name)
+ return NULL;
+ sprintf(name, "%s//null-%x", parent->base.hname,
+ atomic_inc_return(&parent->ns->uniq_null));
+
+name:
+ /* lookup to see if this is a dup creation */
+ bname = basename(name);
+ profile = aa_find_child(parent, bname);
+ if (profile)
+ goto out;
+
+ profile = aa_alloc_profile(name, NULL, gfp);
+ if (!profile)
+ goto fail;
+
+ profile->mode = APPARMOR_COMPLAIN;
+ profile->label.flags |= FLAG_NULL;
+ if (hat)
+ profile->label.flags |= FLAG_HAT;
+ profile->path_flags = parent->path_flags;
+
+ /* released on free_profile */
+ rcu_assign_pointer(profile->parent, aa_get_profile(parent));
+ profile->ns = aa_get_ns(parent->ns);
+ profile->file.dfa = aa_get_dfa(nulldfa);
+ profile->policy.dfa = aa_get_dfa(nulldfa);
+
+ mutex_lock(&profile->ns->lock);
+ p = __find_child(&parent->base.profiles, bname);
+ if (p) {
+ aa_free_profile(profile);
+ profile = aa_get_profile(p);
+ } else {
+ __add_profile(&parent->base.profiles, profile);
+ }
+ mutex_unlock(&profile->ns->lock);
+
+ /* refcount released by caller */
+out:
+ kfree(name);
+
+ return profile;
+
+fail:
+ aa_free_profile(profile);
+ return NULL;
+}
+
/**
* replacement_allowed - test to see if replacement is allowed
* @profile: profile to test if it can be replaced (MAYBE NULL)
ns->unconfined->label.flags |= FLAG_IX_ON_NAME_ERROR |
FLAG_IMMUTIBLE | FLAG_NS_COUNT | FLAG_UNCONFINED;
ns->unconfined->mode = APPARMOR_UNCONFINED;
+ ns->unconfined->file.dfa = aa_get_dfa(nulldfa);
+ ns->unconfined->policy.dfa = aa_get_dfa(nulldfa);
/* ns and ns->unconfined share ns->unconfined refcount */
ns->unconfined->ns = ns;
audit_log_format(ab, " ns=");
audit_log_untrustedstring(ab, aad(sa)->iface.ns);
}
- if (aad(sa)->iface.name) {
+ if (aad(sa)->name) {
audit_log_format(ab, " name=");
- audit_log_untrustedstring(ab, aad(sa)->iface.name);
+ audit_log_untrustedstring(ab, aad(sa)->name);
}
if (aad(sa)->iface.pos)
audit_log_format(ab, " offset=%ld", aad(sa)->iface.pos);
aad(&sa)->iface.pos = e->pos - e->start;
aad(&sa)->iface.ns = ns_name;
if (new)
- aad(&sa)->iface.name = new->base.hname;
+ aad(&sa)->name = new->base.hname;
else
- aad(&sa)->iface.name = name;
+ aad(&sa)->name = name;
aad(&sa)->info = info;
aad(&sa)->error = error;
return 0;
}
+static bool unpack_u16(struct aa_ext *e, u16 *data, const char *name)
+{
+ if (unpack_nameX(e, AA_U16, name)) {
+ if (!inbounds(e, sizeof(u16)))
+ return 0;
+ if (data)
+ *data = le16_to_cpu(get_unaligned((__le16 *) e->pos));
+ e->pos += sizeof(u16);
+ return 1;
+ }
+ return 0;
+}
+
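The decode above reads a little-endian u16 from an unaligned position in the
policy stream; it is equivalent to assembling the two bytes by hand, as in
this minimal sketch (not kernel code):

	#include <stdint.h>

	/* Decode a little-endian u16 from an unaligned byte stream. */
	static uint16_t le16_decode(const uint8_t *p)
	{
		return (uint16_t)(p[0] | (p[1] << 8));
	}
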
static bool unpack_u32(struct aa_ext *e, u32 *data, const char *name)
{
if (unpack_nameX(e, AA_U32, name)) {
*/
static bool unpack_trans_table(struct aa_ext *e, struct aa_profile *profile)
{
- void *pos = e->pos;
+ void *saved_pos = e->pos;
/* exec table is optional */
if (unpack_nameX(e, AA_STRUCT, "xtable")) {
fail:
aa_free_domain_entries(&profile->file.trans);
- e->pos = pos;
+ e->pos = saved_pos;
return 0;
}
{
struct aa_profile *profile = NULL;
const char *tmpname, *tmpns = NULL, *name = NULL;
- size_t ns_len;
+ const char *info = "failed to unpack profile";
+ size_t size = 0, ns_len;
struct rhashtable_params params = { 0 };
char *key = NULL;
struct aa_data *data;
tmpname = aa_splitn_fqname(name, strlen(name), &tmpns, &ns_len);
if (tmpns) {
*ns_name = kstrndup(tmpns, ns_len, GFP_KERNEL);
- if (!*ns_name)
+ if (!*ns_name) {
+ info = "out of memory";
goto fail;
+ }
name = tmpname;
}
if (IS_ERR(profile->xmatch)) {
error = PTR_ERR(profile->xmatch);
profile->xmatch = NULL;
+ info = "bad xmatch";
goto fail;
}
/* xmatch_len is not optional if xmatch is set */
if (profile->xmatch) {
- if (!unpack_u32(e, &tmp, NULL))
+ if (!unpack_u32(e, &tmp, NULL)) {
+ info = "missing xmatch len";
goto fail;
+ }
profile->xmatch_len = tmp;
}
(void) unpack_str(e, &profile->disconnected, "disconnected");
/* per profile debug flags (complain, audit) */
- if (!unpack_nameX(e, AA_STRUCT, "flags"))
+ if (!unpack_nameX(e, AA_STRUCT, "flags")) {
+ info = "profile missing flags";
goto fail;
+ }
+ info = "failed to unpack profile flags";
if (!unpack_u32(e, &tmp, NULL))
goto fail;
if (tmp & PACKED_FLAG_HAT)
/* set a default value if path_flags field is not present */
profile->path_flags = PATH_MEDIATE_DELETED;
+ info = "failed to unpack profile capabilities";
if (!unpack_u32(e, &(profile->caps.allow.cap[0]), NULL))
goto fail;
if (!unpack_u32(e, &(profile->caps.audit.cap[0]), NULL))
if (!unpack_u32(e, &tmpcap.cap[0], NULL))
goto fail;
+ info = "failed to unpack upper profile capabilities";
if (unpack_nameX(e, AA_STRUCT, "caps64")) {
/* optional upper half of 64 bit caps */
if (!unpack_u32(e, &(profile->caps.allow.cap[1]), NULL))
goto fail;
}
+ info = "failed to unpack extended profile capabilities";
if (unpack_nameX(e, AA_STRUCT, "capsx")) {
/* optional extended caps mediation mask */
if (!unpack_u32(e, &(profile->caps.extended.cap[0]), NULL))
goto fail;
}
- if (!unpack_rlimits(e, profile))
+ if (!unpack_rlimits(e, profile)) {
+ info = "failed to unpack profile rlimits";
goto fail;
+ }
+
+ size = unpack_array(e, "net_allowed_af");
+ if (size) {
+ for (i = 0; i < size; i++) {
+ /* discard extraneous rules that this kernel will
+ * never request
+ */
+ if (i >= AF_MAX) {
+ u16 tmp;
+
+ if (!unpack_u16(e, &tmp, NULL) ||
+ !unpack_u16(e, &tmp, NULL) ||
+ !unpack_u16(e, &tmp, NULL))
+ goto fail;
+ continue;
+ }
+ if (!unpack_u16(e, &profile->net.allow[i], NULL))
+ goto fail;
+ if (!unpack_u16(e, &profile->net.audit[i], NULL))
+ goto fail;
+ if (!unpack_u16(e, &profile->net.quiet[i], NULL))
+ goto fail;
+ }
+ if (!unpack_nameX(e, AA_ARRAYEND, NULL))
+ goto fail;
+ }
+ if (VERSION_LT(e->version, v7)) {
+ /* pre v7 policy always allowed these */
+ profile->net.allow[AF_UNIX] = 0xffff;
+ profile->net.allow[AF_NETLINK] = 0xffff;
+ }
if (unpack_nameX(e, AA_STRUCT, "policydb")) {
/* generic policy dfa - optional and may be NULL */
+ info = "failed to unpack policydb";
profile->policy.dfa = unpack_dfa(e);
if (IS_ERR(profile->policy.dfa)) {
error = PTR_ERR(profile->policy.dfa);
if (IS_ERR(profile->file.dfa)) {
error = PTR_ERR(profile->file.dfa);
profile->file.dfa = NULL;
+ info = "failed to unpack profile file rules";
goto fail;
} else if (profile->file.dfa) {
if (!unpack_u32(e, &profile->file.start, "dfa_start"))
} else
profile->file.dfa = aa_get_dfa(nulldfa);
- if (!unpack_trans_table(e, profile))
+ if (!unpack_trans_table(e, profile)) {
+ info = "failed to unpack profile transition table";
goto fail;
+ }
if (unpack_nameX(e, AA_STRUCT, "data")) {
+ info = "out of memory";
profile->data = kzalloc(sizeof(*profile->data), GFP_KERNEL);
if (!profile->data)
goto fail;
params.hashfn = strhash;
params.obj_cmpfn = datacmp;
- if (rhashtable_init(profile->data, &params))
+ if (rhashtable_init(profile->data, &params)) {
+ info = "failed to init key, value hash table";
goto fail;
+ }
while (unpack_strdup(e, &key, NULL)) {
data = kzalloc(sizeof(*data), GFP_KERNEL);
profile->data->p);
}
- if (!unpack_nameX(e, AA_STRUCTEND, NULL))
+ if (!unpack_nameX(e, AA_STRUCTEND, NULL)) {
+ info = "failed to unpack end of key, value data table";
goto fail;
+ }
}
- if (!unpack_nameX(e, AA_STRUCTEND, NULL))
+ if (!unpack_nameX(e, AA_STRUCTEND, NULL)) {
+ info = "failed to unpack end of profile";
goto fail;
+ }
return profile;
name = NULL;
else if (!name)
name = "unknown";
- audit_iface(profile, NULL, name, "failed to unpack profile", e,
- error);
+ audit_iface(profile, NULL, name, info, e, error);
aa_free_profile(profile);
return ERR_PTR(error);
* if not specified use previous version
* Mask off everything that is not kernel abi version
*/
- if (VERSION_LT(e->version, v5) && VERSION_GT(e->version, v7)) {
+ if (VERSION_LT(e->version, v5) || VERSION_GT(e->version, v7)) {
audit_iface(NULL, NULL, NULL, "unsupported interface version",
e, error);
return error;
*
* Determine if an inode having a change applied that's marked ATTR_KILL_PRIV
 * affects the security markings on that inode and, if so, whether
- * inode_killpriv() be invoked or the change rejected?
+ * inode_killpriv() should be invoked or the change rejected.
*
- * Returns 0 if granted; +ve if granted, but inode_killpriv() is required; and
- * -ve to deny the change.
+ * Returns 1 if security.capability has a value (meaning inode_killpriv()
+ * is required) and 0 otherwise (meaning it is not).
*/
int cap_inode_need_killpriv(struct dentry *dentry)
{
bool "Large payload keys"
depends on KEYS
depends on TMPFS
- depends on (CRYPTO_ANSI_CPRNG = y || CRYPTO_DRBG = y)
select CRYPTO_AES
- select CRYPTO_ECB
- select CRYPTO_RNG
+ select CRYPTO_GCM
help
This option provides support for holding large keys within the kernel
(for example Kerberos ticket caches). The data may be stored out to
/* Large capacity key type
*
+ * Copyright (C) 2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
* Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
#include <linux/shmem_fs.h>
#include <linux/err.h>
#include <linux/scatterlist.h>
+#include <linux/random.h>
#include <keys/user-type.h>
#include <keys/big_key-type.h>
-#include <crypto/rng.h>
-#include <crypto/skcipher.h>
+#include <crypto/aead.h>
/*
* Layout of key payload words.
/*
* Key size for big_key data encryption
*/
-#define ENC_KEY_SIZE 16
+#define ENC_KEY_SIZE 32
+
+/*
+ * Authentication tag length
+ */
+#define ENC_AUTHTAG_SIZE 16
/*
* big_key defined keys take an arbitrary string as the description and an
.destroy = big_key_destroy,
.describe = big_key_describe,
.read = big_key_read,
+ /* no ->update(); don't add it without changing big_key_crypt() nonce */
};
/*
- * Crypto names for big_key data encryption
+ * Crypto names for big_key data authenticated encryption
*/
-static const char big_key_rng_name[] = "stdrng";
-static const char big_key_alg_name[] = "ecb(aes)";
+static const char big_key_alg_name[] = "gcm(aes)";
/*
- * Crypto algorithms for big_key data encryption
+ * Crypto algorithms for big_key data authenticated encryption
*/
-static struct crypto_rng *big_key_rng;
-static struct crypto_skcipher *big_key_skcipher;
+static struct crypto_aead *big_key_aead;
/*
- * Generate random key to encrypt big_key data
+ * Since changing the key affects the entire object, we need a mutex.
*/
-static inline int big_key_gen_enckey(u8 *key)
-{
- return crypto_rng_get_bytes(big_key_rng, key, ENC_KEY_SIZE);
-}
+static DEFINE_MUTEX(big_key_aead_lock);
/*
* Encrypt/decrypt big_key data
*/
static int big_key_crypt(enum big_key_op op, u8 *data, size_t datalen, u8 *key)
{
- int ret = -EINVAL;
+ int ret;
struct scatterlist sgio;
- SKCIPHER_REQUEST_ON_STACK(req, big_key_skcipher);
-
- if (crypto_skcipher_setkey(big_key_skcipher, key, ENC_KEY_SIZE)) {
+ struct aead_request *aead_req;
+ /* We always use a zero nonce. The reason we can get away with this is
+ * because we're using a different randomly generated key for every
+ * different encryption. Notably, too, key_type_big_key doesn't define
+ * an .update function, so there's no chance we'll wind up reusing the
+ * key to encrypt updated data. Simply put: one key, one encryption.
+ */
+ u8 zero_nonce[crypto_aead_ivsize(big_key_aead)];
+
+ aead_req = aead_request_alloc(big_key_aead, GFP_KERNEL);
+ if (!aead_req)
+ return -ENOMEM;
+
+ memset(zero_nonce, 0, sizeof(zero_nonce));
+ sg_init_one(&sgio, data, datalen + (op == BIG_KEY_ENC ? ENC_AUTHTAG_SIZE : 0));
+ aead_request_set_crypt(aead_req, &sgio, &sgio, datalen, zero_nonce);
+ aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
+ aead_request_set_ad(aead_req, 0);
+
+ mutex_lock(&big_key_aead_lock);
+ if (crypto_aead_setkey(big_key_aead, key, ENC_KEY_SIZE)) {
ret = -EAGAIN;
goto error;
}
-
- skcipher_request_set_tfm(req, big_key_skcipher);
- skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
- NULL, NULL);
-
- sg_init_one(&sgio, data, datalen);
- skcipher_request_set_crypt(req, &sgio, &sgio, datalen, NULL);
-
if (op == BIG_KEY_ENC)
- ret = crypto_skcipher_encrypt(req);
+ ret = crypto_aead_encrypt(aead_req);
else
- ret = crypto_skcipher_decrypt(req);
-
- skcipher_request_zero(req);
-
+ ret = crypto_aead_decrypt(aead_req);
error:
+ mutex_unlock(&big_key_aead_lock);
+ aead_request_free(aead_req);
return ret;
}
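
As the nonce comment above explains, the all-zero IV is safe only because
every big_key object is encrypted exactly once, under its own freshly
generated random key. The framing is correspondingly simple: gcm(aes) takes
a 12-byte IV and appends a 16-byte authentication tag to the ciphertext, so
the stored blob needs no block-size padding. A minimal sketch of the length
calculation:

	#include <stddef.h>

	#define GCM_IV_SIZE	12	/* crypto_aead_ivsize() for gcm(aes) */
	#define GCM_TAG_SIZE	16	/* matches ENC_AUTHTAG_SIZE above */

	/* Stored blob: ciphertext (same length as plaintext) plus the tag. */
	static size_t big_key_stored_len(size_t datalen)
	{
		return datalen + GCM_TAG_SIZE;
	}
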
*
 * File content is stored encrypted with a randomly generated key.
*/
- size_t enclen = ALIGN(datalen, crypto_skcipher_blocksize(big_key_skcipher));
+ size_t enclen = datalen + ENC_AUTHTAG_SIZE;
loff_t pos = 0;
- /* prepare aligned data to encrypt */
data = kmalloc(enclen, GFP_KERNEL);
if (!data)
return -ENOMEM;
-
memcpy(data, prep->data, datalen);
- memset(data + datalen, 0x00, enclen - datalen);
/* generate random key */
enckey = kmalloc(ENC_KEY_SIZE, GFP_KERNEL);
ret = -ENOMEM;
goto error;
}
-
- ret = big_key_gen_enckey(enckey);
- if (ret)
+ ret = get_random_bytes_wait(enckey, ENC_KEY_SIZE);
+ if (unlikely(ret))
goto err_enckey;
/* encrypt aligned data */
- ret = big_key_crypt(BIG_KEY_ENC, data, enclen, enckey);
+ ret = big_key_crypt(BIG_KEY_ENC, data, datalen, enckey);
if (ret)
goto err_enckey;
*path = file->f_path;
path_get(path);
fput(file);
- kfree(data);
+ kzfree(data);
} else {
/* Just store the data in a buffer */
void *data = kmalloc(datalen, GFP_KERNEL);
err_fput:
fput(file);
err_enckey:
- kfree(enckey);
+ kzfree(enckey);
error:
- kfree(data);
+ kzfree(data);
return ret;
}
path_put(path);
}
- kfree(prep->payload.data[big_key_data]);
+ kzfree(prep->payload.data[big_key_data]);
}
/*
path->mnt = NULL;
path->dentry = NULL;
}
- kfree(key->payload.data[big_key_data]);
+ kzfree(key->payload.data[big_key_data]);
key->payload.data[big_key_data] = NULL;
}
struct file *file;
u8 *data;
u8 *enckey = (u8 *)key->payload.data[big_key_data];
- size_t enclen = ALIGN(datalen, crypto_skcipher_blocksize(big_key_skcipher));
+ size_t enclen = datalen + ENC_AUTHTAG_SIZE;
loff_t pos = 0;
data = kmalloc(enclen, GFP_KERNEL);
err_fput:
fput(file);
error:
- kfree(data);
+ kzfree(data);
} else {
ret = datalen;
if (copy_to_user(buffer, key->payload.data[big_key_data],
*/
static int __init big_key_init(void)
{
- struct crypto_skcipher *cipher;
- struct crypto_rng *rng;
int ret;
- rng = crypto_alloc_rng(big_key_rng_name, 0, 0);
- if (IS_ERR(rng)) {
- pr_err("Can't alloc rng: %ld\n", PTR_ERR(rng));
- return PTR_ERR(rng);
- }
-
- big_key_rng = rng;
-
- /* seed RNG */
- ret = crypto_rng_reset(rng, NULL, crypto_rng_seedsize(rng));
- if (ret) {
- pr_err("Can't reset rng: %d\n", ret);
- goto error_rng;
- }
-
/* init block cipher */
- cipher = crypto_alloc_skcipher(big_key_alg_name, 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR(cipher)) {
- ret = PTR_ERR(cipher);
+ big_key_aead = crypto_alloc_aead(big_key_alg_name, 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(big_key_aead)) {
+ ret = PTR_ERR(big_key_aead);
pr_err("Can't alloc crypto: %d\n", ret);
- goto error_rng;
+ return ret;
+ }
+ ret = crypto_aead_setauthsize(big_key_aead, ENC_AUTHTAG_SIZE);
+ if (ret < 0) {
+ pr_err("Can't set crypto auth tag len: %d\n", ret);
+ goto free_aead;
}
-
- big_key_skcipher = cipher;
ret = register_key_type(&key_type_big_key);
if (ret < 0) {
pr_err("Can't register type: %d\n", ret);
- goto error_cipher;
+ goto free_aead;
}
return 0;
-error_cipher:
- crypto_free_skcipher(big_key_skcipher);
-error_rng:
- crypto_free_rng(big_key_rng);
+free_aead:
+ crypto_free_aead(big_key_aead);
return ret;
}
extern key_ref_t search_my_process_keyrings(struct keyring_search_context *ctx);
extern key_ref_t search_process_keyrings(struct keyring_search_context *ctx);
-extern struct key *find_keyring_by_name(const char *name, bool skip_perm_check);
+extern struct key *find_keyring_by_name(const char *name, bool uid_keyring);
extern int install_user_keyrings(void);
extern int install_thread_keyring_to_cred(struct cred *);
struct key_user *key_user_lookup(kuid_t uid)
{
struct key_user *candidate = NULL, *user;
- struct rb_node *parent = NULL;
- struct rb_node **p;
+ struct rb_node *parent, **p;
try_again:
+ parent = NULL;
p = &key_user_tree.rb_node;
spin_lock(&key_user_lock);
key->flags |= 1 << KEY_FLAG_IN_QUOTA;
if (flags & KEY_ALLOC_BUILT_IN)
key->flags |= 1 << KEY_FLAG_BUILTIN;
+ if (flags & KEY_ALLOC_UID_KEYRING)
+ key->flags |= 1 << KEY_FLAG_UID_KEYRING;
#ifdef KEY_DEBUGGING
key->magic = KEY_DEBUG_MAGIC;
key = key_ref_to_ptr(key_ref);
+ if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) {
+ ret = -ENOKEY;
+ goto error2;
+ }
+
/* see if we can read it directly */
ret = key_permission(key_ref, KEY_NEED_READ);
if (ret == 0)
goto can_read_key;
if (ret != -EACCES)
- goto error;
+ goto error2;
/* we can't; see if it's searchable from this process's keyrings
* - we automatically take account of the fact that it may be
}
ret = keyctl_change_reqkey_auth(authkey);
- if (ret < 0)
- goto error;
+ if (ret == 0)
+ ret = authkey->serial;
key_put(authkey);
-
- ret = authkey->serial;
error:
return ret;
}
}
struct keyring_read_iterator_context {
- size_t qty;
+ size_t buflen;
size_t count;
key_serial_t __user *buffer;
};
int ret;
kenter("{%s,%d},,{%zu/%zu}",
- key->type->name, key->serial, ctx->count, ctx->qty);
+ key->type->name, key->serial, ctx->count, ctx->buflen);
- if (ctx->count >= ctx->qty)
+ if (ctx->count >= ctx->buflen)
return 1;
ret = put_user(key->serial, ctx->buffer);
return 0;
/* Calculate how much data we could return */
- ctx.qty = nr_keys * sizeof(key_serial_t);
-
if (!buffer || !buflen)
- return ctx.qty;
-
- if (buflen > ctx.qty)
- ctx.qty = buflen;
+ return nr_keys * sizeof(key_serial_t);
/* Copy the IDs of the subscribed keys into the buffer */
ctx.buffer = (key_serial_t __user *)buffer;
+ ctx.buflen = buflen;
ctx.count = 0;
ret = assoc_array_iterate(&keyring->keys, keyring_read_iterator, &ctx);
if (ret < 0) {
/*
* Find a keyring with the specified name.
*
- * All named keyrings in the current user namespace are searched, provided they
- * grant Search permission directly to the caller (unless this check is
- * skipped). Keyrings whose usage points have reached zero or who have been
- * revoked are skipped.
+ * Only keyrings that have nonzero refcount, are not revoked, and are owned by a
+ * user in the current user namespace are considered. If @uid_keyring is %true,
+ * the keyring additionally must have been allocated as a user or user session
+ * keyring; otherwise, it must grant Search permission directly to the caller.
*
 * Returns a pointer to the keyring with the keyring's refcount having been
* incremented on success. -ENOKEY is returned if a key could not be found.
*/
-struct key *find_keyring_by_name(const char *name, bool skip_perm_check)
+struct key *find_keyring_by_name(const char *name, bool uid_keyring)
{
struct key *keyring;
int bucket;
if (strcmp(keyring->description, name) != 0)
continue;
- if (!skip_perm_check &&
- key_permission(make_key_ref(keyring, 0),
- KEY_NEED_SEARCH) < 0)
- continue;
+ if (uid_keyring) {
+ if (!test_bit(KEY_FLAG_UID_KEYRING,
+ &keyring->flags))
+ continue;
+ } else {
+ if (key_permission(make_key_ref(keyring, 0),
+ KEY_NEED_SEARCH) < 0)
+ continue;
+ }
/* we've got a match but we might end up racing with
* key_cleanup() if the keyring is currently 'dead'
struct keyring_search_context ctx = {
.index_key.type = key->type,
.index_key.description = key->description,
- .cred = current_cred(),
+ .cred = m->file->f_cred,
.match_data.cmp = lookup_user_key_possessed,
.match_data.raw_data = key,
.match_data.lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT,
}
}
- /* check whether the current task is allowed to view the key (assuming
- * non-possession)
- * - the caller holds a spinlock, and thus the RCU read lock, making our
- * access to __current_cred() safe
- */
+ /* check whether the current task is allowed to view the key */
rc = key_task_permission(key_ref, ctx.cred, KEY_NEED_VIEW);
if (rc < 0)
return 0;
if (IS_ERR(uid_keyring)) {
uid_keyring = keyring_alloc(buf, user->uid, INVALID_GID,
cred, user_keyring_perm,
- KEY_ALLOC_IN_QUOTA,
+ KEY_ALLOC_UID_KEYRING |
+ KEY_ALLOC_IN_QUOTA,
NULL, NULL);
if (IS_ERR(uid_keyring)) {
ret = PTR_ERR(uid_keyring);
session_keyring =
keyring_alloc(buf, user->uid, INVALID_GID,
cred, user_keyring_perm,
- KEY_ALLOC_IN_QUOTA,
+ KEY_ALLOC_UID_KEYRING |
+ KEY_ALLOC_IN_QUOTA,
NULL, NULL);
if (IS_ERR(session_keyring)) {
ret = PTR_ERR(session_keyring);
}
}
+static void free_request_key_auth(struct request_key_auth *rka)
+{
+ if (!rka)
+ return;
+ key_put(rka->target_key);
+ key_put(rka->dest_keyring);
+ if (rka->cred)
+ put_cred(rka->cred);
+ kfree(rka->callout_info);
+ kfree(rka);
+}
+
/*
* Destroy an instantiation authorisation token key.
*/
kenter("{%d}", key->serial);
- if (rka->cred) {
- put_cred(rka->cred);
- rka->cred = NULL;
- }
-
- key_put(rka->target_key);
- key_put(rka->dest_keyring);
- kfree(rka->callout_info);
- kfree(rka);
+ free_request_key_auth(rka);
}
/*
const struct cred *cred = current->cred;
struct key *authkey = NULL;
char desc[20];
- int ret;
+ int ret = -ENOMEM;
kenter("%d,", target->serial);
/* allocate a auth record */
- rka = kmalloc(sizeof(*rka), GFP_KERNEL);
- if (!rka) {
- kleave(" = -ENOMEM");
- return ERR_PTR(-ENOMEM);
- }
- rka->callout_info = kmalloc(callout_len, GFP_KERNEL);
- if (!rka->callout_info) {
- kleave(" = -ENOMEM");
- kfree(rka);
- return ERR_PTR(-ENOMEM);
- }
+ rka = kzalloc(sizeof(*rka), GFP_KERNEL);
+ if (!rka)
+ goto error;
+ rka->callout_info = kmemdup(callout_info, callout_len, GFP_KERNEL);
+ if (!rka->callout_info)
+ goto error_free_rka;
+ rka->callout_len = callout_len;
/* see if the calling process is already servicing the key request of
* another process */
/* if the auth key has been revoked, then the key we're
* servicing is already instantiated */
- if (test_bit(KEY_FLAG_REVOKED, &cred->request_key_auth->flags))
- goto auth_key_revoked;
+ if (test_bit(KEY_FLAG_REVOKED,
+ &cred->request_key_auth->flags)) {
+ up_read(&cred->request_key_auth->sem);
+ ret = -EKEYREVOKED;
+ goto error_free_rka;
+ }
irka = cred->request_key_auth->payload.data[0];
rka->cred = get_cred(irka->cred);
rka->target_key = key_get(target);
rka->dest_keyring = key_get(dest_keyring);
- memcpy(rka->callout_info, callout_info, callout_len);
- rka->callout_len = callout_len;
/* allocate the auth key */
sprintf(desc, "%x", target->serial);
KEY_USR_VIEW, KEY_ALLOC_NOT_IN_QUOTA, NULL);
if (IS_ERR(authkey)) {
ret = PTR_ERR(authkey);
- goto error_alloc;
+ goto error_free_rka;
}
/* construct the auth key */
ret = key_instantiate_and_link(authkey, rka, 0, NULL, NULL);
if (ret < 0)
- goto error_inst;
+ goto error_put_authkey;
kleave(" = {%d,%d}", authkey->serial, refcount_read(&authkey->usage));
return authkey;
-auth_key_revoked:
- up_read(&cred->request_key_auth->sem);
- kfree(rka->callout_info);
- kfree(rka);
- kleave("= -EKEYREVOKED");
- return ERR_PTR(-EKEYREVOKED);
-
-error_inst:
- key_revoke(authkey);
+error_put_authkey:
key_put(authkey);
-error_alloc:
- key_put(rka->target_key);
- key_put(rka->dest_keyring);
- kfree(rka->callout_info);
- kfree(rka);
+error_free_rka:
+ free_request_key_auth(rka);
+error:
kleave("= %d", ret);
return ERR_PTR(ret);
}
If it's a GCC-compiled .c file, the error may be because the function
uses an inline asm() statement which has a "call" instruction. An
asm() statement with a call instruction must declare the use of the
- stack pointer in its output operand. For example, on x86_64:
+ stack pointer in its output operand. On x86_64, this means adding
+ the ASM_CALL_CONSTRAINT as an output constraint:
- register void *__sp asm("rsp");
- asm volatile("call func" : "+r" (__sp));
+ asm volatile("call func" : ASM_CALL_CONSTRAINT);
Otherwise the stack frame may not get created before the call.
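
   For instance, a minimal wrapper might look like the following sketch
   (my_func is a placeholder; ASM_CALL_CONSTRAINT is provided by asm/asm.h
   on x86, and real uses must also declare any registers the callee
   clobbers):

	static inline void call_my_func(void)
	{
		asm volatile("call my_func" : ASM_CALL_CONSTRAINT);
	}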
break;
case 0x89:
- if (rex == 0x48 && modrm == 0xe5) {
+ if (rex_w && !rex_r && modrm_mod == 3 && modrm_reg == 4) {
- /* mov %rsp, %rbp */
+ /* mov %rsp, reg */
*type = INSN_STACK;
op->src.type = OP_SRC_REG;
op->src.reg = CFI_SP;
op->dest.type = OP_DEST_REG;
- op->dest.reg = CFI_BP;
+ op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b];
break;
}
switch (op->src.type) {
case OP_SRC_REG:
- if (op->src.reg == CFI_SP && op->dest.reg == CFI_BP) {
+ if (op->src.reg == CFI_SP && op->dest.reg == CFI_BP &&
+ cfa->base == CFI_SP &&
+ regs[CFI_BP].base == CFI_CFA &&
+ regs[CFI_BP].offset == -cfa->offset) {
+
+ /* mov %rsp, %rbp */
+ cfa->base = op->dest.reg;
+ state->bp_scratch = false;
+ }
- if (cfa->base == CFI_SP &&
- regs[CFI_BP].base == CFI_CFA &&
- regs[CFI_BP].offset == -cfa->offset) {
+ else if (op->src.reg == CFI_SP &&
+ op->dest.reg == CFI_BP && state->drap) {
- /* mov %rsp, %rbp */
- cfa->base = op->dest.reg;
- state->bp_scratch = false;
- }
+ /* drap: mov %rsp, %rbp */
+ regs[CFI_BP].base = CFI_BP;
+ regs[CFI_BP].offset = -state->stack_size;
+ state->bp_scratch = false;
+ }
- else if (state->drap) {
+ else if (op->src.reg == CFI_SP && cfa->base == CFI_SP) {
- /* drap: mov %rsp, %rbp */
- regs[CFI_BP].base = CFI_BP;
- regs[CFI_BP].offset = -state->stack_size;
- state->bp_scratch = false;
- }
+ /*
+ * mov %rsp, %reg
+ *
+ * This is needed for the rare case where GCC
+ * does:
+ *
+ * mov %rsp, %rax
+ * ...
+ * mov %rax, %rsp
+ */
+ state->vals[op->dest.reg].base = CFI_CFA;
+ state->vals[op->dest.reg].offset = -state->stack_size;
}
else if (op->dest.reg == cfa->base) {
return -1;
}
- sec->data = elf_getdata(s, NULL);
- if (!sec->data) {
- WARN_ELF("elf_getdata");
- return -1;
- }
-
- if (sec->data->d_off != 0 ||
- sec->data->d_size != sec->sh.sh_size) {
- WARN("unexpected data attributes for %s", sec->name);
- return -1;
+ if (sec->sh.sh_size != 0) {
+ sec->data = elf_getdata(s, NULL);
+ if (!sec->data) {
+ WARN_ELF("elf_getdata");
+ return -1;
+ }
+ if (sec->data->d_off != 0 ||
+ sec->data->d_size != sec->sh.sh_size) {
+ WARN("unexpected data attributes for %s",
+ sec->name);
+ return -1;
+ }
}
-
- sec->len = sec->data->d_size;
+ sec->len = sec->sh.sh_size;
}
/* sanity check, one more call to elf_nextscn() should return NULL */
strcat(relaname, base->name);
sec = elf_create_section(elf, relaname, sizeof(GElf_Rela), 0);
+ free(relaname);
if (!sec)
return NULL;
struct section *sec;
Elf_Scn *s;
+ /* Update section headers for changed sections: */
list_for_each_entry(sec, &elf->sections, list) {
if (sec->changed) {
s = elf_getscn(elf->elf, sec->idx);
WARN_ELF("elf_getscn");
return -1;
}
- if (!gelf_update_shdr (s, &sec->sh)) {
+ if (!gelf_update_shdr(s, &sec->sh)) {
WARN_ELF("gelf_update_shdr");
return -1;
}
}
}
+ /* Make sure the new section header entries get updated properly. */
+ elf_flagelf(elf->elf, ELF_C_SET, ELF_F_DIRTY);
+
+ /* Write all changes to the file. */
if (elf_update(elf->elf, ELF_C_WRITE) < 0) {
WARN_ELF("elf_update");
return -1;
set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en);
set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en);
set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en);
- set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en);
- set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
- set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
}
static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa,
override MAKEFLAGS =
endif
+ifneq ($(KBUILD_SRC),)
+override LDFLAGS =
+endif
+
BUILD := $(O)
ifndef BUILD
BUILD := $(KBUILD_OUTPUT)
export BUILD
all:
- for TARGET in $(TARGETS); do \
+ @for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
mkdir $$BUILD_TARGET -p; \
make OUTPUT=$$BUILD_TARGET -C $$TARGET;\
done;
run_tests: all
- for TARGET in $(TARGETS); do \
+ @for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
make OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests;\
done;
hotplug:
- for TARGET in $(TARGETS_HOTPLUG); do \
+ @for TARGET in $(TARGETS_HOTPLUG); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
make OUTPUT=$$BUILD_TARGET -C $$TARGET;\
done;
run_hotplug: hotplug
- for TARGET in $(TARGETS_HOTPLUG); do \
+ @for TARGET in $(TARGETS_HOTPLUG); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
make OUTPUT=$$BUILD_TARGET -C $$TARGET run_full_test;\
done;
clean_hotplug:
- for TARGET in $(TARGETS_HOTPLUG); do \
+ @for TARGET in $(TARGETS_HOTPLUG); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
done;
ifdef INSTALL_PATH
@# Ask all targets to install their files
mkdir -p $(INSTALL_PATH)
- for TARGET in $(TARGETS); do \
+ @for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
make OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \
done;
endif
clean:
- for TARGET in $(TARGETS); do \
+ @for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
done;
unsigned int start, end, possible_cpus = 0;
char buff[128];
FILE *fp;
+ int n;
fp = fopen(fcpu, "r");
if (!fp) {
}
while (fgets(buff, sizeof(buff), fp)) {
- if (sscanf(buff, "%u-%u", &start, &end) == 2) {
- possible_cpus = start == 0 ? end + 1 : 0;
- break;
+ n = sscanf(buff, "%u-%u", &start, &end);
+ if (n == 0) {
+ printf("Failed to retrieve # possible CPUs!\n");
+ exit(1);
+ } else if (n == 1) {
+ end = start;
}
+ possible_cpus = start == 0 ? end + 1 : 0;
+ break;
}
-
fclose(fp);
- if (!possible_cpus) {
- printf("Failed to retrieve # possible CPUs!\n");
- exit(1);
- }
return possible_cpus;
}
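
For clarity, sscanf() returns the number of successful conversions, which is
what the fixed loop above keys off: a range line such as "0-7" yields 2,
while a single-CPU line containing just "0" yields 1. A minimal standalone
sketch:

	#include <stdio.h>

	int main(void)
	{
		unsigned int start, end;
		int n;

		n = sscanf("0-7", "%u-%u", &start, &end);
		printf("n=%d start=%u end=%u\n", n, start, end);	/* 2 0 7 */

		n = sscanf("0", "%u-%u", &start, &end);
		printf("n=%d start=%u\n", n, start);			/* 1 0 */
		return 0;
	}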
uname_M := $(shell uname -m 2>/dev/null || echo not)
ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+TEST_GEN_PROGS := step_after_suspend_test
+
ifeq ($(ARCH),x86)
-TEST_GEN_PROGS := breakpoint_test
+TEST_GEN_PROGS += breakpoint_test
endif
ifneq (,$(filter $(ARCH),aarch64 arm64))
-TEST_GEN_PROGS := breakpoint_test_arm64
+TEST_GEN_PROGS += breakpoint_test_arm64
endif
-TEST_GEN_PROGS += step_after_suspend_test
-
include ../lib.mk
#!/bin/sh
# description: Register/unregister many kprobe events
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
# ftrace fentry skip size depends on the machine architecture.
# Currently HAVE_KPROBES_ON_FTRACE defined on x86 and powerpc64le
case `uname -m` in
include ../lib.mk
all:
- for DIR in $(SUBDIRS); do \
+ @for DIR in $(SUBDIRS); do \
BUILD_TARGET=$(OUTPUT)/$$DIR; \
mkdir $$BUILD_TARGET -p; \
make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
+ if [ -e $$DIR/$(TEST_PROGS) ]; then \
+ rsync -a $$DIR/$(TEST_PROGS) $$BUILD_TARGET/; \
+ fi; \
done
override define RUN_TESTS
- $(OUTPUT)/run.sh
+ @cd $(OUTPUT); ./run.sh
endef
override define INSTALL_RULE
endef
override define CLEAN
- for DIR in $(SUBDIRS); do \
+ @for DIR in $(SUBDIRS); do \
BUILD_TARGET=$(OUTPUT)/$$DIR; \
mkdir $$BUILD_TARGET -p; \
make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
CFLAGS := $(CFLAGS) -Wall -D_GNU_SOURCE
LDLIBS := $(LDLIBS) -lm
+ifneq (,$(filter $(ARCH),x86))
TEST_GEN_FILES := msr aperf
+endif
TEST_PROGS := run.sh
EVALUATE_ONLY=0
-max_cpus=$(($(nproc)-1))
+if ! uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ | grep -q x86; then
+ echo "$0 # Skipped: Test can only run on x86 architectures."
+ exit 0
+fi
-# compile programs
-gcc aperf.c -Wall -D_GNU_SOURCE -o aperf -lm
-[ $? -ne 0 ] && echo "Problem compiling aperf.c." && exit 1
-gcc -o msr msr.c -lm
-[ $? -ne 0 ] && echo "Problem compiling msr.c." && exit 1
+max_cpus=$(($(nproc)-1))
function run_test () {
OUTPUT := $(shell pwd)
endif
+# The following are built by lib.mk common compile rules.
+# TEST_CUSTOM_PROGS should be used by tests that require
+# custom build rule and prevent common build rule use.
+# TEST_PROGS are for test shell scripts.
+# TEST_CUSTOM_PROGS and TEST_PROGS will be run by common run_tests
+# and install targets. Common clean doesn't touch them.
TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS))
+TEST_GEN_PROGS_EXTENDED := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS_EXTENDED))
TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES))
all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
test_num=`echo $$test_num+1 | bc`; \
echo "selftests: $$BASENAME_TEST"; \
echo "========================================"; \
- if [ ! -x $$BASENAME_TEST ]; then \
+ if [ ! -x $$TEST ]; then \
echo "selftests: Warning: file $$BASENAME_TEST is not executable, correct this.";\
echo "not ok 1..$$test_num selftests: $$BASENAME_TEST [FAIL]"; \
else \
- cd `dirname $$TEST` > /dev/null; (./$$BASENAME_TEST && echo "ok 1..$$test_num selftests: $$BASENAME_TEST [PASS]") || echo "not ok 1..$$test_num selftests: $$BASENAME_TEST [FAIL]"; cd - > /dev/null;\
+ cd `dirname $$TEST` > /dev/null; (./$$BASENAME_TEST > /tmp/$$BASENAME_TEST 2>&1 && echo "ok 1..$$test_num selftests: $$BASENAME_TEST [PASS]") || echo "not ok 1..$$test_num selftests: $$BASENAME_TEST [FAIL]"; cd - > /dev/null;\
fi; \
done;
endef
run_tests: all
- $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_PROGS))
+ifneq ($(KBUILD_SRC),)
+ @if [ "X$(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)" != "X" ]; then
+ @rsync -aq $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(OUTPUT)
+ fi
+ @if [ "X$(TEST_PROGS)" != "X" ]; then
+ $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(OUTPUT)/$(TEST_PROGS))
+ else
+ $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS))
+ fi
+else
+ $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS))
+endif
define INSTALL_RULE
@if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)" != "X" ]; then \
echo "rsync -a $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/"; \
rsync -a $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/; \
fi
- @if [ "X$(TEST_GEN_PROGS)$(TEST_GEN_PROGS_EXTENDED)$(TEST_GEN_FILES)" != "X" ]; then \
+ @if [ "X$(TEST_GEN_PROGS)$(TEST_CUSTOM_PROGS)$(TEST_GEN_PROGS_EXTENDED)$(TEST_GEN_FILES)" != "X" ]; then \
mkdir -p ${INSTALL_PATH}; \
- echo "rsync -a $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/"; \
- rsync -a $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/; \
+ echo "rsync -a $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/"; \
+ rsync -a $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/; \
fi
endef
endif
define EMIT_TESTS
- @for TEST in $(TEST_GEN_PROGS) $(TEST_PROGS); do \
+ @for TEST in $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS); do \
BASENAME_TEST=`basename $$TEST`; \
- echo "(./$$BASENAME_TEST && echo \"selftests: $$BASENAME_TEST [PASS]\") || echo \"selftests: $$BASENAME_TEST [FAIL]\""; \
+ echo "(./$$BASENAME_TEST > /tmp/$$BASENAME_TEST 2>&1 && echo \"selftests: $$BASENAME_TEST [PASS]\") || echo \"selftests: $$BASENAME_TEST [FAIL]\""; \
done;
endef
emit_tests:
$(EMIT_TESTS)
+# Define RM if it isn't defined already; it is undefined in the make O= case.
+ifeq ($(RM),)
+RM := rm -f
+endif
+
define CLEAN
$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
endef
clean:
$(CLEAN)
+# When running make O= with the kselftest target from the top level,
+# the following aren't defined.
+#
+ifneq ($(KBUILD_SRC),)
+LINK.c = $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH)
+COMPILE.S = $(CC) $(ASFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c
+LINK.S = $(CC) $(ASFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH)
+endif
+
$(OUTPUT)/%:%.c
$(LINK.c) $^ $(LDLIBS) -o $@
include ../lib.mk
override define RUN_TESTS
- @./mq_open_tests /test1 || echo "selftests: mq_open_tests [FAIL]"
- @./mq_perf_tests || echo "selftests: mq_perf_tests [FAIL]"
+ $(OUTPUT)/mq_open_tests /test1 || echo "selftests: mq_open_tests [FAIL]"
+ $(OUTPUT)/mq_perf_tests || echo "selftests: mq_perf_tests [FAIL]"
endef
override define EMIT_TESTS
reuseport_bpf_cpu
reuseport_bpf_numa
reuseport_dualstack
+reuseaddr_conflict
TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
TEST_GEN_FILES = socket
-TEST_GEN_FILES += psock_fanout psock_tpacket
-TEST_GEN_FILES += reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
-TEST_GEN_FILES += reuseport_dualstack msg_zerocopy
+TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
+TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
+TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict
include ../lib.mk
#include <unistd.h>
#ifndef SO_EE_ORIGIN_ZEROCOPY
-#define SO_EE_ORIGIN_ZEROCOPY SO_EE_ORIGIN_UPAGE
+#define SO_EE_ORIGIN_ZEROCOPY 5
#endif
#ifndef SO_ZEROCOPY
exit 0
fi
-ip -Version 2>/dev/null >/dev/null
+ip link show 2>/dev/null >/dev/null
if [ $? -ne 0 ];then
echo "SKIP: Could not run test without the ip tool"
exit 0
--- /dev/null
+/*
+ * Test for the regression introduced by
+ *
+ * b9470c27607b ("inet: kill smallest_size and smallest_port")
+ *
+ * If we open an ipv4 socket on a port with reuseaddr we shouldn't reset the tb
+ * when we open the ipv6 counterpart, which is what was happening previously.
+ */
+#include <errno.h>
+#include <error.h>
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#define PORT 9999
+
+int open_port(int ipv6, int any)
+{
+ int fd = -1;
+ int reuseaddr = 1;
+ int v6only = 1;
+ int addrlen;
+ int ret = -1;
+ struct sockaddr *addr;
+ int family = ipv6 ? AF_INET6 : AF_INET;
+
+ struct sockaddr_in6 addr6 = {
+ .sin6_family = AF_INET6,
+ .sin6_port = htons(PORT),
+ .sin6_addr = in6addr_any
+ };
+ struct sockaddr_in addr4 = {
+ .sin_family = AF_INET,
+ .sin_port = htons(PORT),
+ .sin_addr.s_addr = any ? htonl(INADDR_ANY) : inet_addr("127.0.0.1"),
+ };
+
+ if (ipv6) {
+ addr = (struct sockaddr*)&addr6;
+ addrlen = sizeof(addr6);
+ } else {
+ addr = (struct sockaddr*)&addr4;
+ addrlen = sizeof(addr4);
+ }
+
+ if ((fd = socket(family, SOCK_STREAM, IPPROTO_TCP)) < 0) {
+ perror("socket");
+ goto out;
+ }
+
+ if (ipv6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, (void*)&v6only,
+ sizeof(v6only)) < 0) {
+ perror("setsockopt IPV6_V6ONLY");
+ goto out;
+ }
+
+ if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &reuseaddr,
+ sizeof(reuseaddr)) < 0) {
+ perror("setsockopt SO_REUSEADDR");
+ goto out;
+ }
+
+ if (bind(fd, addr, addrlen) < 0) {
+ perror("bind");
+ goto out;
+ }
+
+ if (any)
+ return fd;
+
+ if (listen(fd, 1) < 0) {
+ perror("listen");
+ goto out;
+ }
+ return fd;
+out:
+ close(fd);
+ return ret;
+}
+
+int main(void)
+{
+ int listenfd;
+ int fd1, fd2;
+
+ fprintf(stderr, "Opening 127.0.0.1:%d\n", PORT);
+ listenfd = open_port(0, 0);
+ if (listenfd < 0)
+ error(1, errno, "Couldn't open listen socket");
+ fprintf(stderr, "Opening INADDR_ANY:%d\n", PORT);
+ fd1 = open_port(0, 1);
+ if (fd1 >= 0)
+ error(1, 0, "Was allowed to create an ipv4 reuseport on a already bound non-reuseport socket");
+ fprintf(stderr, "Opening in6addr_any:%d\n", PORT);
+ fd1 = open_port(1, 1);
+ if (fd1 < 0)
+ error(1, errno, "Couldn't open ipv6 reuseport");
+ fprintf(stderr, "Opening INADDR_ANY:%d\n", PORT);
+ fd2 = open_port(0, 1);
+ if (fd2 >= 0)
+ error(1, 0, "Was allowed to create an ipv4 reuseport on a already bound non-reuseport socket");
+ close(fd1);
+ fprintf(stderr, "Opening INADDR_ANY:%d after closing ipv6 socket\n", PORT);
+ fd1 = open_port(0, 1);
+ if (fd1 >= 0)
+ error(1, 0, "Was allowed to create an ipv4 reuseport on an already bound non-reuseport socket with no ipv6");
+ fprintf(stderr, "Success");
+ return 0;
+}
-TEST_GEN_PROGS := seccomp_bpf
-CFLAGS += -Wl,-no-as-needed -Wall
-LDFLAGS += -lpthread
+all:
include ../lib.mk
-$(TEST_GEN_PROGS): seccomp_bpf.c ../kselftest_harness.h
- $(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
+.PHONY: all clean
+
+BINARIES := seccomp_bpf seccomp_benchmark
+CFLAGS += -Wl,-no-as-needed -Wall
+
+seccomp_bpf: seccomp_bpf.c ../kselftest_harness.h
+ $(CC) $(CFLAGS) $(LDFLAGS) -lpthread $< -o $@
+
+TEST_PROGS += $(BINARIES)
+EXTRA_CLEAN := $(BINARIES)
+
+all: $(BINARIES)
--- /dev/null
+/*
+ * Strictly speaking, this is not a test. But it can report during test
+ * runs so relative performance can be measured.
+ */
+#define _GNU_SOURCE
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <unistd.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+
+#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
+
+unsigned long long timing(clockid_t clk_id, unsigned long long samples)
+{
+ pid_t pid, ret;
+ unsigned long long i;
+ struct timespec start, finish;
+
+ pid = getpid();
+ assert(clock_gettime(clk_id, &start) == 0);
+ for (i = 0; i < samples; i++) {
+ ret = syscall(__NR_getpid);
+ assert(pid == ret);
+ }
+ assert(clock_gettime(clk_id, &finish) == 0);
+
+ i = finish.tv_sec - start.tv_sec;
+ i *= 1000000000;
+ i += finish.tv_nsec - start.tv_nsec;
+
+ printf("%lu.%09lu - %lu.%09lu = %llu\n",
+ finish.tv_sec, finish.tv_nsec,
+ start.tv_sec, start.tv_nsec,
+ i);
+
+ return i;
+}
+
+unsigned long long calibrate(void)
+{
+ unsigned long long i;
+
+ printf("Calibrating reasonable sample size...\n");
+
+ for (i = 5; ; i++) {
+ unsigned long long samples = 1 << i;
+
+ /* Find something that takes more than 5 seconds to run. */
+ if (timing(CLOCK_REALTIME, samples) / 1000000000ULL > 5)
+ return samples;
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+ unsigned long long samples;
+ unsigned long long native, filtered;
+
+ if (argc > 1)
+ samples = strtoull(argv[1], NULL, 0);
+ else
+ samples = calibrate();
+
+ printf("Benchmarking %llu samples...\n", samples);
+
+ native = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
+ printf("getpid native: %llu ns\n", native);
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ assert(ret == 0);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ assert(ret == 0);
+
+ filtered = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
+ printf("getpid RET_ALLOW: %llu ns\n", filtered);
+
+ printf("Estimated seccomp overhead per syscall: %llu ns\n",
+ filtered - native);
+
+ if (filtered == native)
+ printf("Trying running again with more samples.\n");
+
+ return 0;
+}
*/
#include <sys/types.h>
-#include <asm/siginfo.h>
-#define __have_siginfo_t 1
-#define __have_sigval_t 1
-#define __have_sigevent_t 1
+
+/*
+ * glibc 2.26 and later have SIGSYS in siginfo_t. Before that,
+ * we need to use the kernel's siginfo.h file and trick glibc
+ * into accepting it.
+ */
+#if !__GLIBC_PREREQ(2, 26)
+# include <asm/siginfo.h>
+# define __have_siginfo_t 1
+# define __have_sigval_t 1
+# define __have_sigevent_t 1
+#endif
#include <errno.h>
#include <linux/filter.h>
#define SECCOMP_MODE_FILTER 2
#endif
-#ifndef SECCOMP_RET_KILL
-#define SECCOMP_RET_KILL 0x00000000U /* kill the task immediately */
-#define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */
-#define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */
-#define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */
-#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */
-
-/* Masks for the return value sections. */
-#define SECCOMP_RET_ACTION 0x7fff0000U
-#define SECCOMP_RET_DATA 0x0000ffffU
-
+#ifndef SECCOMP_RET_ALLOW
struct seccomp_data {
int nr;
__u32 arch;
};
#endif
+#ifndef SECCOMP_RET_KILL_PROCESS
+#define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */
+#define SECCOMP_RET_KILL_THREAD 0x00000000U /* kill the thread */
+#endif
+#ifndef SECCOMP_RET_KILL
+#define SECCOMP_RET_KILL SECCOMP_RET_KILL_THREAD
+#define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */
+#define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */
+#define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */
+#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */
+#endif
+#ifndef SECCOMP_RET_LOG
+#define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */
+#endif
+
+#ifndef __NR_seccomp
+# if defined(__i386__)
+# define __NR_seccomp 354
+# elif defined(__x86_64__)
+# define __NR_seccomp 317
+# elif defined(__arm__)
+# define __NR_seccomp 383
+# elif defined(__aarch64__)
+# define __NR_seccomp 277
+# elif defined(__hppa__)
+# define __NR_seccomp 338
+# elif defined(__powerpc__)
+# define __NR_seccomp 358
+# elif defined(__s390__)
+# define __NR_seccomp 348
+# else
+# warning "seccomp syscall number unknown for this architecture"
+# define __NR_seccomp 0xffff
+# endif
+#endif
+
+#ifndef SECCOMP_SET_MODE_STRICT
+#define SECCOMP_SET_MODE_STRICT 0
+#endif
+
+#ifndef SECCOMP_SET_MODE_FILTER
+#define SECCOMP_SET_MODE_FILTER 1
+#endif
+
+#ifndef SECCOMP_GET_ACTION_AVAIL
+#define SECCOMP_GET_ACTION_AVAIL 2
+#endif
+
+#ifndef SECCOMP_FILTER_FLAG_TSYNC
+#define SECCOMP_FILTER_FLAG_TSYNC 1
+#endif
+
+#ifndef SECCOMP_FILTER_FLAG_LOG
+#define SECCOMP_FILTER_FLAG_LOG 2
+#endif
+
+#ifndef seccomp
+int seccomp(unsigned int op, unsigned int flags, void *args)
+{
+ errno = 0;
+ return syscall(__NR_seccomp, op, flags, args);
+}
+#endif
+
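With this fallback in place, filters can be installed through the seccomp(2)
syscall rather than prctl(); for example (a usage sketch, with prog a
struct sock_fprog as used throughout this file):

	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);

The flags argument can carry SECCOMP_FILTER_FLAG_TSYNC or
SECCOMP_FILTER_FLAG_LOG, both defined above for older headers.
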
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
#elif __BYTE_ORDER == __BIG_ENDIAN
}
}
-/* Tests kernel support by checking for a copy_from_user() fault on * NULL. */
+/* Tests kernel support by checking for a copy_from_user() fault on NULL. */
TEST(mode_filter_support)
{
long ret;
EXPECT_EQ(EINVAL, errno);
}
+TEST(log_all)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+ pid_t parent = getppid();
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ASSERT_EQ(0, ret);
+
+ /* getppid() should succeed and be logged (no check for logging) */
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+}
+
TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
{
struct sock_filter filter[] = {
close(fd);
}
+/* Thread helper that dies via a seccomp filter violation when asked to. */
+void *kill_thread(void *data)
+{
+ bool die = (bool)data;
+
+ if (die) {
+ prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
+ return (void *)SIBLING_EXIT_FAILURE;
+ }
+
+ return (void *)SIBLING_EXIT_UNKILLED;
+}
+
+/* Prepare a thread that will kill itself or both of us. */
+void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
+{
+ pthread_t thread;
+ void *status;
+ /* Kill only when calling __NR_prctl. */
+ struct sock_filter filter_thread[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog_thread = {
+ .len = (unsigned short)ARRAY_SIZE(filter_thread),
+ .filter = filter_thread,
+ };
+ struct sock_filter filter_process[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog_process = {
+ .len = (unsigned short)ARRAY_SIZE(filter_process),
+ .filter = filter_process,
+ };
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0,
+ kill_process ? &prog_process : &prog_thread));
+
+ /*
+ * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS
+ * flag cannot be downgraded by a new filter.
+ */
+ ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
+
+ /* Start a thread that will exit immediately. */
+ ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false));
+ ASSERT_EQ(0, pthread_join(thread, &status));
+ ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status);
+
+ /* Start a thread that will die immediately. */
+ ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true));
+ ASSERT_EQ(0, pthread_join(thread, &status));
+ ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status);
+
+ /*
+ * If we get here, only the spawned thread died. Let the parent know
+ * the whole process didn't die (i.e. this thread, the spawner,
+ * stayed running).
+ */
+ exit(42);
+}
+
+TEST(KILL_thread)
+{
+ int status;
+ pid_t child_pid;
+
+ child_pid = fork();
+ ASSERT_LE(0, child_pid);
+ if (child_pid == 0) {
+ kill_thread_or_group(_metadata, false);
+ _exit(38);
+ }
+
+ ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+
+ /* If only the thread was killed, we'll see exit 42. */
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(42, WEXITSTATUS(status));
+}
+
+TEST(KILL_process)
+{
+ int status;
+ pid_t child_pid;
+
+ child_pid = fork();
+ ASSERT_LE(0, child_pid);
+ if (child_pid == 0) {
+ kill_thread_or_group(_metadata, true);
+ _exit(38);
+ }
+
+ ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+
+ /* If the entire process was killed, we'll see SIGSYS. */
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_EQ(SIGSYS, WTERMSIG(status));
+}
+
/* TODO(wad) add 64-bit versus 32-bit arg tests. */
TEST(arg_out_of_range)
{
EXPECT_EQ(EINVAL, errno);
}
+#define ERRNO_FILTER(name, errno) \
+ struct sock_filter _read_filter_##name[] = { \
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS, \
+ offsetof(struct seccomp_data, nr)), \
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), \
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | errno), \
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), \
+ }; \
+ struct sock_fprog prog_##name = { \
+ .len = (unsigned short)ARRAY_SIZE(_read_filter_##name), \
+ .filter = _read_filter_##name, \
+ }
+
+/* Make sure basic errno values are correctly passed through a filter. */
TEST(ERRNO_valid)
{
- struct sock_filter filter[] = {
- BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
- offsetof(struct seccomp_data, nr)),
- BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
- BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | E2BIG),
- BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
- };
- struct sock_fprog prog = {
- .len = (unsigned short)ARRAY_SIZE(filter),
- .filter = filter,
- };
+ ERRNO_FILTER(valid, E2BIG);
long ret;
pid_t parent = getppid();
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
ASSERT_EQ(0, ret);
- ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid);
ASSERT_EQ(0, ret);
EXPECT_EQ(parent, syscall(__NR_getppid));
EXPECT_EQ(E2BIG, errno);
}
+/* Make sure an errno of zero is correctly handled by the arch code. */
TEST(ERRNO_zero)
{
- struct sock_filter filter[] = {
- BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
- offsetof(struct seccomp_data, nr)),
- BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
- BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 0),
- BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
- };
- struct sock_fprog prog = {
- .len = (unsigned short)ARRAY_SIZE(filter),
- .filter = filter,
- };
+ ERRNO_FILTER(zero, 0);
long ret;
pid_t parent = getppid();
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
ASSERT_EQ(0, ret);
- ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero);
ASSERT_EQ(0, ret);
EXPECT_EQ(parent, syscall(__NR_getppid));
EXPECT_EQ(0, read(0, NULL, 0));
}
+/*
+ * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller.
+ * This tests that the errno value gets capped correctly, fixed by
+ * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO").
+ */
TEST(ERRNO_capped)
{
- struct sock_filter filter[] = {
- BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
- offsetof(struct seccomp_data, nr)),
- BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
- BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 4096),
- BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
- };
- struct sock_fprog prog = {
- .len = (unsigned short)ARRAY_SIZE(filter),
- .filter = filter,
- };
+ ERRNO_FILTER(capped, 4096);
long ret;
pid_t parent = getppid();
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
ASSERT_EQ(0, ret);
- ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped);
ASSERT_EQ(0, ret);
EXPECT_EQ(parent, syscall(__NR_getppid));
EXPECT_EQ(4095, errno);
}
+/*
+ * Filters are processed in reverse order: last applied is executed first.
+ * Since only the SECCOMP_RET_ACTION mask is tested for return values, the
+ * SECCOMP_RET_DATA mask results will follow the most recently applied
+ * matching filter return (and not the lowest or highest value).
+ */
+TEST(ERRNO_order)
+{
+ ERRNO_FILTER(first, 11);
+ ERRNO_FILTER(second, 13);
+ ERRNO_FILTER(third, 12);
+ long ret;
+ pid_t parent = getppid();
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third);
+ ASSERT_EQ(0, ret);
+
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ EXPECT_EQ(-1, read(0, NULL, 0));
+ EXPECT_EQ(12, errno);
+}
+
FIXTURE_DATA(TRAP) {
struct sock_fprog prog;
};
syscall(__NR_getpid);
}
-static struct siginfo TRAP_info;
+static siginfo_t TRAP_info;
static volatile int TRAP_nr;
static void TRAP_action(int nr, siginfo_t *info, void *void_context)
{
FIXTURE_DATA(precedence) {
struct sock_fprog allow;
+ struct sock_fprog log;
struct sock_fprog trace;
struct sock_fprog error;
struct sock_fprog trap;
struct sock_filter allow_insns[] = {
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
};
+ struct sock_filter log_insns[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
+ };
struct sock_filter trace_insns[] = {
BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
offsetof(struct seccomp_data, nr)),
memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
FILTER_ALLOC(allow);
+ FILTER_ALLOC(log);
FILTER_ALLOC(trace);
FILTER_ALLOC(error);
FILTER_ALLOC(trap);
{
#define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
FILTER_FREE(allow);
+ FILTER_FREE(log);
FILTER_FREE(trace);
FILTER_FREE(error);
FILTER_FREE(trap);
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+ ASSERT_EQ(0, ret);
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
ASSERT_EQ(0, ret);
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+ ASSERT_EQ(0, ret);
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
ASSERT_EQ(0, ret);
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
ASSERT_EQ(0, ret);
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+ ASSERT_EQ(0, ret);
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
ASSERT_EQ(0, ret);
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+ ASSERT_EQ(0, ret);
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
ASSERT_EQ(0, ret);
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
ASSERT_EQ(0, ret);
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+ ASSERT_EQ(0, ret);
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
ASSERT_EQ(0, ret);
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+ ASSERT_EQ(0, ret);
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
ASSERT_EQ(0, ret);
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+ ASSERT_EQ(0, ret);
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
ASSERT_EQ(0, ret);
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+ ASSERT_EQ(0, ret);
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
ASSERT_EQ(0, ret);
/* Should work just fine. */
ASSERT_EQ(0, ret);
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+ ASSERT_EQ(0, ret);
/* Should work just fine. */
EXPECT_EQ(parent, syscall(__NR_getppid));
/* No ptracer */
EXPECT_EQ(-1, syscall(__NR_getpid));
}
+TEST_F(precedence, log_is_fifth)
+{
+ pid_t mypid, parent;
+ long ret;
+
+ mypid = getpid();
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ /* Should also work just fine */
+ EXPECT_EQ(mypid, syscall(__NR_getpid));
+}
+
+TEST_F(precedence, log_is_fifth_in_any_order)
+{
+ pid_t mypid, parent;
+ long ret;
+
+ mypid = getpid();
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ /* Should also work just fine */
+ EXPECT_EQ(mypid, syscall(__NR_getpid));
+}
+
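(For orientation: the return-action precedence, strongest first, is SECCOMP_RET_KILL, SECCOMP_RET_TRAP, SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE, SECCOMP_RET_LOG, SECCOMP_RET_ALLOW; hence "log is fifth" — SECCOMP_RET_LOG overrides only SECCOMP_RET_ALLOW.)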
#ifndef PTRACE_O_TRACESECCOMP
#define PTRACE_O_TRACESECCOMP 0x00000080
#endif
# error "Do not know how to find your architecture's registers and syscalls"
#endif
+/* When the syscall return can't be changed, stub out the tests for it. */
+#ifdef SYSCALL_NUM_RET_SHARE_REG
+# define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(-1, action)
+#else
+# define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(val, action)
+#endif
+
/* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
* architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
*/
#ifdef SYSCALL_NUM_RET_SHARE_REG
TH_LOG("Can't modify syscall return on this architecture");
#else
- regs.SYSCALL_RET = 1;
+ regs.SYSCALL_RET = EPERM;
#endif
#ifdef HAVE_GETREGS
if (nr == __NR_getpid)
change_syscall(_metadata, tracee, __NR_getppid);
+ if (nr == __NR_open)
+ change_syscall(_metadata, tracee, -1);
}
FIXTURE_DATA(TRACE_syscall) {
free(self->prog.filter);
}
+TEST_F(TRACE_syscall, ptrace_syscall_redirected)
+{
+ /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
+ teardown_trace_fixture(_metadata, self->tracer);
+ self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
+ true);
+
+ /* Tracer will redirect getpid to getppid. */
+ EXPECT_NE(self->mypid, syscall(__NR_getpid));
+}
+
+TEST_F(TRACE_syscall, ptrace_syscall_dropped)
+{
+ /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
+ teardown_trace_fixture(_metadata, self->tracer);
+ self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
+ true);
+
+ /* Tracer should skip the open syscall, resulting in EPERM. */
+ EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_open));
+}
+
TEST_F(TRACE_syscall, syscall_allowed)
{
long ret;
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
ASSERT_EQ(0, ret);
-#ifdef SYSCALL_NUM_RET_SHARE_REG
- /* gettid has been skipped */
- EXPECT_EQ(-1, syscall(__NR_gettid));
-#else
/* gettid has been skipped and an altered return value stored. */
- EXPECT_EQ(1, syscall(__NR_gettid));
-#endif
+ EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_gettid));
EXPECT_NE(self->mytid, syscall(__NR_gettid));
}
ASSERT_EQ(0, ret);
/* Tracer will redirect getpid to getppid, and we should see EPERM. */
+ errno = 0;
EXPECT_EQ(-1, syscall(__NR_getpid));
EXPECT_EQ(EPERM, errno);
}
EXPECT_NE(self->mypid, syscall(__NR_getpid));
}
-#ifndef __NR_seccomp
-# if defined(__i386__)
-# define __NR_seccomp 354
-# elif defined(__x86_64__)
-# define __NR_seccomp 317
-# elif defined(__arm__)
-# define __NR_seccomp 383
-# elif defined(__aarch64__)
-# define __NR_seccomp 277
-# elif defined(__hppa__)
-# define __NR_seccomp 338
-# elif defined(__powerpc__)
-# define __NR_seccomp 358
-# elif defined(__s390__)
-# define __NR_seccomp 348
-# else
-# warning "seccomp syscall number unknown for this architecture"
-# define __NR_seccomp 0xffff
-# endif
-#endif
-
-#ifndef SECCOMP_SET_MODE_STRICT
-#define SECCOMP_SET_MODE_STRICT 0
-#endif
-
-#ifndef SECCOMP_SET_MODE_FILTER
-#define SECCOMP_SET_MODE_FILTER 1
-#endif
-
-#ifndef SECCOMP_FILTER_FLAG_TSYNC
-#define SECCOMP_FILTER_FLAG_TSYNC 1
-#endif
-
-#ifndef seccomp
-int seccomp(unsigned int op, unsigned int flags, void *args)
-{
- errno = 0;
- return syscall(__NR_seccomp, op, flags, args);
-}
-#endif
-
TEST(seccomp_syscall)
{
struct sock_filter filter[] = {
}
}
+/*
+ * Test detection of known and unknown filter flags. Userspace needs to be able
+ * to check if a filter flag is supported by the current kernel and a good way
+ * of doing that is by attempting to enter filter mode, with the flag bit in
+ * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates
+ * that the flag is valid and EINVAL indicates that the flag is invalid.
+ */
+TEST(detect_seccomp_filter_flags)
+{
+ unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
+ SECCOMP_FILTER_FLAG_LOG };
+ unsigned int flag, all_flags;
+ int i;
+ long ret;
+
+ /* Test detection of known-good filter flags */
+ for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) {
+ flag = flags[i];
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
+ ASSERT_NE(ENOSYS, errno) {
+ TH_LOG("Kernel does not support seccomp syscall!");
+ }
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EFAULT, errno) {
+ TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!",
+ flag);
+ }
+
+ all_flags |= flag;
+ }
+
+ /* Test detection of all known-good filter flags */
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, all_flags, NULL);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EFAULT, errno) {
+ TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!",
+ all_flags);
+ }
+
+ /* Test detection of an unknown filter flag */
+ flag = -1;
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EINVAL, errno) {
+ TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!",
+ flag);
+ }
+
+ /*
+ * Test detection of an unknown filter flag that may simply need to be
+ * added to this test
+ */
+ flag = flags[ARRAY_SIZE(flags) - 1] << 1;
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EINVAL, errno) {
+ TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?",
+ flag);
+ }
+}
+
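Outside the kselftest harness, the same probing recipe fits in a small helper. A minimal sketch, assuming a libc that exposes __NR_seccomp; the helper name is ours:

	#include <errno.h>
	#include <stdbool.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/seccomp.h>

	/* A supported flag fails with EFAULT (the NULL args pointer is
	 * rejected only after the flag is accepted); an unsupported flag
	 * fails with EINVAL instead.
	 */
	static bool seccomp_flag_supported(unsigned int flag)
	{
		return syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
			       flag, NULL) == -1 && errno == EFAULT;
	}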
TEST(TSYNC_first)
{
struct sock_filter filter[] = {
_metadata->passed = 0;
}
+TEST_SIGNAL(filter_flag_log, SIGSYS)
+{
+ struct sock_filter allow_filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_filter kill_filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog allow_prog = {
+ .len = (unsigned short)ARRAY_SIZE(allow_filter),
+ .filter = allow_filter,
+ };
+ struct sock_fprog kill_prog = {
+ .len = (unsigned short)ARRAY_SIZE(kill_filter),
+ .filter = kill_filter,
+ };
+ long ret;
+ pid_t parent = getppid();
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ /* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */
+ ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG,
+ &allow_prog);
+ ASSERT_NE(ENOSYS, errno) {
+ TH_LOG("Kernel does not support seccomp syscall!");
+ }
+ EXPECT_NE(0, ret) {
+ TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!");
+ }
+ EXPECT_EQ(EINVAL, errno) {
+ TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!");
+ }
+
+ /* Verify that a simple, permissive filter can be added with no flags */
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog);
+ EXPECT_EQ(0, ret);
+
+ /* See if the same filter can be added with the FILTER_FLAG_LOG flag */
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
+ &allow_prog);
+ ASSERT_NE(EINVAL, errno) {
+ TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!");
+ }
+ EXPECT_EQ(0, ret);
+
+ /* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
+ &kill_prog);
+ EXPECT_EQ(0, ret);
+
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ /* getpid() should never return. */
+ EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
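Note the distinction this test exercises: SECCOMP_FILTER_FLAG_LOG is an attach-time flag requesting that every return action of that filter except SECCOMP_RET_ALLOW be logged, whereas SECCOMP_RET_LOG (covered by the precedence fixtures above) is itself an action that logs the syscall and then allows it. In application code the flag is simply passed to seccomp(2); an illustrative sketch (the helper name is ours):

	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/filter.h>
	#include <linux/seccomp.h>

	/* Attach a prepared filter with action logging enabled. */
	static long install_logged_filter(const struct sock_fprog *prog)
	{
		return syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
			       SECCOMP_FILTER_FLAG_LOG, prog);
	}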
+TEST(get_action_avail)
+{
+ __u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP,
+ SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE,
+ SECCOMP_RET_LOG, SECCOMP_RET_ALLOW };
+ __u32 unknown_action = 0x10000000U;
+ int i;
+ long ret;
+
+ ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]);
+ ASSERT_NE(ENOSYS, errno) {
+ TH_LOG("Kernel does not support seccomp syscall!");
+ }
+ ASSERT_NE(EINVAL, errno) {
+ TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!");
+ }
+ EXPECT_EQ(ret, 0);
+
+ for (i = 0; i < ARRAY_SIZE(actions); i++) {
+ ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]);
+ EXPECT_EQ(ret, 0) {
+ TH_LOG("Expected action (0x%X) not available!",
+ actions[i]);
+ }
+ }
+
+ /* Check that an unknown action is handled properly (EOPNOTSUPP) */
+ ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action);
+ EXPECT_EQ(ret, -1);
+ EXPECT_EQ(errno, EOPNOTSUPP);
+}
+
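A consumer of SECCOMP_GET_ACTION_AVAIL can use it to degrade gracefully on kernels that predate a given action. An illustrative sketch; the fallback policy is our choice, not something this patch prescribes:

	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/types.h>
	#include <linux/seccomp.h>

	/* Prefer SECCOMP_RET_LOG where the kernel offers it. */
	static __u32 default_action(void)
	{
		__u32 action = SECCOMP_RET_LOG;

		if (syscall(__NR_seccomp, SECCOMP_GET_ACTION_AVAIL,
			    0, &action) == 0)
			return SECCOMP_RET_LOG;
		return SECCOMP_RET_ALLOW;
	}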
/*
* TODO:
* - add microbenchmarks
* - endianness checking when appropriate
* - 64-bit arg prodding
* - arch value testing (x86 modes especially)
+ * - verify that FILTER_FLAG_LOG filters generate log messages
+ * - verify that RET_LOG generates log messages
* - ...
*/
stack_t stk;
struct stk_data *p;
+#if __s390x__
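+	/* The s390x ABI keeps the stack pointer in general register 15,
+	 * and "sp" is not a recognized register name there.
+	 */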
+ register unsigned long sp asm("%15");
+#else
register unsigned long sp asm("sp");
+#endif
if (sp < (unsigned long)sstack ||
sp >= (unsigned long)sstack + SIGSTKSZ) {
CFLAGS += -I../../../../usr/include/
LDFLAGS += -pthread
-TEST_PROGS = sync_test
-
-all: $(TEST_PROGS)
+.PHONY: all clean
include ../lib.mk
+# The lib.mk TEST_CUSTOM_PROGS variable is for custom tests that need
+# special build rules; lib.mk will run and install them.
+
+TEST_CUSTOM_PROGS := $(OUTPUT)/sync_test
+all: $(TEST_CUSTOM_PROGS)
+
OBJS = sync_test.o sync.o
TESTS += sync_alloc.o
TESTS += sync_stress_consumer.o
TESTS += sync_stress_merge.o
-sync_test: $(OBJS) $(TESTS)
+OBJS := $(patsubst %,$(OUTPUT)/%,$(OBJS))
+TESTS := $(patsubst %,$(OUTPUT)/%,$(TESTS))
+
+$(TEST_CUSTOM_PROGS): $(TESTS) $(OBJS)
+ $(CC) -o $(TEST_CUSTOM_PROGS) $(OBJS) $(TESTS) $(CFLAGS) $(LDFLAGS)
+
+$(OBJS): $(OUTPUT)/%.o: %.c
+	$(CC) -c $^ -o $@ $(CFLAGS)
+
+$(TESTS): $(OUTPUT)/%.o: %.c
+	$(CC) -c $^ -o $@ $(CFLAGS)
-EXTRA_CLEAN := sync_test $(OBJS) $(TESTS)
+EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(OBJS) $(TESTS)
printf("%-22s %s missing CAP_WAKE_ALARM? : [UNSUPPORTED]\n",
clockstring(clock_id),
flags ? "ABSTIME":"RELTIME");
- return 0;
+ /* Indicate timer isn't set, so caller doesn't wait */
+ return 1;
}
printf("%s - timer_create() failed\n", clockstring(clock_id));
return -1;
int err;
err = setup_timer(clock_id, flags, interval, &tm1);
+ /* err == 1 means the alarm is unsupported; don't fail the test */
if (err)
- return err;
+ return err == 1 ? 0 : err;
while (alarmcount < 5)
sleep(1);
timer_t tm1;
const int interval = 0;
struct timeval timeout;
- fd_set fds;
int err;
err = setup_timer(clock_id, flags, interval, &tm1);
+ /* err == 1 means the alarm is unsupported; don't fail the test */
if (err)
- return err;
+ return err == 1 ? 0 : err;
memset(&timeout, 0, sizeof(timeout));
timeout.tv_sec = 5;
- FD_ZERO(&fds);
do {
- err = select(FD_SETSIZE, &fds, NULL, NULL, &timeout);
+ err = select(0, NULL, NULL, NULL, &timeout);
} while (err == -1 && errno == EINTR);
timer_delete(tm1);
-TEST_PROGS := watchdog-test
-
-all: $(TEST_PROGS)
+TEST_GEN_PROGS := watchdog-test
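+# lib.mk supplies the common build, run and clean rules for TEST_GEN_PROGS,
+# so the hand-rolled all/clean targets below are no longer needed.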
include ../lib.mk
-
-clean:
- rm -fr $(TEST_PROGS)
{
if (args->flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_RESAMPLE))
return -EINVAL;
- if (args->gsi >= KVM_MAX_IRQ_ROUTES)
- return -EINVAL;
if (args->flags & KVM_IRQFD_FLAG_DEASSIGN)
return kvm_irqfd_deassign(kvm, args);