Merge tag 'dmaengine-5.7-rc1' of git://git.infradead.org/users/vkoul/slave-dma
Author:     Linus Torvalds <torvalds@linux-foundation.org>
AuthorDate: Thu, 2 Apr 2020 23:04:42 +0000 (16:04 -0700)
Commit:     Linus Torvalds <torvalds@linux-foundation.org>
CommitDate: Thu, 2 Apr 2020 23:04:42 +0000 (16:04 -0700)
Pull dmaengine updates from Vinod Koul:
 "Core:
   - Some code cleanup and optimization in core by Andy

   - Debugfs support for displaying dmaengine channels by Peter

  Drivers:
   - New driver for uniphier-xdmac controller

   - Updates to the stm32 dma, mdma and dmamux drivers, including PM
     support

   - More updates to the idxd driver

   - A bunch of changes in the tegra-apb driver and cleanup of its PM
     functions

   - A bunch of spelling fixes and zero-length-array replacement patches

   - Shutdown hook for fsl-dpaa2-qdma driver

   - Support for interleaved transfers in the ti-edma driver and
     virtualization (ATYPE) support in the k3-udma driver

   - Support for reset and updates in xilinx_dma driver

   - Improvements and locking updates in the at_hdmac driver"

* tag 'dmaengine-5.7-rc1' of git://git.infradead.org/users/vkoul/slave-dma: (89 commits)
  dt-bindings: dma: renesas,usb-dmac: add r8a77961 support
  dmaengine: uniphier-xdmac: Remove redundant error log for platform_get_irq
  dmaengine: tegra-apb: Improve DMA synchronization
  dmaengine: tegra-apb: Don't save/restore IRQ flags in interrupt handler
  dmaengine: tegra-apb: mark PM functions as __maybe_unused
  dmaengine: fix spelling mistake "exceds" -> "exceeds"
  dmaengine: sprd: Set request pending flag when DMA controller is active
  dmaengine: ppc4xx: Use scnprintf() for avoiding potential buffer overflow
  dmaengine: idxd: remove global token limit check
  dmaengine: idxd: reflect shadow copy of traffic class programming
  dmaengine: idxd: Merge definition of dsa_batch_desc into dsa_hw_desc
  dmaengine: Create debug directories for DMA devices
  dmaengine: ti: k3-udma: Implement custom dbg_summary_show for debugfs
  dmaengine: Add basic debugfs support
  dmaengine: fsl-dpaa2-qdma: remove set but not used variable 'dpaa2_qdma'
  dmaengine: ti: edma: fix null dereference because of a typo in pointer name
  dmaengine: fsl-dpaa2-qdma: Adding shutdown hook
  dmaengine: uniphier-xdmac: Add UniPhier external DMA controller driver
  dt-bindings: dmaengine: Add UniPhier external DMA controller bindings
  dmaengine: ti: k3-udma: Implement support for atype (for virtualization)
  ...

Documentation/devicetree/bindings/dma/ti/k3-udma.yaml
drivers/dma/dmaengine.c
drivers/dma/idxd/cdev.c
drivers/dma/idxd/sysfs.c
drivers/dma/tegra20-apb-dma.c
drivers/dma/ti/k3-udma-glue.c
drivers/dma/ti/k3-udma.c

diff --combined Documentation/devicetree/bindings/dma/ti/k3-udma.yaml
@@@ -45,7 -45,8 +45,8 @@@ allOf
  
  properties:
    "#dma-cells":
-     const: 1
+     minimum: 1
+     maximum: 2
      description: |
        The cell is the PSI-L thread ID of the remote (to UDMAP) end.
        Valid ranges for thread ID depends on the data movement direction:
@@@ -55,6 -56,8 +56,8 @@@
        Please refer to the device documentation for the PSI-L thread map and also
        the PSI-L peripheral chapter for the correct thread ID.
  
+       When #dma-cells is 2, the second parameter is the channel ATYPE.
    compatible:
      enum:
        - ti,am654-navss-main-udmap
@@@ -131,6 -134,20 +134,20 @@@ required
    - ti,sci-rm-range-rchan
    - ti,sci-rm-range-rflow
  
+ if:
+   properties:
+     "#dma-cells":
+       const: 2
+ then:
+   properties:
+     ti,udma-atype:
+       description: ATYPE value which should be used by non-slave channels
+       allOf:
+         - $ref: /schemas/types.yaml#/definitions/uint32
+   required:
+     - ti,udma-atype
  examples:
    - |+
      cbass_main {
              #size-cells = <2>;
              dma-coherent;
              dma-ranges;
 -            ranges;
 +            ranges = <0x0 0x30800000 0x0 0x30800000 0x0 0x05000000>;
  
              ti,sci-dev-id = <118>;
  
                  ti,sci-rm-range-rflow = <0x6>; /* GP RFLOW */
              };
          };
 -
 -        mcasp0: mcasp@02B00000 {
 -            dmas = <&main_udmap 0xc400>, <&main_udmap 0x4400>;
 -            dma-names = "tx", "rx";
 -        };
 -
 -        crypto: crypto@4E00000 {
 -            compatible = "ti,sa2ul-crypto";
 -
 -            dmas = <&main_udmap 0xc000>, <&main_udmap 0x4000>, <&main_udmap 0x4001>;
 -            dma-names = "tx", "rx1", "rx2";
 -        };
      };
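
For illustration, a minimal sketch of how a consumer of the two-cell binding
might read the optional ATYPE cell, mirroring the of_k3_udma_glue_parse_chn()
change in drivers/dma/ti/k3-udma-glue.c further down in this diff; the 0..2
range check is taken from that hunk, the function name is only a placeholder:

	static int parse_udma_dma_spec(struct of_phandle_args *dma_spec,
				       u32 *thread_id, u32 *atype)
	{
		*thread_id = dma_spec->args[0];
		*atype = 0;			/* default when #dma-cells = <1> */

		if (dma_spec->args_count == 2) {
			if (dma_spec->args[1] > 2)	/* only ATYPE 0..2 defined */
				return -EINVAL;
			*atype = dma_spec->args[1];
		}

		return 0;
	}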
diff --combined drivers/dma/dmaengine.c
@@@ -58,6 -58,87 +58,87 @@@ static DEFINE_IDA(dma_ida)
  static LIST_HEAD(dma_device_list);
  static long dmaengine_ref_count;
  
+ /* --- debugfs implementation --- */
+ #ifdef CONFIG_DEBUG_FS
+ #include <linux/debugfs.h>
+ 
+ static struct dentry *rootdir;
+ 
+ static void dmaengine_debug_register(struct dma_device *dma_dev)
+ {
+       dma_dev->dbg_dev_root = debugfs_create_dir(dev_name(dma_dev->dev),
+                                                  rootdir);
+       if (IS_ERR(dma_dev->dbg_dev_root))
+               dma_dev->dbg_dev_root = NULL;
+ }
+ 
+ static void dmaengine_debug_unregister(struct dma_device *dma_dev)
+ {
+       debugfs_remove_recursive(dma_dev->dbg_dev_root);
+       dma_dev->dbg_dev_root = NULL;
+ }
+ 
+ static void dmaengine_dbg_summary_show(struct seq_file *s,
+                                      struct dma_device *dma_dev)
+ {
+       struct dma_chan *chan;
+ 
+       list_for_each_entry(chan, &dma_dev->channels, device_node) {
+               if (chan->client_count) {
+                       seq_printf(s, " %-13s| %s", dma_chan_name(chan),
+                                  chan->dbg_client_name ?: "in-use");
+ 
+                       if (chan->router)
+                               seq_printf(s, " (via router: %s)\n",
+                                       dev_name(chan->router->dev));
+                       else
+                               seq_puts(s, "\n");
+               }
+       }
+ }
+ 
+ static int dmaengine_summary_show(struct seq_file *s, void *data)
+ {
+       struct dma_device *dma_dev = NULL;
+ 
+       mutex_lock(&dma_list_mutex);
+       list_for_each_entry(dma_dev, &dma_device_list, global_node) {
+               seq_printf(s, "dma%d (%s): number of channels: %u\n",
+                          dma_dev->dev_id, dev_name(dma_dev->dev),
+                          dma_dev->chancnt);
+ 
+               if (dma_dev->dbg_summary_show)
+                       dma_dev->dbg_summary_show(s, dma_dev);
+               else
+                       dmaengine_dbg_summary_show(s, dma_dev);
+ 
+               if (!list_is_last(&dma_dev->global_node, &dma_device_list))
+                       seq_puts(s, "\n");
+       }
+       mutex_unlock(&dma_list_mutex);
+ 
+       return 0;
+ }
+ DEFINE_SHOW_ATTRIBUTE(dmaengine_summary);
+ 
+ static void __init dmaengine_debugfs_init(void)
+ {
+       rootdir = debugfs_create_dir("dmaengine", NULL);
+ 
+       /* /sys/kernel/debug/dmaengine/summary */
+       debugfs_create_file("summary", 0444, rootdir, NULL,
+                           &dmaengine_summary_fops);
+ }
+ #else
+ static inline void dmaengine_debugfs_init(void) { }
+ static inline void dmaengine_debug_register(struct dma_device *dma_dev) { }
+ static inline void dmaengine_debug_unregister(struct dma_device *dma_dev) { }
+ #endif        /* CONFIG_DEBUG_FS */
+ 
  /* --- sysfs implementation --- */
  
  #define DMA_SLAVE_NAME        "slave"
@@@ -760,6 -841,11 +841,11 @@@ struct dma_chan *dma_request_chan(struc
                return chan ? chan : ERR_PTR(-EPROBE_DEFER);
  
  found:
+ #ifdef CONFIG_DEBUG_FS
+       chan->dbg_client_name = kasprintf(GFP_KERNEL, "%s:%s", dev_name(dev),
+                                         name);
+ #endif
        chan->name = kasprintf(GFP_KERNEL, "dma:%s", name);
        if (!chan->name)
                return chan;
@@@ -837,6 -923,11 +923,11 @@@ void dma_release_channel(struct dma_cha
                chan->name = NULL;
                chan->slave = NULL;
        }
+ #ifdef CONFIG_DEBUG_FS
+       kfree(chan->dbg_client_name);
+       chan->dbg_client_name = NULL;
+ #endif
        mutex_unlock(&dma_list_mutex);
  }
  EXPORT_SYMBOL_GPL(dma_release_channel);
@@@ -1151,7 -1242,7 +1242,7 @@@ int dma_async_device_register(struct dm
        }
  
        if (!device->device_release)
 -              dev_warn(device->dev,
 +              dev_dbg(device->dev,
                         "WARN: Device release is not defined so it is not safe to unbind this driver while in use\n");
  
        kref_init(&device->ref);
        dma_channel_rebalance();
        mutex_unlock(&dma_list_mutex);
  
+       dmaengine_debug_register(device);
        return 0;
  
  err_out:
@@@ -1229,6 -1322,8 +1322,8 @@@ void dma_async_device_unregister(struc
  {
        struct dma_chan *chan, *n;
  
+       dmaengine_debug_unregister(device);
        list_for_each_entry_safe(chan, n, &device->channels, device_node)
                __dma_async_device_channel_unregister(device, chan);
  
@@@ -1559,6 -1654,11 +1654,11 @@@ static int __init dma_bus_init(void
  
        if (err)
                return err;
-       return class_register(&dma_devclass);
+       err = class_register(&dma_devclass);
+       if (!err)
+               dmaengine_debugfs_init();
+       return err;
  }
  arch_initcall(dma_bus_init);
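
As a usage sketch of the new debugfs hook: a controller driver that wants a
richer listing than the generic per-channel one can install its own callback
before registering the device, which is what the k3-udma patch in this series
does. The foo_* names below are illustrative only, not from this series:

	static void foo_dbg_summary_show(struct seq_file *s,
					 struct dma_device *dma_dev)
	{
		struct dma_chan *chan;

		list_for_each_entry(chan, &dma_dev->channels, device_node) {
			if (chan->client_count)
				seq_printf(s, " %-13s| busy\n",
					   dma_chan_name(chan));
		}
	}

	/* in probe, before dma_async_device_register(dma_dev): */
	dma_dev->dbg_summary_show = foo_dbg_summary_show;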
diff --combined drivers/dma/idxd/cdev.c
@@@ -74,16 -74,14 +74,14 @@@ static int idxd_cdev_open(struct inode 
        struct idxd_device *idxd;
        struct idxd_wq *wq;
        struct device *dev;
-       struct idxd_cdev *idxd_cdev;
  
        wq = inode_wq(inode);
        idxd = wq->idxd;
        dev = &idxd->pdev->dev;
-       idxd_cdev = &wq->idxd_cdev;
  
 -      dev_dbg(dev, "%s called\n", __func__);
 +      dev_dbg(dev, "%s called: %d\n", __func__, idxd_wq_refcount(wq));
  
 -      if (idxd_wq_refcount(wq) > 1 && wq_dedicated(wq))
 +      if (idxd_wq_refcount(wq) > 0 && wq_dedicated(wq))
                return -EBUSY;
  
        ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
@@@ -139,6 -137,8 +137,8 @@@ static int idxd_cdev_mmap(struct file *
  
        dev_dbg(&pdev->dev, "%s called\n", __func__);
        rc = check_vma(wq, vma, __func__);
+       if (rc < 0)
+               return rc;
  
        vma->vm_flags |= VM_DONTCOPY;
        pfn = (base + idxd_get_wq_portal_full_offset(wq->id,
@@@ -204,7 -204,6 +204,7 @@@ static int idxd_wq_cdev_dev_setup(struc
        minor = ida_simple_get(&cdev_ctx->minor_ida, 0, MINORMASK, GFP_KERNEL);
        if (minor < 0) {
                rc = minor;
 +              kfree(dev);
                goto ida_err;
        }
  
        rc = device_register(dev);
        if (rc < 0) {
                dev_err(&idxd->pdev->dev, "device register failed\n");
 -              put_device(dev);
                goto dev_reg_err;
        }
        idxd_cdev->minor = minor;
  
   dev_reg_err:
        ida_simple_remove(&cdev_ctx->minor_ida, MINOR(dev->devt));
 +      put_device(dev);
   ida_err:
 -      kfree(dev);
        idxd_cdev->dev = NULL;
        return rc;
  }
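
The reordered error path above follows the usual driver-model rule: before
device_register() is called the bare allocation may still be kfree()d, but
once device_register() has run (even when it fails) the struct device is
owned by its embedded kobject and must be dropped with put_device() so that
its release() callback frees it. A condensed, illustrative sketch of that
rule, not the exact code:

	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
	if (!dev)
		return -ENOMEM;

	minor = ida_simple_get(&cdev_ctx->minor_ida, 0, MINORMASK, GFP_KERNEL);
	if (minor < 0) {
		kfree(dev);		/* not registered yet: plain kfree() */
		return minor;
	}

	rc = device_register(dev);
	if (rc < 0) {
		ida_simple_remove(&cdev_ctx->minor_ida, minor);
		put_device(dev);	/* registered: release() does the free */
		return rc;
	}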
diff --combined drivers/dma/idxd/sysfs.c
@@@ -124,7 -124,6 +124,7 @@@ static int idxd_config_bus_probe(struc
                rc = idxd_device_config(idxd);
                if (rc < 0) {
                        spin_unlock_irqrestore(&idxd->dev_lock, flags);
 +                      module_put(THIS_MODULE);
                        dev_warn(dev, "Device config failed: %d\n", rc);
                        return rc;
                }
                rc = idxd_device_enable(idxd);
                if (rc < 0) {
                        spin_unlock_irqrestore(&idxd->dev_lock, flags);
 +                      module_put(THIS_MODULE);
                        dev_warn(dev, "Device enable failed: %d\n", rc);
                        return rc;
                }
                rc = idxd_register_dma_device(idxd);
                if (rc < 0) {
                        spin_unlock_irqrestore(&idxd->dev_lock, flags);
 +                      module_put(THIS_MODULE);
                        dev_dbg(dev, "Failed to register dmaengine device\n");
                        return rc;
                }
@@@ -419,7 -416,7 +419,7 @@@ static ssize_t engine_group_id_store(st
        struct idxd_device *idxd = engine->idxd;
        long id;
        int rc;
-       struct idxd_group *prevg, *group;
+       struct idxd_group *prevg;
  
        rc = kstrtol(buf, 10, &id);
        if (rc < 0)
                return count;
        }
  
-       group = &idxd->groups[id];
        prevg = engine->group;
  
        if (prevg)
@@@ -513,13 -509,10 +512,10 @@@ static ssize_t group_tokens_reserved_st
        if (idxd->state == IDXD_DEV_ENABLED)
                return -EPERM;
  
-       if (idxd->token_limit == 0)
-               return -EPERM;
        if (val > idxd->max_tokens)
                return -EINVAL;
  
 -      if (val > idxd->nr_tokens)
 +      if (val > idxd->nr_tokens + group->tokens_reserved)
                return -EINVAL;
  
        group->tokens_reserved = val;
@@@ -561,8 -554,6 +557,6 @@@ static ssize_t group_tokens_allowed_sto
        if (idxd->state == IDXD_DEV_ENABLED)
                return -EPERM;
  
-       if (idxd->token_limit == 0)
-               return -EPERM;
        if (val < 4 * group->num_engines ||
            val > group->tokens_reserved + idxd->nr_tokens)
                return -EINVAL;
@@@ -904,20 -895,6 +898,20 @@@ static ssize_t wq_size_show(struct devi
        return sprintf(buf, "%u\n", wq->size);
  }
  
 +static int total_claimed_wq_size(struct idxd_device *idxd)
 +{
 +      int i;
 +      int wq_size = 0;
 +
 +      for (i = 0; i < idxd->max_wqs; i++) {
 +              struct idxd_wq *wq = &idxd->wqs[i];
 +
 +              wq_size += wq->size;
 +      }
 +
 +      return wq_size;
 +}
 +
  static ssize_t wq_size_store(struct device *dev,
                             struct device_attribute *attr, const char *buf,
                             size_t count)
        if (wq->state != IDXD_WQ_DISABLED)
                return -EPERM;
  
 -      if (size > idxd->max_wq_size)
 +      if (size + total_claimed_wq_size(idxd) - wq->size > idxd->max_wq_size)
                return -EINVAL;
  
        wq->size = size;
@@@ -1016,14 -993,12 +1010,14 @@@ static ssize_t wq_type_store(struct dev
                return -EPERM;
  
        old_type = wq->type;
 -      if (sysfs_streq(buf, idxd_wq_type_names[IDXD_WQT_KERNEL]))
 +      if (sysfs_streq(buf, idxd_wq_type_names[IDXD_WQT_NONE]))
 +              wq->type = IDXD_WQT_NONE;
 +      else if (sysfs_streq(buf, idxd_wq_type_names[IDXD_WQT_KERNEL]))
                wq->type = IDXD_WQT_KERNEL;
        else if (sysfs_streq(buf, idxd_wq_type_names[IDXD_WQT_USER]))
                wq->type = IDXD_WQT_USER;
        else
 -              wq->type = IDXD_WQT_NONE;
 +              return -EINVAL;
  
        /* If we are changing queue type, clear the name */
        if (wq->type != old_type)
@@@ -1180,6 -1155,16 +1174,16 @@@ static ssize_t op_cap_show(struct devic
  }
  static DEVICE_ATTR_RO(op_cap);
  
+ static ssize_t gen_cap_show(struct device *dev,
+                           struct device_attribute *attr, char *buf)
+ {
+       struct idxd_device *idxd =
+               container_of(dev, struct idxd_device, conf_dev);
+ 
+       return sprintf(buf, "%#llx\n", idxd->hw.gen_cap.bits);
+ }
+ static DEVICE_ATTR_RO(gen_cap);
+ 
  static ssize_t configurable_show(struct device *dev,
                                 struct device_attribute *attr, char *buf)
  {
@@@ -1317,6 -1302,7 +1321,7 @@@ static struct attribute *idxd_device_at
        &dev_attr_max_batch_size.attr,
        &dev_attr_max_transfer_size.attr,
        &dev_attr_op_cap.attr,
+       &dev_attr_gen_cap.attr,
        &dev_attr_configurable.attr,
        &dev_attr_clients.attr,
        &dev_attr_state.attr,
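
The wq_size_store() check above only accepts a new size when the total claimed
size across all work queues, with this queue's old size swapped out for the
new one, still fits the device limit: e.g. with max_wq_size = 128 and queues
claiming 64 + 32, resizing the 32-entry queue allows at most 64 entries
(size + 96 - 32 <= 128). A standalone, illustrative restatement using the
helper shown above:

	static bool wq_new_size_fits(struct idxd_device *idxd,
				     struct idxd_wq *wq, unsigned long size)
	{
		/* new size + everyone's claims - this wq's old claim */
		return size + total_claimed_wq_size(idxd) - wq->size <=
		       idxd->max_wq_size;
	}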
diff --combined drivers/dma/tegra20-apb-dma.c
@@@ -24,6 -24,7 +24,7 @@@
  #include <linux/pm_runtime.h>
  #include <linux/reset.h>
  #include <linux/slab.h>
+ #include <linux/wait.h>
  
  #include "dmaengine.h"
  
@@@ -59,7 -60,7 +60,7 @@@
  #define TEGRA_APBDMA_STATUS_COUNT_MASK                0xFFFC
  
  #define TEGRA_APBDMA_CHAN_CSRE                        0x00C
- #define TEGRA_APBDMA_CHAN_CSRE_PAUSE          (1 << 31)
+ #define TEGRA_APBDMA_CHAN_CSRE_PAUSE          BIT(31)
  
  /* AHB memory address */
  #define TEGRA_APBDMA_CHAN_AHBPTR              0x010
@@@ -120,21 -121,21 +121,21 @@@ struct tegra_dma
   * @support_separate_wcount_reg: Support separate word count register.
   */
  struct tegra_dma_chip_data {
-       int nr_channels;
-       int channel_reg_size;
-       int max_dma_count;
+       unsigned int nr_channels;
+       unsigned int channel_reg_size;
+       unsigned int max_dma_count;
        bool support_channel_pause;
        bool support_separate_wcount_reg;
  };
  
  /* DMA channel registers */
  struct tegra_dma_channel_regs {
-       unsigned long   csr;
-       unsigned long   ahb_ptr;
-       unsigned long   apb_ptr;
-       unsigned long   ahb_seq;
-       unsigned long   apb_seq;
-       unsigned long   wcount;
+       u32 csr;
+       u32 ahb_ptr;
+       u32 apb_ptr;
+       u32 ahb_seq;
+       u32 apb_seq;
+       u32 wcount;
  };
  
  /*
@@@ -168,7 -169,7 +169,7 @@@ struct tegra_dma_desc 
        struct list_head                node;
        struct list_head                tx_list;
        struct list_head                cb_node;
-       int                             cb_count;
+       unsigned int                    cb_count;
  };
  
  struct tegra_dma_channel;
@@@ -181,8 -182,7 +182,7 @@@ struct tegra_dma_channel 
        struct dma_chan         dma_chan;
        char                    name[12];
        bool                    config_init;
-       int                     id;
-       int                     irq;
+       unsigned int            id;
        void __iomem            *chan_addr;
        spinlock_t              lock;
        bool                    busy;
        /* Channel-slave specific configuration */
        unsigned int slave_id;
        struct dma_slave_config dma_sconfig;
-       struct tegra_dma_channel_regs   channel_reg;
+       struct tegra_dma_channel_regs channel_reg;
+       struct wait_queue_head wq;
  };
  
  /* tegra_dma: Tegra DMA specific information */
@@@ -222,9 -224,6 +224,6 @@@ struct tegra_dma 
         */
        u32                             global_pause_count;
  
-       /* Some register need to be cache before suspend */
-       u32                             reg_gen;
        /* Last member of the structure */
        struct tegra_dma_channel channels[0];
  };
@@@ -240,7 -239,7 +239,7 @@@ static inline u32 tdma_read(struct tegr
  }
  
  static inline void tdc_write(struct tegra_dma_channel *tdc,
-               u32 reg, u32 val)
+                            u32 reg, u32 val)
  {
        writel(val, tdc->chan_addr + reg);
  }
@@@ -255,8 -254,8 +254,8 @@@ static inline struct tegra_dma_channel 
        return container_of(dc, struct tegra_dma_channel, dma_chan);
  }
  
- static inline struct tegra_dma_desc *txd_to_tegra_dma_desc(
-               struct dma_async_tx_descriptor *td)
+ static inline struct tegra_dma_desc *
+ txd_to_tegra_dma_desc(struct dma_async_tx_descriptor *td)
  {
        return container_of(td, struct tegra_dma_desc, txd);
  }
@@@ -267,12 -266,9 +266,9 @@@ static inline struct device *tdc2dev(st
  }
  
  static dma_cookie_t tegra_dma_tx_submit(struct dma_async_tx_descriptor *tx);
- static int tegra_dma_runtime_suspend(struct device *dev);
- static int tegra_dma_runtime_resume(struct device *dev);
  
  /* Get DMA desc from free list, if not there then allocate it.  */
- static struct tegra_dma_desc *tegra_dma_desc_get(
-               struct tegra_dma_channel *tdc)
+ static struct tegra_dma_desc *tegra_dma_desc_get(struct tegra_dma_channel *tdc)
  {
        struct tegra_dma_desc *dma_desc;
        unsigned long flags;
  
        /* Do not allocate if desc are waiting for ack */
        list_for_each_entry(dma_desc, &tdc->free_dma_desc, node) {
 -              if (async_tx_test_ack(&dma_desc->txd)) {
 +              if (async_tx_test_ack(&dma_desc->txd) && !dma_desc->cb_count) {
                        list_del(&dma_desc->node);
                        spin_unlock_irqrestore(&tdc->lock, flags);
                        dma_desc->txd.flags = 0;
        dma_async_tx_descriptor_init(&dma_desc->txd, &tdc->dma_chan);
        dma_desc->txd.tx_submit = tegra_dma_tx_submit;
        dma_desc->txd.flags = 0;
        return dma_desc;
  }
  
  static void tegra_dma_desc_put(struct tegra_dma_channel *tdc,
-               struct tegra_dma_desc *dma_desc)
+                              struct tegra_dma_desc *dma_desc)
  {
        unsigned long flags;
  
        spin_unlock_irqrestore(&tdc->lock, flags);
  }
  
- static struct tegra_dma_sg_req *tegra_dma_sg_req_get(
-               struct tegra_dma_channel *tdc)
+ static struct tegra_dma_sg_req *
+ tegra_dma_sg_req_get(struct tegra_dma_channel *tdc)
  {
-       struct tegra_dma_sg_req *sg_req = NULL;
+       struct tegra_dma_sg_req *sg_req;
        unsigned long flags;
  
        spin_lock_irqsave(&tdc->lock, flags);
        if (!list_empty(&tdc->free_sg_req)) {
-               sg_req = list_first_entry(&tdc->free_sg_req,
-                                       typeof(*sg_req), node);
+               sg_req = list_first_entry(&tdc->free_sg_req, typeof(*sg_req),
+                                         node);
                list_del(&sg_req->node);
                spin_unlock_irqrestore(&tdc->lock, flags);
                return sg_req;
        }
        spin_unlock_irqrestore(&tdc->lock, flags);
  
-       sg_req = kzalloc(sizeof(struct tegra_dma_sg_req), GFP_NOWAIT);
+       sg_req = kzalloc(sizeof(*sg_req), GFP_NOWAIT);
  
        return sg_req;
  }
  
  static int tegra_dma_slave_config(struct dma_chan *dc,
-               struct dma_slave_config *sconfig)
+                                 struct dma_slave_config *sconfig)
  {
        struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
  
                tdc->slave_id = sconfig->slave_id;
        }
        tdc->config_init = true;
        return 0;
  }
  
  static void tegra_dma_global_pause(struct tegra_dma_channel *tdc,
-       bool wait_for_burst_complete)
+                                  bool wait_for_burst_complete)
  {
        struct tegra_dma *tdma = tdc->tdma;
  
@@@ -392,13 -390,13 +390,13 @@@ out
  }
  
  static void tegra_dma_pause(struct tegra_dma_channel *tdc,
-       bool wait_for_burst_complete)
+                           bool wait_for_burst_complete)
  {
        struct tegra_dma *tdma = tdc->tdma;
  
        if (tdma->chip_data->support_channel_pause) {
                tdc_write(tdc, TEGRA_APBDMA_CHAN_CSRE,
-                               TEGRA_APBDMA_CHAN_CSRE_PAUSE);
+                         TEGRA_APBDMA_CHAN_CSRE_PAUSE);
                if (wait_for_burst_complete)
                        udelay(TEGRA_APBDMA_BURST_COMPLETE_TIME);
        } else {
@@@ -410,17 -408,15 +408,15 @@@ static void tegra_dma_resume(struct teg
  {
        struct tegra_dma *tdma = tdc->tdma;
  
-       if (tdma->chip_data->support_channel_pause) {
+       if (tdma->chip_data->support_channel_pause)
                tdc_write(tdc, TEGRA_APBDMA_CHAN_CSRE, 0);
-       } else {
+       else
                tegra_dma_global_resume(tdc);
-       }
  }
  
  static void tegra_dma_stop(struct tegra_dma_channel *tdc)
  {
-       u32 csr;
-       u32 status;
+       u32 csr, status;
  
        /* Disable interrupts */
        csr = tdc_read(tdc, TEGRA_APBDMA_CHAN_CSR);
  }
  
  static void tegra_dma_start(struct tegra_dma_channel *tdc,
-               struct tegra_dma_sg_req *sg_req)
+                           struct tegra_dma_sg_req *sg_req)
  {
        struct tegra_dma_channel_regs *ch_regs = &sg_req->ch_regs;
  
  
        /* Start DMA */
        tdc_write(tdc, TEGRA_APBDMA_CHAN_CSR,
-                               ch_regs->csr | TEGRA_APBDMA_CSR_ENB);
+                 ch_regs->csr | TEGRA_APBDMA_CSR_ENB);
  }
  
  static void tegra_dma_configure_for_next(struct tegra_dma_channel *tdc,
-               struct tegra_dma_sg_req *nsg_req)
+                                        struct tegra_dma_sg_req *nsg_req)
  {
        unsigned long status;
  
        tdc_write(tdc, TEGRA_APBDMA_CHAN_AHBPTR, nsg_req->ch_regs.ahb_ptr);
        if (tdc->tdma->chip_data->support_separate_wcount_reg)
                tdc_write(tdc, TEGRA_APBDMA_CHAN_WCOUNT,
-                                               nsg_req->ch_regs.wcount);
+                         nsg_req->ch_regs.wcount);
        tdc_write(tdc, TEGRA_APBDMA_CHAN_CSR,
-                               nsg_req->ch_regs.csr | TEGRA_APBDMA_CSR_ENB);
+                 nsg_req->ch_regs.csr | TEGRA_APBDMA_CSR_ENB);
        nsg_req->configured = true;
        nsg_req->words_xferred = 0;
  
@@@ -506,11 -502,7 +502,7 @@@ static void tdc_start_head_req(struct t
  {
        struct tegra_dma_sg_req *sg_req;
  
-       if (list_empty(&tdc->pending_sg_req))
-               return;
-       sg_req = list_first_entry(&tdc->pending_sg_req,
-                                       typeof(*sg_req), node);
+       sg_req = list_first_entry(&tdc->pending_sg_req, typeof(*sg_req), node);
        tegra_dma_start(tdc, sg_req);
        sg_req->configured = true;
        sg_req->words_xferred = 0;
  
  static void tdc_configure_next_head_desc(struct tegra_dma_channel *tdc)
  {
-       struct tegra_dma_sg_req *hsgreq;
-       struct tegra_dma_sg_req *hnsgreq;
-       if (list_empty(&tdc->pending_sg_req))
-               return;
+       struct tegra_dma_sg_req *hsgreq, *hnsgreq;
  
        hsgreq = list_first_entry(&tdc->pending_sg_req, typeof(*hsgreq), node);
        if (!list_is_last(&hsgreq->node, &tdc->pending_sg_req)) {
-               hnsgreq = list_first_entry(&hsgreq->node,
-                                       typeof(*hnsgreq), node);
+               hnsgreq = list_first_entry(&hsgreq->node, typeof(*hnsgreq),
+                                          node);
                tegra_dma_configure_for_next(tdc, hnsgreq);
        }
  }
  
- static inline int get_current_xferred_count(struct tegra_dma_channel *tdc,
-       struct tegra_dma_sg_req *sg_req, unsigned long status)
+ static inline unsigned int
+ get_current_xferred_count(struct tegra_dma_channel *tdc,
+                         struct tegra_dma_sg_req *sg_req,
+                         unsigned long status)
  {
        return sg_req->req_len - (status & TEGRA_APBDMA_STATUS_COUNT_MASK) - 4;
  }
  
  static void tegra_dma_abort_all(struct tegra_dma_channel *tdc)
  {
-       struct tegra_dma_sg_req *sgreq;
        struct tegra_dma_desc *dma_desc;
+       struct tegra_dma_sg_req *sgreq;
  
        while (!list_empty(&tdc->pending_sg_req)) {
-               sgreq = list_first_entry(&tdc->pending_sg_req,
-                                               typeof(*sgreq), node);
+               sgreq = list_first_entry(&tdc->pending_sg_req, typeof(*sgreq),
+                                        node);
                list_move_tail(&sgreq->node, &tdc->free_sg_req);
                if (sgreq->last_sg) {
                        dma_desc = sgreq->dma_desc;
                        /* Add in cb list if it is not there. */
                        if (!dma_desc->cb_count)
                                list_add_tail(&dma_desc->cb_node,
-                                                       &tdc->cb_desc);
+                                             &tdc->cb_desc);
                        dma_desc->cb_count++;
                }
        }
  }
  
  static bool handle_continuous_head_request(struct tegra_dma_channel *tdc,
-               struct tegra_dma_sg_req *last_sg_req, bool to_terminate)
+                                          bool to_terminate)
  {
-       struct tegra_dma_sg_req *hsgreq = NULL;
-       if (list_empty(&tdc->pending_sg_req)) {
-               dev_err(tdc2dev(tdc), "DMA is running without req\n");
-               tegra_dma_stop(tdc);
-               return false;
-       }
+       struct tegra_dma_sg_req *hsgreq;
  
        /*
         * Check that head req on list should be in flight.
        hsgreq = list_first_entry(&tdc->pending_sg_req, typeof(*hsgreq), node);
        if (!hsgreq->configured) {
                tegra_dma_stop(tdc);
-               dev_err(tdc2dev(tdc), "Error in DMA transfer, aborting DMA\n");
+               pm_runtime_put(tdc->tdma->dev);
+               dev_err(tdc2dev(tdc), "DMA transfer underflow, aborting DMA\n");
                tegra_dma_abort_all(tdc);
                return false;
        }
        /* Configure next request */
        if (!to_terminate)
                tdc_configure_next_head_desc(tdc);
        return true;
  }
  
  static void handle_once_dma_done(struct tegra_dma_channel *tdc,
-       bool to_terminate)
+                                bool to_terminate)
  {
-       struct tegra_dma_sg_req *sgreq;
        struct tegra_dma_desc *dma_desc;
+       struct tegra_dma_sg_req *sgreq;
  
        tdc->busy = false;
        sgreq = list_first_entry(&tdc->pending_sg_req, typeof(*sgreq), node);
        list_add_tail(&sgreq->node, &tdc->free_sg_req);
  
        /* Do not start DMA if it is going to be terminate */
-       if (to_terminate || list_empty(&tdc->pending_sg_req))
+       if (to_terminate)
+               return;
+       if (list_empty(&tdc->pending_sg_req)) {
+               pm_runtime_put(tdc->tdma->dev);
                return;
+       }
  
        tdc_start_head_req(tdc);
  }
  
  static void handle_cont_sngl_cycle_dma_done(struct tegra_dma_channel *tdc,
-               bool to_terminate)
+                                           bool to_terminate)
  {
-       struct tegra_dma_sg_req *sgreq;
        struct tegra_dma_desc *dma_desc;
+       struct tegra_dma_sg_req *sgreq;
        bool st;
  
        sgreq = list_first_entry(&tdc->pending_sg_req, typeof(*sgreq), node);
        if (!list_is_last(&sgreq->node, &tdc->pending_sg_req)) {
                list_move_tail(&sgreq->node, &tdc->pending_sg_req);
                sgreq->configured = false;
-               st = handle_continuous_head_request(tdc, sgreq, to_terminate);
+               st = handle_continuous_head_request(tdc, to_terminate);
                if (!st)
                        dma_desc->dma_status = DMA_ERROR;
        }
@@@ -658,13 -649,13 +649,13 @@@ static void tegra_dma_tasklet(unsigned 
        struct tegra_dma_channel *tdc = (struct tegra_dma_channel *)data;
        struct dmaengine_desc_callback cb;
        struct tegra_dma_desc *dma_desc;
+       unsigned int cb_count;
        unsigned long flags;
-       int cb_count;
  
        spin_lock_irqsave(&tdc->lock, flags);
        while (!list_empty(&tdc->cb_desc)) {
-               dma_desc  = list_first_entry(&tdc->cb_desc,
-                                       typeof(*dma_desc), cb_node);
+               dma_desc = list_first_entry(&tdc->cb_desc, typeof(*dma_desc),
+                                           cb_node);
                list_del(&dma_desc->cb_node);
                dmaengine_desc_get_callback(&dma_desc->txd, &cb);
                cb_count = dma_desc->cb_count;
  static irqreturn_t tegra_dma_isr(int irq, void *dev_id)
  {
        struct tegra_dma_channel *tdc = dev_id;
-       unsigned long status;
-       unsigned long flags;
+       u32 status;
  
-       spin_lock_irqsave(&tdc->lock, flags);
+       spin_lock(&tdc->lock);
  
        trace_tegra_dma_isr(&tdc->dma_chan, irq);
        status = tdc_read(tdc, TEGRA_APBDMA_CHAN_STATUS);
                tdc_write(tdc, TEGRA_APBDMA_CHAN_STATUS, status);
                tdc->isr_handler(tdc, false);
                tasklet_schedule(&tdc->tasklet);
-               spin_unlock_irqrestore(&tdc->lock, flags);
+               wake_up_all(&tdc->wq);
+               spin_unlock(&tdc->lock);
                return IRQ_HANDLED;
        }
  
-       spin_unlock_irqrestore(&tdc->lock, flags);
-       dev_info(tdc2dev(tdc),
-               "Interrupt already served status 0x%08lx\n", status);
+       spin_unlock(&tdc->lock);
+       dev_info(tdc2dev(tdc), "Interrupt already served status 0x%08x\n",
+                status);
        return IRQ_NONE;
  }
  
@@@ -715,6 -707,7 +707,7 @@@ static dma_cookie_t tegra_dma_tx_submit
        cookie = dma_cookie_assign(&dma_desc->txd);
        list_splice_tail_init(&dma_desc->tx_list, &tdc->pending_sg_req);
        spin_unlock_irqrestore(&tdc->lock, flags);
        return cookie;
  }
  
@@@ -722,6 -715,7 +715,7 @@@ static void tegra_dma_issue_pending(str
  {
        struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
        unsigned long flags;
+       int err;
  
        spin_lock_irqsave(&tdc->lock, flags);
        if (list_empty(&tdc->pending_sg_req)) {
                goto end;
        }
        if (!tdc->busy) {
+               err = pm_runtime_get_sync(tdc->tdma->dev);
+               if (err < 0) {
+                       dev_err(tdc2dev(tdc), "Failed to enable DMA\n");
+                       goto end;
+               }
                tdc_start_head_req(tdc);
  
                /* Continuous single mode: Configure next req */
@@@ -748,14 -748,17 +748,13 @@@ end
  static int tegra_dma_terminate_all(struct dma_chan *dc)
  {
        struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
-       struct tegra_dma_sg_req *sgreq;
        struct tegra_dma_desc *dma_desc;
+       struct tegra_dma_sg_req *sgreq;
        unsigned long flags;
-       unsigned long status;
-       unsigned long wcount;
+       u32 status, wcount;
        bool was_busy;
  
        spin_lock_irqsave(&tdc->lock, flags);
 -      if (list_empty(&tdc->pending_sg_req)) {
 -              spin_unlock_irqrestore(&tdc->lock, flags);
 -              return 0;
 -      }
  
        if (!tdc->busy)
                goto skip_dma_stop;
        tegra_dma_stop(tdc);
  
        if (!list_empty(&tdc->pending_sg_req) && was_busy) {
-               sgreq = list_first_entry(&tdc->pending_sg_req,
-                                       typeof(*sgreq), node);
+               sgreq = list_first_entry(&tdc->pending_sg_req, typeof(*sgreq),
+                                        node);
                sgreq->dma_desc->bytes_transferred +=
                                get_current_xferred_count(tdc, sgreq, wcount);
        }
        tegra_dma_resume(tdc);
  
+       pm_runtime_put(tdc->tdma->dev);
+       wake_up_all(&tdc->wq);
  skip_dma_stop:
        tegra_dma_abort_all(tdc);
  
        while (!list_empty(&tdc->cb_desc)) {
-               dma_desc  = list_first_entry(&tdc->cb_desc,
-                                       typeof(*dma_desc), cb_node);
+               dma_desc = list_first_entry(&tdc->cb_desc, typeof(*dma_desc),
+                                           cb_node);
                list_del(&dma_desc->cb_node);
                dma_desc->cb_count = 0;
        }
        spin_unlock_irqrestore(&tdc->lock, flags);
        return 0;
  }
  
+ static bool tegra_dma_eoc_interrupt_deasserted(struct tegra_dma_channel *tdc)
+ {
+       unsigned long flags;
+       u32 status;
+ 
+       spin_lock_irqsave(&tdc->lock, flags);
+       status = tdc_read(tdc, TEGRA_APBDMA_CHAN_STATUS);
+       spin_unlock_irqrestore(&tdc->lock, flags);
+ 
+       return !(status & TEGRA_APBDMA_STATUS_ISE_EOC);
+ }
+ 
+ static void tegra_dma_synchronize(struct dma_chan *dc)
+ {
+       struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+ 
+       /*
+        * The CPU handling the interrupt could be busy in an
+        * uninterruptible state; in that case a sibling CPU
+        * must wait here until the interrupt has been handled.
+        */
+       wait_event(tdc->wq, tegra_dma_eoc_interrupt_deasserted(tdc));
+ 
+       tasklet_kill(&tdc->tasklet);
+ }
+ 
  static unsigned int tegra_dma_sg_bytes_xferred(struct tegra_dma_channel *tdc,
                                               struct tegra_dma_sg_req *sg_req)
  {
-       unsigned long status, wcount = 0;
+       u32 status, wcount = 0;
  
        if (!list_is_first(&sg_req->node, &tdc->pending_sg_req))
                return 0;
  }
  
  static enum dma_status tegra_dma_tx_status(struct dma_chan *dc,
-       dma_cookie_t cookie, struct dma_tx_state *txstate)
+                                          dma_cookie_t cookie,
+                                          struct dma_tx_state *txstate)
  {
        struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
        struct tegra_dma_desc *dma_desc;
@@@ -905,11 -939,12 +935,12 @@@ found
  
        trace_tegra_dma_tx_status(&tdc->dma_chan, cookie, txstate);
        spin_unlock_irqrestore(&tdc->lock, flags);
        return ret;
  }
  
- static inline int get_bus_width(struct tegra_dma_channel *tdc,
-               enum dma_slave_buswidth slave_bw)
+ static inline unsigned int get_bus_width(struct tegra_dma_channel *tdc,
+                                        enum dma_slave_buswidth slave_bw)
  {
        switch (slave_bw) {
        case DMA_SLAVE_BUSWIDTH_1_BYTE:
                return TEGRA_APBDMA_APBSEQ_BUS_WIDTH_64;
        default:
                dev_warn(tdc2dev(tdc),
-                       "slave bw is not supported, using 32bits\n");
+                        "slave bw is not supported, using 32bits\n");
                return TEGRA_APBDMA_APBSEQ_BUS_WIDTH_32;
        }
  }
  
- static inline int get_burst_size(struct tegra_dma_channel *tdc,
-       u32 burst_size, enum dma_slave_buswidth slave_bw, int len)
+ static inline unsigned int get_burst_size(struct tegra_dma_channel *tdc,
+                                         u32 burst_size,
+                                         enum dma_slave_buswidth slave_bw,
+                                         u32 len)
  {
-       int burst_byte;
-       int burst_ahb_width;
+       unsigned int burst_byte, burst_ahb_width;
  
        /*
         * burst_size from client is in terms of the bus_width.
  }
  
  static int get_transfer_param(struct tegra_dma_channel *tdc,
-       enum dma_transfer_direction direction, unsigned long *apb_addr,
-       unsigned long *apb_seq, unsigned long *csr, unsigned int *burst_size,
-       enum dma_slave_buswidth *slave_bw)
+                             enum dma_transfer_direction direction,
+                             u32 *apb_addr,
+                             u32 *apb_seq,
+                             u32 *csr,
+                             unsigned int *burst_size,
+                             enum dma_slave_buswidth *slave_bw)
  {
        switch (direction) {
        case DMA_MEM_TO_DEV:
  
        default:
                dev_err(tdc2dev(tdc), "DMA direction is not supported\n");
-               return -EINVAL;
+               break;
        }
        return -EINVAL;
  }
  
  static void tegra_dma_prep_wcount(struct tegra_dma_channel *tdc,
-       struct tegra_dma_channel_regs *ch_regs, u32 len)
+                                 struct tegra_dma_channel_regs *ch_regs,
+                                 u32 len)
  {
        u32 len_field = (len - 4) & 0xFFFC;
  
                ch_regs->csr |= len_field;
  }
  
- static struct dma_async_tx_descriptor *tegra_dma_prep_slave_sg(
-       struct dma_chan *dc, struct scatterlist *sgl, unsigned int sg_len,
-       enum dma_transfer_direction direction, unsigned long flags,
-       void *context)
+ static struct dma_async_tx_descriptor *
+ tegra_dma_prep_slave_sg(struct dma_chan *dc,
+                       struct scatterlist *sgl,
+                       unsigned int sg_len,
+                       enum dma_transfer_direction direction,
+                       unsigned long flags,
+                       void *context)
  {
        struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+       struct tegra_dma_sg_req *sg_req = NULL;
+       u32 csr, ahb_seq, apb_ptr, apb_seq;
+       enum dma_slave_buswidth slave_bw;
        struct tegra_dma_desc *dma_desc;
-       unsigned int i;
-       struct scatterlist *sg;
-       unsigned long csr, ahb_seq, apb_ptr, apb_seq;
        struct list_head req_list;
-       struct tegra_dma_sg_req  *sg_req = NULL;
-       u32 burst_size;
-       enum dma_slave_buswidth slave_bw;
+       struct scatterlist *sg;
+       unsigned int burst_size;
+       unsigned int i;
  
        if (!tdc->config_init) {
                dev_err(tdc2dev(tdc), "DMA channel is not configured\n");
        }
  
        if (get_transfer_param(tdc, direction, &apb_ptr, &apb_seq, &csr,
-                               &burst_size, &slave_bw) < 0)
+                              &burst_size, &slave_bw) < 0)
                return NULL;
  
        INIT_LIST_HEAD(&req_list);
                len = sg_dma_len(sg);
  
                if ((len & 3) || (mem & 3) ||
-                               (len > tdc->tdma->chip_data->max_dma_count)) {
+                   len > tdc->tdma->chip_data->max_dma_count) {
                        dev_err(tdc2dev(tdc),
                                "DMA length/memory address is not supported\n");
                        tegra_dma_desc_put(tdc, dma_desc);
        return &dma_desc->txd;
  }
  
- static struct dma_async_tx_descriptor *tegra_dma_prep_dma_cyclic(
-       struct dma_chan *dc, dma_addr_t buf_addr, size_t buf_len,
-       size_t period_len, enum dma_transfer_direction direction,
-       unsigned long flags)
+ static struct dma_async_tx_descriptor *
+ tegra_dma_prep_dma_cyclic(struct dma_chan *dc, dma_addr_t buf_addr,
+                         size_t buf_len,
+                         size_t period_len,
+                         enum dma_transfer_direction direction,
+                         unsigned long flags)
  {
        struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
-       struct tegra_dma_desc *dma_desc = NULL;
        struct tegra_dma_sg_req *sg_req = NULL;
-       unsigned long csr, ahb_seq, apb_ptr, apb_seq;
-       int len;
-       size_t remain_len;
-       dma_addr_t mem = buf_addr;
-       u32 burst_size;
+       u32 csr, ahb_seq, apb_ptr, apb_seq;
        enum dma_slave_buswidth slave_bw;
+       struct tegra_dma_desc *dma_desc;
+       dma_addr_t mem = buf_addr;
+       unsigned int burst_size;
+       size_t len, remain_len;
  
        if (!buf_len || !period_len) {
                dev_err(tdc2dev(tdc), "Invalid buffer/period len\n");
  
        len = period_len;
        if ((len & 3) || (buf_addr & 3) ||
-                       (len > tdc->tdma->chip_data->max_dma_count)) {
+           len > tdc->tdma->chip_data->max_dma_count) {
                dev_err(tdc2dev(tdc), "Req len/mem address is not correct\n");
                return NULL;
        }
  
        if (get_transfer_param(tdc, direction, &apb_ptr, &apb_seq, &csr,
-                               &burst_size, &slave_bw) < 0)
+                              &burst_size, &slave_bw) < 0)
                return NULL;
  
        ahb_seq = TEGRA_APBDMA_AHBSEQ_INTR_ENB;
  static int tegra_dma_alloc_chan_resources(struct dma_chan *dc)
  {
        struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
-       struct tegra_dma *tdma = tdc->tdma;
-       int ret;
  
        dma_cookie_init(&tdc->dma_chan);
-       tdc->config_init = false;
-       ret = pm_runtime_get_sync(tdma->dev);
-       if (ret < 0)
-               return ret;
  
        return 0;
  }
  static void tegra_dma_free_chan_resources(struct dma_chan *dc)
  {
        struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
-       struct tegra_dma *tdma = tdc->tdma;
        struct tegra_dma_desc *dma_desc;
        struct tegra_dma_sg_req *sg_req;
        struct list_head dma_desc_list;
        struct list_head sg_req_list;
-       unsigned long flags;
  
        INIT_LIST_HEAD(&dma_desc_list);
        INIT_LIST_HEAD(&sg_req_list);
  
        dev_dbg(tdc2dev(tdc), "Freeing channel %d\n", tdc->id);
  
-       if (tdc->busy)
-               tegra_dma_terminate_all(dc);
+       tegra_dma_terminate_all(dc);
+       tasklet_kill(&tdc->tasklet);
  
-       spin_lock_irqsave(&tdc->lock, flags);
        list_splice_init(&tdc->pending_sg_req, &sg_req_list);
        list_splice_init(&tdc->free_sg_req, &sg_req_list);
        list_splice_init(&tdc->free_dma_desc, &dma_desc_list);
        INIT_LIST_HEAD(&tdc->cb_desc);
        tdc->config_init = false;
        tdc->isr_handler = NULL;
-       spin_unlock_irqrestore(&tdc->lock, flags);
  
        while (!list_empty(&dma_desc_list)) {
-               dma_desc = list_first_entry(&dma_desc_list,
-                                       typeof(*dma_desc), node);
+               dma_desc = list_first_entry(&dma_desc_list, typeof(*dma_desc),
+                                           node);
                list_del(&dma_desc->node);
                kfree(dma_desc);
        }
                list_del(&sg_req->node);
                kfree(sg_req);
        }
-       pm_runtime_put(tdma->dev);
  
        tdc->slave_id = TEGRA_APBDMA_SLAVE_ID_INVALID;
  }
@@@ -1320,8 -1353,8 +1349,8 @@@ static struct dma_chan *tegra_dma_of_xl
                                           struct of_dma *ofdma)
  {
        struct tegra_dma *tdma = ofdma->of_dma_data;
-       struct dma_chan *chan;
        struct tegra_dma_channel *tdc;
+       struct dma_chan *chan;
  
        if (dma_spec->args[0] > TEGRA_APBDMA_CSR_REQ_SEL_MASK) {
                dev_err(tdma->dev, "Invalid slave id: %d\n", dma_spec->args[0]);
@@@ -1374,23 -1407,48 +1403,48 @@@ static const struct tegra_dma_chip_dat
        .support_separate_wcount_reg = true,
  };
  
+ static int tegra_dma_init_hw(struct tegra_dma *tdma)
+ {
+       int err;
+ 
+       err = reset_control_assert(tdma->rst);
+       if (err) {
+               dev_err(tdma->dev, "failed to assert reset: %d\n", err);
+               return err;
+       }
+ 
+       err = clk_enable(tdma->dma_clk);
+       if (err) {
+               dev_err(tdma->dev, "failed to enable clk: %d\n", err);
+               return err;
+       }
+ 
+       /* reset DMA controller */
+       udelay(2);
+       reset_control_deassert(tdma->rst);
+ 
+       /* enable global DMA registers */
+       tdma_write(tdma, TEGRA_APBDMA_GENERAL, TEGRA_APBDMA_GENERAL_ENABLE);
+       tdma_write(tdma, TEGRA_APBDMA_CONTROL, 0);
+       tdma_write(tdma, TEGRA_APBDMA_IRQ_MASK_SET, 0xFFFFFFFF);
+ 
+       clk_disable(tdma->dma_clk);
+ 
+       return 0;
+ }
+ 
  static int tegra_dma_probe(struct platform_device *pdev)
  {
-       struct resource *res;
+       const struct tegra_dma_chip_data *cdata;
        struct tegra_dma *tdma;
+       unsigned int i;
+       size_t size;
        int ret;
-       int i;
-       const struct tegra_dma_chip_data *cdata;
  
        cdata = of_device_get_match_data(&pdev->dev);
-       if (!cdata) {
-               dev_err(&pdev->dev, "Error: No device match data found\n");
-               return -ENODEV;
-       }
+       size = struct_size(tdma, channels, cdata->nr_channels);
  
-       tdma = devm_kzalloc(&pdev->dev,
-                           struct_size(tdma, channels, cdata->nr_channels),
-                           GFP_KERNEL);
+       tdma = devm_kzalloc(&pdev->dev, size, GFP_KERNEL);
        if (!tdma)
                return -ENOMEM;
  
        tdma->chip_data = cdata;
        platform_set_drvdata(pdev, tdma);
  
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       tdma->base_addr = devm_ioremap_resource(&pdev->dev, res);
+       tdma->base_addr = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(tdma->base_addr))
                return PTR_ERR(tdma->base_addr);
  
  
        spin_lock_init(&tdma->global_lock);
  
-       pm_runtime_enable(&pdev->dev);
-       if (!pm_runtime_enabled(&pdev->dev))
-               ret = tegra_dma_runtime_resume(&pdev->dev);
-       else
-               ret = pm_runtime_get_sync(&pdev->dev);
-       if (ret < 0) {
-               pm_runtime_disable(&pdev->dev);
+       ret = clk_prepare(tdma->dma_clk);
+       if (ret)
                return ret;
-       }
-       /* Reset DMA controller */
-       reset_control_assert(tdma->rst);
-       udelay(2);
-       reset_control_deassert(tdma->rst);
  
-       /* Enable global DMA registers */
-       tdma_write(tdma, TEGRA_APBDMA_GENERAL, TEGRA_APBDMA_GENERAL_ENABLE);
-       tdma_write(tdma, TEGRA_APBDMA_CONTROL, 0);
-       tdma_write(tdma, TEGRA_APBDMA_IRQ_MASK_SET, 0xFFFFFFFFul);
+       ret = tegra_dma_init_hw(tdma);
+       if (ret)
+               goto err_clk_unprepare;
  
-       pm_runtime_put(&pdev->dev);
+       pm_runtime_irq_safe(&pdev->dev);
+       pm_runtime_enable(&pdev->dev);
  
        INIT_LIST_HEAD(&tdma->dma_dev.channels);
        for (i = 0; i < cdata->nr_channels; i++) {
                struct tegra_dma_channel *tdc = &tdma->channels[i];
+               int irq;
  
                tdc->chan_addr = tdma->base_addr +
                                 TEGRA_APBDMA_CHANNEL_BASE_ADD_OFFSET +
                                 (i * cdata->channel_reg_size);
  
-               res = platform_get_resource(pdev, IORESOURCE_IRQ, i);
-               if (!res) {
-                       ret = -EINVAL;
+               irq = platform_get_irq(pdev, i);
+               if (irq < 0) {
+                       ret = irq;
                        dev_err(&pdev->dev, "No irq resource for chan %d\n", i);
-                       goto err_irq;
+                       goto err_pm_disable;
                }
-               tdc->irq = res->start;
                snprintf(tdc->name, sizeof(tdc->name), "apbdma.%d", i);
-               ret = request_irq(tdc->irq, tegra_dma_isr, 0, tdc->name, tdc);
+               ret = devm_request_irq(&pdev->dev, irq, tegra_dma_isr, 0,
+                                      tdc->name, tdc);
                if (ret) {
                        dev_err(&pdev->dev,
                                "request_irq failed with err %d channel %d\n",
                                ret, i);
-                       goto err_irq;
+                       goto err_pm_disable;
                }
  
                tdc->dma_chan.device = &tdma->dma_dev;
                dma_cookie_init(&tdc->dma_chan);
                list_add_tail(&tdc->dma_chan.device_node,
-                               &tdma->dma_dev.channels);
+                             &tdma->dma_dev.channels);
                tdc->tdma = tdma;
                tdc->id = i;
                tdc->slave_id = TEGRA_APBDMA_SLAVE_ID_INVALID;
  
                tasklet_init(&tdc->tasklet, tegra_dma_tasklet,
-                               (unsigned long)tdc);
+                            (unsigned long)tdc);
                spin_lock_init(&tdc->lock);
+               init_waitqueue_head(&tdc->wq);
  
                INIT_LIST_HEAD(&tdc->pending_sg_req);
                INIT_LIST_HEAD(&tdc->free_sg_req);
        tdma->dma_dev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
        tdma->dma_dev.device_config = tegra_dma_slave_config;
        tdma->dma_dev.device_terminate_all = tegra_dma_terminate_all;
+       tdma->dma_dev.device_synchronize = tegra_dma_synchronize;
        tdma->dma_dev.device_tx_status = tegra_dma_tx_status;
        tdma->dma_dev.device_issue_pending = tegra_dma_issue_pending;
  
        if (ret < 0) {
                dev_err(&pdev->dev,
                        "Tegra20 APB DMA driver registration failed %d\n", ret);
-               goto err_irq;
+               goto err_pm_disable;
        }
  
        ret = of_dma_controller_register(pdev->dev.of_node,
                goto err_unregister_dma_dev;
        }
  
-       dev_info(&pdev->dev, "Tegra20 APB DMA driver register %d channels\n",
-                       cdata->nr_channels);
+       dev_info(&pdev->dev, "Tegra20 APB DMA driver registered %u channels\n",
+                cdata->nr_channels);
        return 0;
  
  err_unregister_dma_dev:
        dma_async_device_unregister(&tdma->dma_dev);
- err_irq:
-       while (--i >= 0) {
-               struct tegra_dma_channel *tdc = &tdma->channels[i];
-               free_irq(tdc->irq, tdc);
-               tasklet_kill(&tdc->tasklet);
-       }
  
+ err_pm_disable:
        pm_runtime_disable(&pdev->dev);
-       if (!pm_runtime_status_suspended(&pdev->dev))
-               tegra_dma_runtime_suspend(&pdev->dev);
+ err_clk_unprepare:
+       clk_unprepare(tdma->dma_clk);
        return ret;
  }
  
  static int tegra_dma_remove(struct platform_device *pdev)
  {
        struct tegra_dma *tdma = platform_get_drvdata(pdev);
-       int i;
-       struct tegra_dma_channel *tdc;
  
+       of_dma_controller_free(pdev->dev.of_node);
        dma_async_device_unregister(&tdma->dma_dev);
-       for (i = 0; i < tdma->chip_data->nr_channels; ++i) {
-               tdc = &tdma->channels[i];
-               free_irq(tdc->irq, tdc);
-               tasklet_kill(&tdc->tasklet);
-       }
        pm_runtime_disable(&pdev->dev);
-       if (!pm_runtime_status_suspended(&pdev->dev))
-               tegra_dma_runtime_suspend(&pdev->dev);
+       clk_unprepare(tdma->dma_clk);
  
        return 0;
  }
  
- static int tegra_dma_runtime_suspend(struct device *dev)
+ static int __maybe_unused tegra_dma_runtime_suspend(struct device *dev)
  {
        struct tegra_dma *tdma = dev_get_drvdata(dev);
-       int i;
-       tdma->reg_gen = tdma_read(tdma, TEGRA_APBDMA_GENERAL);
-       for (i = 0; i < tdma->chip_data->nr_channels; i++) {
-               struct tegra_dma_channel *tdc = &tdma->channels[i];
-               struct tegra_dma_channel_regs *ch_reg = &tdc->channel_reg;
-               /* Only save the state of DMA channels that are in use */
-               if (!tdc->config_init)
-                       continue;
-               ch_reg->csr = tdc_read(tdc, TEGRA_APBDMA_CHAN_CSR);
-               ch_reg->ahb_ptr = tdc_read(tdc, TEGRA_APBDMA_CHAN_AHBPTR);
-               ch_reg->apb_ptr = tdc_read(tdc, TEGRA_APBDMA_CHAN_APBPTR);
-               ch_reg->ahb_seq = tdc_read(tdc, TEGRA_APBDMA_CHAN_AHBSEQ);
-               ch_reg->apb_seq = tdc_read(tdc, TEGRA_APBDMA_CHAN_APBSEQ);
-               if (tdma->chip_data->support_separate_wcount_reg)
-                       ch_reg->wcount = tdc_read(tdc,
-                                                 TEGRA_APBDMA_CHAN_WCOUNT);
-       }
  
-       clk_disable_unprepare(tdma->dma_clk);
+       clk_disable(tdma->dma_clk);
  
        return 0;
  }
  
- static int tegra_dma_runtime_resume(struct device *dev)
+ static int __maybe_unused tegra_dma_runtime_resume(struct device *dev)
  {
        struct tegra_dma *tdma = dev_get_drvdata(dev);
-       int i, ret;
  
-       ret = clk_prepare_enable(tdma->dma_clk);
-       if (ret < 0) {
-               dev_err(dev, "clk_enable failed: %d\n", ret);
-               return ret;
-       }
+       return clk_enable(tdma->dma_clk);
+ }
  
-       tdma_write(tdma, TEGRA_APBDMA_GENERAL, tdma->reg_gen);
-       tdma_write(tdma, TEGRA_APBDMA_CONTROL, 0);
-       tdma_write(tdma, TEGRA_APBDMA_IRQ_MASK_SET, 0xFFFFFFFFul);
+ static int __maybe_unused tegra_dma_dev_suspend(struct device *dev)
+ {
+       struct tegra_dma *tdma = dev_get_drvdata(dev);
+       unsigned long flags;
+       unsigned int i;
+       bool busy;
  
        for (i = 0; i < tdma->chip_data->nr_channels; i++) {
                struct tegra_dma_channel *tdc = &tdma->channels[i];
-               struct tegra_dma_channel_regs *ch_reg = &tdc->channel_reg;
-               /* Only restore the state of DMA channels that are in use */
-               if (!tdc->config_init)
-                       continue;
-               if (tdma->chip_data->support_separate_wcount_reg)
-                       tdc_write(tdc, TEGRA_APBDMA_CHAN_WCOUNT,
-                                 ch_reg->wcount);
-               tdc_write(tdc, TEGRA_APBDMA_CHAN_APBSEQ, ch_reg->apb_seq);
-               tdc_write(tdc, TEGRA_APBDMA_CHAN_APBPTR, ch_reg->apb_ptr);
-               tdc_write(tdc, TEGRA_APBDMA_CHAN_AHBSEQ, ch_reg->ahb_seq);
-               tdc_write(tdc, TEGRA_APBDMA_CHAN_AHBPTR, ch_reg->ahb_ptr);
-               tdc_write(tdc, TEGRA_APBDMA_CHAN_CSR,
-                       (ch_reg->csr & ~TEGRA_APBDMA_CSR_ENB));
+               tasklet_kill(&tdc->tasklet);
+ 
+               spin_lock_irqsave(&tdc->lock, flags);
+               busy = tdc->busy;
+               spin_unlock_irqrestore(&tdc->lock, flags);
+ 
+               if (busy) {
+                       dev_err(tdma->dev, "channel %u busy\n", i);
+                       return -EBUSY;
+               }
        }
  
-       return 0;
+       return pm_runtime_force_suspend(dev);
+ }
+ 
+ static int __maybe_unused tegra_dma_dev_resume(struct device *dev)
+ {
+       struct tegra_dma *tdma = dev_get_drvdata(dev);
+       int err;
+ 
+       err = tegra_dma_init_hw(tdma);
+       if (err)
+               return err;
+ 
+       return pm_runtime_force_resume(dev);
  }
  
  static const struct dev_pm_ops tegra_dma_dev_pm_ops = {
        SET_RUNTIME_PM_OPS(tegra_dma_runtime_suspend, tegra_dma_runtime_resume,
                           NULL)
-       SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
-                               pm_runtime_force_resume)
+       SET_SYSTEM_SLEEP_PM_OPS(tegra_dma_dev_suspend, tegra_dma_dev_resume)
  };
  
  static const struct of_device_id tegra_dma_of_match[] = {
@@@ -1668,7 -1691,6 +1687,6 @@@ static struct platform_driver tegra_dma
  
  module_platform_driver(tegra_dmac_driver);
  
- MODULE_ALIAS("platform:tegra20-apbdma");
  MODULE_DESCRIPTION("NVIDIA Tegra APB DMA Controller driver");
  MODULE_AUTHOR("Laxman Dewangan <ldewangan@nvidia.com>");
  MODULE_LICENSE("GPL v2");
@@@ -32,6 -32,7 +32,7 @@@ struct k3_udma_glue_common 
        bool epib;
        u32  psdata_size;
        u32  swdata_size;
+       u32  atype;
  };
  
  struct k3_udma_glue_tx_channel {
@@@ -121,6 -122,15 +122,15 @@@ static int of_k3_udma_glue_parse_chn(st
                return -ENOENT;
  
        thread_id = dma_spec.args[0];
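+       /* The optional second cell selects the channel ATYPE (0, 1 or 2) */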
+       if (dma_spec.args_count == 2) {
+               if (dma_spec.args[1] > 2) {
+                       dev_err(common->dev, "Invalid channel atype: %u\n",
+                               dma_spec.args[1]);
+                       ret = -EINVAL;
+                       goto out_put_spec;
+               }
+               common->atype = dma_spec.args[1];
+       }
  
        if (tx_chn && !(thread_id & K3_PSIL_DST_THREAD_ID_OFFSET)) {
                ret = -EINVAL;
@@@ -202,7 -212,8 +212,8 @@@ static int k3_udma_glue_cfg_tx_chn(stru
                        TI_SCI_MSG_VALUE_RM_UDMAP_CH_CHAN_TYPE_VALID |
                        TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_SUPR_TDPKT_VALID |
                        TI_SCI_MSG_VALUE_RM_UDMAP_CH_FETCH_SIZE_VALID |
-                       TI_SCI_MSG_VALUE_RM_UDMAP_CH_CQ_QNUM_VALID;
+                       TI_SCI_MSG_VALUE_RM_UDMAP_CH_CQ_QNUM_VALID |
+                       TI_SCI_MSG_VALUE_RM_UDMAP_CH_ATYPE_VALID;
        req.nav_id = tisci_rm->tisci_dev_id;
        req.index = tx_chn->udma_tchan_id;
        if (tx_chn->tx_pause_on_err)
                req.tx_supr_tdpkt = 1;
        req.tx_fetch_size = tx_chn->common.hdesc_size >> 2;
        req.txcq_qnum = k3_ringacc_get_ring_id(tx_chn->ringtxcq);
+       req.tx_atype = tx_chn->common.atype;
  
        return tisci_rm->tisci_udmap_ops->tx_ch_cfg(tisci_rm->tisci, &req);
  }
@@@ -502,7 -514,8 +514,8 @@@ static int k3_udma_glue_cfg_rx_chn(stru
                           TI_SCI_MSG_VALUE_RM_UDMAP_CH_CQ_QNUM_VALID |
                           TI_SCI_MSG_VALUE_RM_UDMAP_CH_CHAN_TYPE_VALID |
                           TI_SCI_MSG_VALUE_RM_UDMAP_CH_RX_FLOWID_START_VALID |
-                          TI_SCI_MSG_VALUE_RM_UDMAP_CH_RX_FLOWID_CNT_VALID;
+                          TI_SCI_MSG_VALUE_RM_UDMAP_CH_RX_FLOWID_CNT_VALID |
+                          TI_SCI_MSG_VALUE_RM_UDMAP_CH_ATYPE_VALID;
  
        req.nav_id = tisci_rm->tisci_dev_id;
        req.index = rx_chn->udma_rchan_id;
                req.flowid_cnt = rx_chn->flow_num;
        }
        req.rx_chan_type = TI_SCI_RM_UDMAP_CHAN_TYPE_PKT_PBRR;
+       req.rx_atype = rx_chn->common.atype;
  
        ret = tisci_rm->tisci_udmap_ops->rx_ch_cfg(tisci_rm->tisci, &req);
        if (ret)
@@@ -564,12 -578,12 +578,12 @@@ static int k3_udma_glue_cfg_rx_flow(str
        if (IS_ERR(flow->udma_rflow)) {
                ret = PTR_ERR(flow->udma_rflow);
                dev_err(dev, "UDMAX rflow get err %d\n", ret);
 -              goto err;
 +              return ret;
        }
  
        if (flow->udma_rflow_id != xudma_rflow_get_id(flow->udma_rflow)) {
 -              xudma_rflow_put(rx_chn->common.udmax, flow->udma_rflow);
 -              return -ENODEV;
 +              ret = -ENODEV;
 +              goto err_rflow_put;
        }
  
        /* request and cfg rings */
        if (!flow->ringrx) {
                ret = -ENODEV;
                dev_err(dev, "Failed to get RX ring\n");
 -              goto err;
 +              goto err_rflow_put;
        }
  
        flow->ringrxfdq = k3_ringacc_request_ring(rx_chn->common.ringacc,
        if (!flow->ringrxfdq) {
                ret = -ENODEV;
                dev_err(dev, "Failed to get RXFDQ ring\n");
 -              goto err;
 +              goto err_ringrx_free;
        }
  
        ret = k3_ringacc_ring_cfg(flow->ringrx, &flow_cfg->rx_cfg);
        if (ret) {
                dev_err(dev, "Failed to cfg ringrx %d\n", ret);
 -              goto err;
 +              goto err_ringrxfdq_free;
        }
  
        ret = k3_ringacc_ring_cfg(flow->ringrxfdq, &flow_cfg->rxfdq_cfg);
        if (ret) {
                dev_err(dev, "Failed to cfg ringrxfdq %d\n", ret);
 -              goto err;
 +              goto err_ringrxfdq_free;
        }
  
        if (rx_chn->remote) {
        if (ret) {
                dev_err(dev, "flow%d config failed: %d\n", flow->udma_rflow_id,
                        ret);
 -              goto err;
 +              goto err_ringrxfdq_free;
        }
  
        rx_chn->flows_ready++;
                flow->udma_rflow_id, rx_chn->flows_ready);
  
        return 0;
 -err:
 -      k3_udma_glue_release_rx_flow(rx_chn, flow_idx);
 +
 +err_ringrxfdq_free:
 +      k3_ringacc_ring_free(flow->ringrxfdq);
 +
 +err_ringrx_free:
 +      k3_ringacc_ring_free(flow->ringrx);
 +
 +err_rflow_put:
 +      xudma_rflow_put(rx_chn->common.udmax, flow->udma_rflow);
 +      flow->udma_rflow = NULL;
 +
        return ret;
  }
  
diff --combined drivers/dma/ti/k3-udma.c
@@@ -5,7 -5,6 +5,7 @@@
   */
  
  #include <linux/kernel.h>
 +#include <linux/delay.h>
  #include <linux/dmaengine.h>
  #include <linux/dma-mapping.h>
  #include <linux/dmapool.h>
@@@ -97,24 -96,6 +97,24 @@@ struct udma_match_data 
        u32 level_start_idx[];
  };
  
 +struct udma_hwdesc {
 +      size_t cppi5_desc_size;
 +      void *cppi5_desc_vaddr;
 +      dma_addr_t cppi5_desc_paddr;
 +
 +      /* TR descriptor internal pointers */
 +      void *tr_req_base;
 +      struct cppi5_tr_resp_t *tr_resp_base;
 +};
 +
 +struct udma_rx_flush {
 +      struct udma_hwdesc hwdescs[2];
 +
 +      size_t buffer_size;
 +      void *buffer_vaddr;
 +      dma_addr_t buffer_paddr;
 +};
 +
  struct udma_dev {
        struct dma_device ddev;
        struct device *dev;
        struct list_head desc_to_purge;
        spinlock_t lock;
  
 +      struct udma_rx_flush rx_flush;
 +
        int tchan_cnt;
        int echan_cnt;
        int rchan_cnt;
  
        struct udma_chan *channels;
        u32 psil_base;
+       u32 atype;
  };
  
 -struct udma_hwdesc {
 -      size_t cppi5_desc_size;
 -      void *cppi5_desc_vaddr;
 -      dma_addr_t cppi5_desc_paddr;
 -
 -      /* TR descriptor internal pointers */
 -      void *tr_req_base;
 -      struct cppi5_tr_resp_t *tr_resp_base;
 -};
 -
  struct udma_desc {
        struct virt_dma_desc vd;
  
@@@ -180,7 -170,7 +181,7 @@@ enum udma_chan_state 
  
  struct udma_tx_drain {
        struct delayed_work work;
 -      unsigned long jiffie;
 +      ktime_t tstamp;
        u32 residue;
  };
  
@@@ -192,6 -182,7 +193,7 @@@ struct udma_chan_config 
        u32 hdesc_size; /* Size of a packet descriptor in packet mode */
        bool notdpkt; /* Suppress sending TDC packet */
        int remote_thread_id;
+       u32 atype;
        u32 src_thread;
        u32 dst_thread;
        enum psil_endpoint_type ep_type;
@@@ -513,7 -504,7 +515,7 @@@ static bool udma_is_chan_paused(struct 
  {
        u32 val, pause_mask;
  
 -      switch (uc->desc->dir) {
 +      switch (uc->config.dir) {
        case DMA_DEV_TO_MEM:
                val = udma_rchanrt_read(uc->rchan,
                                        UDMA_RCHAN_RT_PEER_RT_EN_REG);
@@@ -562,17 -553,12 +564,17 @@@ static void udma_sync_for_device(struc
        }
  }
  
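 +/*
 + * The RX flush support keeps two pre-built descriptors: hwdescs[0] for
 + * TR mode channels and hwdescs[1] for packet mode, hence the pkt_mode
 + * index.
 + */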
 +static inline dma_addr_t udma_get_rx_flush_hwdesc_paddr(struct udma_chan *uc)
 +{
 +      return uc->ud->rx_flush.hwdescs[uc->config.pkt_mode].cppi5_desc_paddr;
 +}
 +
  static int udma_push_to_ring(struct udma_chan *uc, int idx)
  {
        struct udma_desc *d = uc->desc;
 -
        struct k3_ring *ring = NULL;
 -      int ret = -EINVAL;
 +      dma_addr_t paddr;
 +      int ret;
  
        switch (uc->config.dir) {
        case DMA_DEV_TO_MEM:
                ring = uc->tchan->t_ring;
                break;
        default:
 -              break;
 +              return -EINVAL;
        }
  
 -      if (ring) {
 -              dma_addr_t desc_addr = udma_curr_cppi5_desc_paddr(d, idx);
 +      /* RX flush packet: idx == -1 is only passed in case of DEV_TO_MEM */
 +      if (idx == -1) {
 +              paddr = udma_get_rx_flush_hwdesc_paddr(uc);
 +      } else {
 +              paddr = udma_curr_cppi5_desc_paddr(d, idx);
  
                wmb(); /* Ensure that writes are not moved over this point */
                udma_sync_for_device(uc, idx);
 -              ret = k3_ringacc_ring_push(ring, &desc_addr);
 -              uc->in_ring_cnt++;
        }
  
 +      ret = k3_ringacc_ring_push(ring, &paddr);
 +      if (!ret)
 +              uc->in_ring_cnt++;
 +
        return ret;
  }
  
 +static bool udma_desc_is_rx_flush(struct udma_chan *uc, dma_addr_t addr)
 +{
 +      if (uc->config.dir != DMA_DEV_TO_MEM)
 +              return false;
 +
 +      if (addr == udma_get_rx_flush_hwdesc_paddr(uc))
 +              return true;
 +
 +      return false;
 +}
 +
  static int udma_pop_from_ring(struct udma_chan *uc, dma_addr_t *addr)
  {
        struct k3_ring *ring = NULL;
                if (cppi5_desc_is_tdcm(*addr))
                        return ret;
  
 +              /* Check for flush descriptor */
 +              if (udma_desc_is_rx_flush(uc, *addr))
 +                      return -ENOENT;
 +
                d = udma_udma_desc_from_paddr(uc, *addr);
  
                if (d)
@@@ -926,9 -892,6 +928,9 @@@ static int udma_stop(struct udma_chan *
  
        switch (uc->config.dir) {
        case DMA_DEV_TO_MEM:
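 +              /*
 +               * Queue the rx_flush descriptor so that data still in
 +               * flight is drained into the flush buffer during teardown.
 +               */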
 +              if (!uc->cyclic && !uc->desc)
 +                      udma_push_to_ring(uc, -1);
 +
                udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_PEER_RT_EN_REG,
                                   UDMA_PEER_RT_EN_ENABLE |
                                   UDMA_PEER_RT_EN_TEARDOWN);
@@@ -985,10 -948,9 +987,10 @@@ static bool udma_is_desc_really_done(st
        peer_bcnt = udma_tchanrt_read(uc->tchan, UDMA_TCHAN_RT_PEER_BCNT_REG);
        bcnt = udma_tchanrt_read(uc->tchan, UDMA_TCHAN_RT_BCNT_REG);
  
 +      /* Transfer is incomplete, store current residue and time stamp */
        if (peer_bcnt < bcnt) {
                uc->tx_drain.residue = bcnt - peer_bcnt;
 -              uc->tx_drain.jiffie = jiffies;
 +              uc->tx_drain.tstamp = ktime_get();
                return false;
        }
  
@@@ -1001,59 -963,35 +1003,59 @@@ static void udma_check_tx_completion(st
                                            tx_drain.work.work);
        bool desc_done = true;
        u32 residue_diff;
 -      unsigned long jiffie_diff, delay;
 +      ktime_t time_diff;
 +      unsigned long delay;
 +
 +      while (1) {
 +              if (uc->desc) {
 +                      /* Get previous residue and time stamp */
 +                      residue_diff = uc->tx_drain.residue;
 +                      time_diff = uc->tx_drain.tstamp;
 +                      /*
 +                       * Get current residue and time stamp or see if
 +                       * transfer is complete
 +                       */
 +                      desc_done = udma_is_desc_really_done(uc, uc->desc);
 +              }
  
 -      if (uc->desc) {
 -              residue_diff = uc->tx_drain.residue;
 -              jiffie_diff = uc->tx_drain.jiffie;
 -              desc_done = udma_is_desc_really_done(uc, uc->desc);
 -      }
 -
 -      if (!desc_done) {
 -              jiffie_diff = uc->tx_drain.jiffie - jiffie_diff;
 -              residue_diff -= uc->tx_drain.residue;
 -              if (residue_diff) {
 -                      /* Try to guess when we should check next time */
 -                      residue_diff /= jiffie_diff;
 -                      delay = uc->tx_drain.residue / residue_diff / 3;
 -                      if (jiffies_to_msecs(delay) < 5)
 -                              delay = 0;
 -              } else {
 -                      /* No progress, check again in 1 second  */
 -                      delay = HZ;
 +              if (!desc_done) {
 +                      /*
 +                       * Find the time delta and residue delta w.r.t
 +                       * previous poll
 +                       */
 +                      time_diff = ktime_sub(uc->tx_drain.tstamp,
 +                                            time_diff) + 1;
 +                      residue_diff -= uc->tx_drain.residue;
 +                      if (residue_diff) {
 +                              /*
 +                               * Try to guess when we should check
 +                               * next time by calculating rate at
 +                               * which data is being drained at the
 +                               * peer device
 +                               */
 +                              delay = (time_diff / residue_diff) *
 +                                      uc->tx_drain.residue;
 +                      } else {
 +                              /* No progress, check again in 1 second  */
 +                              schedule_delayed_work(&uc->tx_drain.work, HZ);
 +                              break;
 +                      }
 +
 +                      usleep_range(ktime_to_us(delay),
 +                                   ktime_to_us(delay) + 10);
 +                      continue;
                }
  
 -              schedule_delayed_work(&uc->tx_drain.work, delay);
 -      } else if (uc->desc) {
 -              struct udma_desc *d = uc->desc;
 +              if (uc->desc) {
 +                      struct udma_desc *d = uc->desc;
  
 -              uc->bcnt += d->residue;
 -              udma_start(uc);
 -              vchan_cookie_complete(&d->vd);
 +                      uc->bcnt += d->residue;
 +                      udma_start(uc);
 +                      vchan_cookie_complete(&d->vd);
 +                      break;
 +              }
 +
 +              break;
        }
  }
  
@@@ -1097,27 -1035,29 +1099,27 @@@ static irqreturn_t udma_ring_irq_handle
                        goto out;
                }
  
 -              if (uc->cyclic) {
 -                      /* push the descriptor back to the ring */
 -                      if (d == uc->desc) {
 +              if (d == uc->desc) {
 +                      /* active descriptor */
 +                      if (uc->cyclic) {
                                udma_cyclic_packet_elapsed(uc);
                                vchan_cyclic_callback(&d->vd);
 -                      }
 -              } else {
 -                      bool desc_done = false;
 -
 -                      if (d == uc->desc) {
 -                              desc_done = udma_is_desc_really_done(uc, d);
 -
 -                              if (desc_done) {
 +                      } else {
 +                              if (udma_is_desc_really_done(uc, d)) {
                                        uc->bcnt += d->residue;
                                        udma_start(uc);
 +                                      vchan_cookie_complete(&d->vd);
                                } else {
                                        schedule_delayed_work(&uc->tx_drain.work,
                                                              0);
                                }
                        }
 -
 -                      if (desc_done)
 -                              vchan_cookie_complete(&d->vd);
 +              } else {
 +                      /*
 +                       * terminated descriptor, mark the descriptor as
 +                       * completed to update the channel's cookie marker
 +                       */
 +                      dma_cookie_complete(&d->vd.tx);
                }
        }
  out:
@@@ -1569,7 -1509,8 +1571,8 @@@ err_rflow
        TI_SCI_MSG_VALUE_RM_UDMAP_CH_CHAN_TYPE_VALID |          \
        TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_SUPR_TDPKT_VALID |      \
        TI_SCI_MSG_VALUE_RM_UDMAP_CH_FETCH_SIZE_VALID |         \
-       TI_SCI_MSG_VALUE_RM_UDMAP_CH_CQ_QNUM_VALID)
+       TI_SCI_MSG_VALUE_RM_UDMAP_CH_CQ_QNUM_VALID |            \
+       TI_SCI_MSG_VALUE_RM_UDMAP_CH_ATYPE_VALID)
  
  #define TISCI_RCHAN_VALID_PARAMS (                            \
        TI_SCI_MSG_VALUE_RM_UDMAP_CH_PAUSE_ON_ERR_VALID |       \
        TI_SCI_MSG_VALUE_RM_UDMAP_CH_RX_IGNORE_SHORT_VALID |    \
        TI_SCI_MSG_VALUE_RM_UDMAP_CH_RX_IGNORE_LONG_VALID |     \
        TI_SCI_MSG_VALUE_RM_UDMAP_CH_RX_FLOWID_START_VALID |    \
-       TI_SCI_MSG_VALUE_RM_UDMAP_CH_RX_FLOWID_CNT_VALID)
+       TI_SCI_MSG_VALUE_RM_UDMAP_CH_RX_FLOWID_CNT_VALID |      \
+       TI_SCI_MSG_VALUE_RM_UDMAP_CH_ATYPE_VALID)
  
  static int udma_tisci_m2m_channel_config(struct udma_chan *uc)
  {
        req_tx.tx_chan_type = TI_SCI_RM_UDMAP_CHAN_TYPE_3RDP_BCOPY_PBRR;
        req_tx.tx_fetch_size = sizeof(struct cppi5_desc_hdr_t) >> 2;
        req_tx.txcq_qnum = tc_ring;
+       req_tx.tx_atype = ud->atype;
  
        ret = tisci_ops->tx_ch_cfg(tisci_rm->tisci, &req_tx);
        if (ret) {
        req_rx.rx_fetch_size = sizeof(struct cppi5_desc_hdr_t) >> 2;
        req_rx.rxcq_qnum = tc_ring;
        req_rx.rx_chan_type = TI_SCI_RM_UDMAP_CHAN_TYPE_3RDP_BCOPY_PBRR;
+       req_rx.rx_atype = ud->atype;
  
        ret = tisci_ops->rx_ch_cfg(tisci_rm->tisci, &req_rx);
        if (ret)
@@@ -1649,6 -1593,7 +1655,7 @@@ static int udma_tisci_tx_channel_config
        req_tx.tx_supr_tdpkt = uc->config.notdpkt;
        req_tx.tx_fetch_size = fetch_size >> 2;
        req_tx.txcq_qnum = tc_ring;
+       req_tx.tx_atype = uc->config.atype;
  
        ret = tisci_ops->tx_ch_cfg(tisci_rm->tisci, &req_tx);
        if (ret)
@@@ -1685,6 -1630,7 +1692,7 @@@ static int udma_tisci_rx_channel_config
        req_rx.rx_fetch_size =  fetch_size >> 2;
        req_rx.rxcq_qnum = rx_ring;
        req_rx.rx_chan_type = mode;
+       req_rx.rx_atype = uc->config.atype;
  
        ret = tisci_ops->rx_ch_cfg(tisci_rm->tisci, &req_rx);
        if (ret) {
@@@ -2027,81 -1973,36 +2035,81 @@@ static struct udma_desc *udma_alloc_tr_
        return d;
  }
  
 +/**
 + * udma_get_tr_counters - calculate TR counters for a given length
 + * @len: Length of the transfer
 + * @align_to: Preferred alignment
 + * @tr0_cnt0: First TR icnt0
 + * @tr0_cnt1: First TR icnt1
 + * @tr1_cnt0: Second (if used) TR icnt0
 + *
 + * For len < SZ_64K only one TR is enough, tr1_cnt0 is not updated
 + * For len >= SZ_64K two TRs are used in a simple way:
 + * First TR: SZ_64K-alignment blocks (tr0_cnt0, tr0_cnt1)
 + * Second TR: the remaining length (tr1_cnt0)
 + *
 + * Returns the number of TRs the length needs (1 or 2)
 + * -EINVAL if the length cannot be supported
 + */
 +static int udma_get_tr_counters(size_t len, unsigned long align_to,
 +                              u16 *tr0_cnt0, u16 *tr0_cnt1, u16 *tr1_cnt0)
 +{
 +      if (len < SZ_64K) {
 +              *tr0_cnt0 = len;
 +              *tr0_cnt1 = 1;
 +
 +              return 1;
 +      }
 +
 +      if (align_to > 3)
 +              align_to = 3;
 +
 +realign:
 +      *tr0_cnt0 = SZ_64K - BIT(align_to);
 +      if (len / *tr0_cnt0 >= SZ_64K) {
 +              if (align_to) {
 +                      align_to--;
 +                      goto realign;
 +              }
 +              return -EINVAL;
 +      }
 +
 +      *tr0_cnt1 = len / *tr0_cnt0;
 +      *tr1_cnt0 = len % *tr0_cnt0;
 +
 +      return 2;
 +}
 +
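 +/*
 + * Worked example (illustrative only): len = 200000 with align_to = 2
 + * gives tr0_cnt0 = SZ_64K - BIT(2) = 65532, tr0_cnt1 = 200000 / 65532 = 3
 + * and tr1_cnt0 = 200000 % 65532 = 3404; the two TRs together cover
 + * 3 * 65532 + 3404 = 200000 bytes.
 + */
 +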
  static struct udma_desc *
  udma_prep_slave_sg_tr(struct udma_chan *uc, struct scatterlist *sgl,
                      unsigned int sglen, enum dma_transfer_direction dir,
                      unsigned long tx_flags, void *context)
  {
 -      enum dma_slave_buswidth dev_width;
        struct scatterlist *sgent;
        struct udma_desc *d;
 -      size_t tr_size;
        struct cppi5_tr_type1_t *tr_req = NULL;
 +      u16 tr0_cnt0, tr0_cnt1, tr1_cnt0;
        unsigned int i;
 -      u32 burst;
 +      size_t tr_size;
 +      int num_tr = 0;
 +      int tr_idx = 0;
  
 -      if (dir == DMA_DEV_TO_MEM) {
 -              dev_width = uc->cfg.src_addr_width;
 -              burst = uc->cfg.src_maxburst;
 -      } else if (dir == DMA_MEM_TO_DEV) {
 -              dev_width = uc->cfg.dst_addr_width;
 -              burst = uc->cfg.dst_maxburst;
 -      } else {
 -              dev_err(uc->ud->dev, "%s: bad direction?\n", __func__);
 +      if (!is_slave_direction(dir)) {
 +              dev_err(uc->ud->dev, "Only slave cyclic is supported\n");
                return NULL;
        }
  
 -      if (!burst)
 -              burst = 1;
 +      /* estimate the number of TRs we will need */
 +      for_each_sg(sgl, sgent, sglen, i) {
 +              if (sg_dma_len(sgent) < SZ_64K)
 +                      num_tr++;
 +              else
 +                      num_tr += 2;
 +      }
  
        /* Now allocate and setup the descriptor. */
        tr_size = sizeof(struct cppi5_tr_type1_t);
 -      d = udma_alloc_tr_desc(uc, tr_size, sglen, dir);
 +      d = udma_alloc_tr_desc(uc, tr_size, num_tr, dir);
        if (!d)
                return NULL;
  
  
        tr_req = d->hwdesc[0].tr_req_base;
        for_each_sg(sgl, sgent, sglen, i) {
 -              d->residue += sg_dma_len(sgent);
 +              dma_addr_t sg_addr = sg_dma_address(sgent);
 +
 +              num_tr = udma_get_tr_counters(sg_dma_len(sgent), __ffs(sg_addr),
 +                                            &tr0_cnt0, &tr0_cnt1, &tr1_cnt0);
 +              if (num_tr < 0) {
 +                      dev_err(uc->ud->dev, "size %u is not supported\n",
 +                              sg_dma_len(sgent));
 +                      udma_free_hwdesc(uc, d);
 +                      kfree(d);
 +                      return NULL;
 +              }
  
                cppi5_tr_init(&tr_req[i].flags, CPPI5_TR_TYPE1, false, false,
                              CPPI5_TR_EVENT_SIZE_COMPLETION, 0);
                cppi5_tr_csf_set(&tr_req[i].flags, CPPI5_TR_CSF_SUPR_EVT);
  
 -              tr_req[i].addr = sg_dma_address(sgent);
 -              tr_req[i].icnt0 = burst * dev_width;
 -              tr_req[i].dim1 = burst * dev_width;
 -              tr_req[i].icnt1 = sg_dma_len(sgent) / tr_req[i].icnt0;
 +              tr_req[tr_idx].addr = sg_addr;
 +              tr_req[tr_idx].icnt0 = tr0_cnt0;
 +              tr_req[tr_idx].icnt1 = tr0_cnt1;
 +              tr_req[tr_idx].dim1 = tr0_cnt0;
 +              tr_idx++;
 +
 +              if (num_tr == 2) {
 +                      cppi5_tr_init(&tr_req[tr_idx].flags, CPPI5_TR_TYPE1,
 +                                    false, false,
 +                                    CPPI5_TR_EVENT_SIZE_COMPLETION, 0);
 +                      cppi5_tr_csf_set(&tr_req[tr_idx].flags,
 +                                       CPPI5_TR_CSF_SUPR_EVT);
 +
 +                      tr_req[tr_idx].addr = sg_addr + tr0_cnt1 * tr0_cnt0;
 +                      tr_req[tr_idx].icnt0 = tr1_cnt0;
 +                      tr_req[tr_idx].icnt1 = 1;
 +                      tr_req[tr_idx].dim1 = tr1_cnt0;
 +                      tr_idx++;
 +              }
 +
 +              d->residue += sg_dma_len(sgent);
        }
  
 -      cppi5_tr_csf_set(&tr_req[i - 1].flags, CPPI5_TR_CSF_EOP);
 +      cppi5_tr_csf_set(&tr_req[tr_idx - 1].flags, CPPI5_TR_CSF_EOP);
  
        return d;
  }
@@@ -2453,66 -2327,47 +2461,66 @@@ udma_prep_dma_cyclic_tr(struct udma_cha
                        size_t buf_len, size_t period_len,
                        enum dma_transfer_direction dir, unsigned long flags)
  {
 -      enum dma_slave_buswidth dev_width;
        struct udma_desc *d;
 -      size_t tr_size;
 +      size_t tr_size, period_addr;
        struct cppi5_tr_type1_t *tr_req;
 -      unsigned int i;
        unsigned int periods = buf_len / period_len;
 -      u32 burst;
 +      u16 tr0_cnt0, tr0_cnt1, tr1_cnt0;
 +      unsigned int i;
 +      int num_tr;
  
 -      if (dir == DMA_DEV_TO_MEM) {
 -              dev_width = uc->cfg.src_addr_width;
 -              burst = uc->cfg.src_maxburst;
 -      } else if (dir == DMA_MEM_TO_DEV) {
 -              dev_width = uc->cfg.dst_addr_width;
 -              burst = uc->cfg.dst_maxburst;
 -      } else {
 -              dev_err(uc->ud->dev, "%s: bad direction?\n", __func__);
 +      if (!is_slave_direction(dir)) {
 +              dev_err(uc->ud->dev, "Only slave cyclic is supported\n");
                return NULL;
        }
  
 -      if (!burst)
 -              burst = 1;
 +      num_tr = udma_get_tr_counters(period_len, __ffs(buf_addr), &tr0_cnt0,
 +                                    &tr0_cnt1, &tr1_cnt0);
 +      if (num_tr < 0) {
 +              dev_err(uc->ud->dev, "size %zu is not supported\n",
 +                      period_len);
 +              return NULL;
 +      }
  
        /* Now allocate and setup the descriptor. */
        tr_size = sizeof(struct cppi5_tr_type1_t);
 -      d = udma_alloc_tr_desc(uc, tr_size, periods, dir);
 +      d = udma_alloc_tr_desc(uc, tr_size, periods * num_tr, dir);
        if (!d)
                return NULL;
  
        tr_req = d->hwdesc[0].tr_req_base;
 +      period_addr = buf_addr;
        for (i = 0; i < periods; i++) {
 -              cppi5_tr_init(&tr_req[i].flags, CPPI5_TR_TYPE1, false, false,
 -                            CPPI5_TR_EVENT_SIZE_COMPLETION, 0);
 +              int tr_idx = i * num_tr;
  
 -              tr_req[i].addr = buf_addr + period_len * i;
 -              tr_req[i].icnt0 = dev_width;
 -              tr_req[i].icnt1 = period_len / dev_width;
 -              tr_req[i].dim1 = dev_width;
 +              cppi5_tr_init(&tr_req[tr_idx].flags, CPPI5_TR_TYPE1, false,
 +                            false, CPPI5_TR_EVENT_SIZE_COMPLETION, 0);
 +
 +              tr_req[tr_idx].addr = period_addr;
 +              tr_req[tr_idx].icnt0 = tr0_cnt0;
 +              tr_req[tr_idx].icnt1 = tr0_cnt1;
 +              tr_req[tr_idx].dim1 = tr0_cnt0;
 +
 +              if (num_tr == 2) {
 +                      cppi5_tr_csf_set(&tr_req[tr_idx].flags,
 +                                       CPPI5_TR_CSF_SUPR_EVT);
 +                      tr_idx++;
 +
 +                      cppi5_tr_init(&tr_req[tr_idx].flags, CPPI5_TR_TYPE1,
 +                                    false, false,
 +                                    CPPI5_TR_EVENT_SIZE_COMPLETION, 0);
 +
 +                      tr_req[tr_idx].addr = period_addr + tr0_cnt1 * tr0_cnt0;
 +                      tr_req[tr_idx].icnt0 = tr1_cnt0;
 +                      tr_req[tr_idx].icnt1 = 1;
 +                      tr_req[tr_idx].dim1 = tr1_cnt0;
 +              }
  
                if (!(flags & DMA_PREP_INTERRUPT))
 -                      cppi5_tr_csf_set(&tr_req[i].flags,
 +                      cppi5_tr_csf_set(&tr_req[tr_idx].flags,
                                         CPPI5_TR_CSF_SUPR_EVT);
 +
 +              period_addr += period_len;
        }
  
        return d;
@@@ -2670,12 -2525,29 +2678,12 @@@ udma_prep_dma_memcpy(struct dma_chan *c
                return NULL;
        }
  
 -      if (len < SZ_64K) {
 -              num_tr = 1;
 -              tr0_cnt0 = len;
 -              tr0_cnt1 = 1;
 -      } else {
 -              unsigned long align_to = __ffs(src | dest);
 -
 -              if (align_to > 3)
 -                      align_to = 3;
 -              /*
 -               * Keep simple: tr0: SZ_64K-alignment blocks,
 -               *              tr1: the remaining
 -               */
 -              num_tr = 2;
 -              tr0_cnt0 = (SZ_64K - BIT(align_to));
 -              if (len / tr0_cnt0 >= SZ_64K) {
 -                      dev_err(uc->ud->dev, "size %zu is not supported\n",
 -                              len);
 -                      return NULL;
 -              }
 -
 -              tr0_cnt1 = len / tr0_cnt0;
 -              tr1_cnt0 = len % tr0_cnt0;
 +      num_tr = udma_get_tr_counters(len, __ffs(src | dest), &tr0_cnt0,
 +                                    &tr0_cnt1, &tr1_cnt0);
 +      if (num_tr < 0) {
 +              dev_err(uc->ud->dev, "size %zu is not supported\n",
 +                      len);
 +              return NULL;
        }
  
        d = udma_alloc_tr_desc(uc, tr_size, num_tr, DMA_MEM_TO_MEM);
@@@ -2767,9 -2639,6 +2775,9 @@@ static enum dma_status udma_tx_status(s
  
        ret = dma_cookie_status(chan, cookie, txstate);
  
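 +      /* A channel that is not running cannot have a transfer in flight */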
 +      if (!udma_is_chan_running(uc))
 +              ret = DMA_COMPLETE;
 +
        if (ret == DMA_IN_PROGRESS && udma_is_chan_paused(uc))
                ret = DMA_PAUSED;
  
@@@ -2836,8 -2705,11 +2844,8 @@@ static int udma_pause(struct dma_chan *
  {
        struct udma_chan *uc = to_udma_chan(chan);
  
 -      if (!uc->desc)
 -              return -EINVAL;
 -
        /* pause the channel */
 -      switch (uc->desc->dir) {
 +      switch (uc->config.dir) {
        case DMA_DEV_TO_MEM:
                udma_rchanrt_update_bits(uc->rchan,
                                         UDMA_RCHAN_RT_PEER_RT_EN_REG,
@@@ -2866,8 -2738,11 +2874,8 @@@ static int udma_resume(struct dma_chan 
  {
        struct udma_chan *uc = to_udma_chan(chan);
  
 -      if (!uc->desc)
 -              return -EINVAL;
 -
        /* resume the channel */
 -      switch (uc->desc->dir) {
 +      switch (uc->config.dir) {
        case DMA_DEV_TO_MEM:
                udma_rchanrt_update_bits(uc->rchan,
                                         UDMA_RCHAN_RT_PEER_RT_EN_REG,
@@@ -3063,13 -2938,18 +3071,18 @@@ static void udma_free_chan_resources(st
  
  static struct platform_driver udma_driver;
  
+ struct udma_filter_param {
+       int remote_thread_id;
+       u32 atype;
+ };
+ 
  static bool udma_dma_filter_fn(struct dma_chan *chan, void *param)
  {
        struct udma_chan_config *ucc;
        struct psil_endpoint_config *ep_config;
+       struct udma_filter_param *filter_param;
        struct udma_chan *uc;
        struct udma_dev *ud;
-       u32 *args;
  
        if (chan->device->dev->driver != &udma_driver.driver)
                return false;
        uc = to_udma_chan(chan);
        ucc = &uc->config;
        ud = uc->ud;
-       args = param;
+       filter_param = param;
+ 
+       if (filter_param->atype > 2) {
+               dev_err(ud->dev, "Invalid channel atype: %u\n",
+                       filter_param->atype);
+               return false;
+       }
  
-       ucc->remote_thread_id = args[0];
+       ucc->remote_thread_id = filter_param->remote_thread_id;
+       ucc->atype = filter_param->atype;
  
        if (ucc->remote_thread_id & K3_PSIL_DST_THREAD_ID_OFFSET)
                ucc->dir = DMA_MEM_TO_DEV;
                        ucc->remote_thread_id);
                ucc->dir = DMA_MEM_TO_MEM;
                ucc->remote_thread_id = -1;
+               ucc->atype = 0;
                return false;
        }
  
@@@ -3130,13 -3018,20 +3151,20 @@@ static struct dma_chan *udma_of_xlate(s
  {
        struct udma_dev *ud = ofdma->of_dma_data;
        dma_cap_mask_t mask = ud->ddev.cap_mask;
+       struct udma_filter_param filter_param;
        struct dma_chan *chan;
  
-       if (dma_spec->args_count != 1)
+       if (dma_spec->args_count != 1 && dma_spec->args_count != 2)
                return NULL;
  
-       chan = __dma_request_channel(&mask, udma_dma_filter_fn,
-                                    &dma_spec->args[0], ofdma->of_node);
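+       /* args[0] is the PSI-L thread ID; the optional args[1] is the ATYPE */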
+       filter_param.remote_thread_id = dma_spec->args[0];
+       if (dma_spec->args_count == 2)
+               filter_param.atype = dma_spec->args[1];
+       else
+               filter_param.atype = 0;
+ 
+       chan = __dma_request_channel(&mask, udma_dma_filter_fn, &filter_param,
+                                    ofdma->of_node);
        if (!chan) {
                dev_err(ud->dev, "get channel fail in %s.\n", __func__);
                return ERR_PTR(-EINVAL);
@@@ -3381,98 -3276,66 +3409,158 @@@ static int udma_setup_resources(struct 
        return ch_count;
  }
  
 +static int udma_setup_rx_flush(struct udma_dev *ud)
 +{
 +      struct udma_rx_flush *rx_flush = &ud->rx_flush;
 +      struct cppi5_desc_hdr_t *tr_desc;
 +      struct cppi5_tr_type1_t *tr_req;
 +      struct cppi5_host_desc_t *desc;
 +      struct device *dev = ud->dev;
 +      struct udma_hwdesc *hwdesc;
 +      size_t tr_size;
 +
 +      /* Allocate 1K buffer for discarded data on RX channel teardown */
 +      rx_flush->buffer_size = SZ_1K;
 +      rx_flush->buffer_vaddr = devm_kzalloc(dev, rx_flush->buffer_size,
 +                                            GFP_KERNEL);
 +      if (!rx_flush->buffer_vaddr)
 +              return -ENOMEM;
 +
 +      rx_flush->buffer_paddr = dma_map_single(dev, rx_flush->buffer_vaddr,
 +                                              rx_flush->buffer_size,
 +                                              DMA_TO_DEVICE);
 +      if (dma_mapping_error(dev, rx_flush->buffer_paddr))
 +              return -ENOMEM;
 +
 +      /* Set up descriptor to be used for TR mode */
 +      hwdesc = &rx_flush->hwdescs[0];
 +      tr_size = sizeof(struct cppi5_tr_type1_t);
 +      hwdesc->cppi5_desc_size = cppi5_trdesc_calc_size(tr_size, 1);
 +      hwdesc->cppi5_desc_size = ALIGN(hwdesc->cppi5_desc_size,
 +                                      ud->desc_align);
 +
 +      hwdesc->cppi5_desc_vaddr = devm_kzalloc(dev, hwdesc->cppi5_desc_size,
 +                                              GFP_KERNEL);
 +      if (!hwdesc->cppi5_desc_vaddr)
 +              return -ENOMEM;
 +
 +      hwdesc->cppi5_desc_paddr = dma_map_single(dev, hwdesc->cppi5_desc_vaddr,
 +                                                hwdesc->cppi5_desc_size,
 +                                                DMA_TO_DEVICE);
 +      if (dma_mapping_error(dev, hwdesc->cppi5_desc_paddr))
 +              return -ENOMEM;
 +
 +      /* Start of the TR req records */
 +      hwdesc->tr_req_base = hwdesc->cppi5_desc_vaddr + tr_size;
 +      /* Start address of the TR response array */
 +      hwdesc->tr_resp_base = hwdesc->tr_req_base + tr_size;
 +
 +      tr_desc = hwdesc->cppi5_desc_vaddr;
 +      cppi5_trdesc_init(tr_desc, 1, tr_size, 0, 0);
 +      cppi5_desc_set_pktids(tr_desc, 0, CPPI5_INFO1_DESC_FLOWID_DEFAULT);
 +      cppi5_desc_set_retpolicy(tr_desc, 0, 0);
 +
 +      tr_req = hwdesc->tr_req_base;
 +      cppi5_tr_init(&tr_req->flags, CPPI5_TR_TYPE1, false, false,
 +                    CPPI5_TR_EVENT_SIZE_COMPLETION, 0);
 +      cppi5_tr_csf_set(&tr_req->flags, CPPI5_TR_CSF_SUPR_EVT);
 +
 +      tr_req->addr = rx_flush->buffer_paddr;
 +      tr_req->icnt0 = rx_flush->buffer_size;
 +      tr_req->icnt1 = 1;
 +
 +      /* Set up descriptor to be used for packet mode */
 +      hwdesc = &rx_flush->hwdescs[1];
 +      hwdesc->cppi5_desc_size = ALIGN(sizeof(struct cppi5_host_desc_t) +
 +                                      CPPI5_INFO0_HDESC_EPIB_SIZE +
 +                                      CPPI5_INFO0_HDESC_PSDATA_MAX_SIZE,
 +                                      ud->desc_align);
 +
 +      hwdesc->cppi5_desc_vaddr = devm_kzalloc(dev, hwdesc->cppi5_desc_size,
 +                                              GFP_KERNEL);
 +      if (!hwdesc->cppi5_desc_vaddr)
 +              return -ENOMEM;
 +
 +      hwdesc->cppi5_desc_paddr = dma_map_single(dev, hwdesc->cppi5_desc_vaddr,
 +                                                hwdesc->cppi5_desc_size,
 +                                                DMA_TO_DEVICE);
 +      if (dma_mapping_error(dev, hwdesc->cppi5_desc_paddr))
 +              return -ENOMEM;
 +
 +      desc = hwdesc->cppi5_desc_vaddr;
 +      cppi5_hdesc_init(desc, 0, 0);
 +      cppi5_desc_set_pktids(&desc->hdr, 0, CPPI5_INFO1_DESC_FLOWID_DEFAULT);
 +      cppi5_desc_set_retpolicy(&desc->hdr, 0, 0);
 +
 +      cppi5_hdesc_attach_buf(desc,
 +                             rx_flush->buffer_paddr, rx_flush->buffer_size,
 +                             rx_flush->buffer_paddr, rx_flush->buffer_size);
 +
 +      dma_sync_single_for_device(dev, hwdesc->cppi5_desc_paddr,
 +                                 hwdesc->cppi5_desc_size, DMA_TO_DEVICE);
 +      return 0;
 +}
 +
+ #ifdef CONFIG_DEBUG_FS
+ static void udma_dbg_summary_show_chan(struct seq_file *s,
+                                      struct dma_chan *chan)
+ {
+       struct udma_chan *uc = to_udma_chan(chan);
+       struct udma_chan_config *ucc = &uc->config;
+ 
+       seq_printf(s, " %-13s| %s", dma_chan_name(chan),
+                  chan->dbg_client_name ?: "in-use");
+       seq_printf(s, " (%s, ", dmaengine_get_direction_text(uc->config.dir));
+ 
+       switch (uc->config.dir) {
+       case DMA_MEM_TO_MEM:
+               seq_printf(s, "chan%d pair [0x%04x -> 0x%04x], ", uc->tchan->id,
+                          ucc->src_thread, ucc->dst_thread);
+               break;
+       case DMA_DEV_TO_MEM:
+               seq_printf(s, "rchan%d [0x%04x -> 0x%04x], ", uc->rchan->id,
+                          ucc->src_thread, ucc->dst_thread);
+               break;
+       case DMA_MEM_TO_DEV:
+               seq_printf(s, "tchan%d [0x%04x -> 0x%04x], ", uc->tchan->id,
+                          ucc->src_thread, ucc->dst_thread);
+               break;
+       default:
+               seq_printf(s, ")\n");
+               return;
+       }
+ 
+       if (ucc->ep_type == PSIL_EP_NATIVE) {
+               seq_printf(s, "PSI-L Native");
+               if (ucc->metadata_size) {
+                       seq_printf(s, "[%s", ucc->needs_epib ? " EPIB" : "");
+                       if (ucc->psd_size)
+                               seq_printf(s, " PSDsize:%u", ucc->psd_size);
+                       seq_printf(s, " ]");
+               }
+       } else {
+               seq_printf(s, "PDMA");
+               if (ucc->enable_acc32 || ucc->enable_burst)
+                       seq_printf(s, "[%s%s ]",
+                                  ucc->enable_acc32 ? " ACC32" : "",
+                                  ucc->enable_burst ? " BURST" : "");
+       }
+       seq_printf(s, ", %s)\n", ucc->pkt_mode ? "Packet mode" : "TR mode");
+ }
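+ 
+ /*
+  * Example of a line udma_dbg_summary_show_chan() emits (values are
+  * made up):
+  *  dma0chan0    | in-use (DEV_TO_MEM, rchan0 [0x4100 -> 0xc400], PSI-L Native, Packet mode)
+  */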
+ static void udma_dbg_summary_show(struct seq_file *s,
+                                 struct dma_device *dma_dev)
+ {
+       struct dma_chan *chan;
+ 
+       list_for_each_entry(chan, &dma_dev->channels, device_node) {
+               if (chan->client_count)
+                       udma_dbg_summary_show_chan(s, chan);
+       }
+ }
+ #endif /* CONFIG_DEBUG_FS */
+ 
  #define TI_UDMAC_BUSWIDTHS    (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
                                 BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
                                 BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \
@@@ -3519,6 -3382,12 +3607,12 @@@ static int udma_probe(struct platform_d
                return ret;
        }
  
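+       /* Optional property; ud->atype keeps its default (0) when absent */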
+       ret = of_property_read_u32(navss_node, "ti,udma-atype", &ud->atype);
+       if (!ret && ud->atype > 2) {
+               dev_err(dev, "Invalid atype: %u\n", ud->atype);
+               return -EINVAL;
+       }
+ 
        ud->tisci_rm.tisci_udmap_ops = &ud->tisci_rm.tisci->ops.rm_udmap_ops;
        ud->tisci_rm.tisci_psil_ops = &ud->tisci_rm.tisci->ops.rm_psil_ops;
  
        ud->ddev.device_resume = udma_resume;
        ud->ddev.device_terminate_all = udma_terminate_all;
        ud->ddev.device_synchronize = udma_synchronize;
+ #ifdef CONFIG_DEBUG_FS
+       ud->ddev.dbg_summary_show = udma_dbg_summary_show;
+ #endif
  
        ud->ddev.device_free_chan_resources = udma_free_chan_resources;
        ud->ddev.src_addr_widths = TI_UDMAC_BUSWIDTHS;
        if (ud->desc_align < dma_get_cache_alignment())
                ud->desc_align = dma_get_cache_alignment();
  
 +      ret = udma_setup_rx_flush(ud);
 +      if (ret)
 +              return ret;
 +
        for (i = 0; i < ud->tchan_cnt; i++) {
                struct udma_tchan *tchan = &ud->tchans[i];