Merge branch 'drm-next-3.12' of git://people.freedesktop.org/~agd5f/linux into drm-next
authorDave Airlie <airlied@redhat.com>
Sun, 1 Sep 2013 23:31:40 +0000 (09:31 +1000)
committerDave Airlie <airlied@redhat.com>
Sun, 1 Sep 2013 23:31:40 +0000 (09:31 +1000)
Alex writes:
This is the radeon drm-next request.  Big changes include:
- support for dpm on CIK parts
- support for ASPM on CIK parts
- support for berlin GPUs
- major ring handling cleanup
- remove the old 3D blit code for bo moves in favor of CP DMA or sDMA
- lots of bug fixes

[airlied: fix up a bunch of conflicts from drm_order removal]

* 'drm-next-3.12' of git://people.freedesktop.org/~agd5f/linux: (898 commits)
  drm/radeon/dpm: make sure dc performance level limits are valid (CI)
  drm/radeon/dpm: make sure dc performance level limits are valid (BTC-SI) (v2)
  drm/radeon: gcc fixes for extended dpm tables
  drm/radeon: gcc fixes for kb/kv dpm
  drm/radeon: gcc fixes for ci dpm
  drm/radeon: gcc fixes for si dpm
  drm/radeon: gcc fixes for ni dpm
  drm/radeon: gcc fixes for trinity dpm
  drm/radeon: gcc fixes for sumo dpm
  drm/radeonn: gcc fixes for rv7xx/eg/btc dpm
  drm/radeon: gcc fixes for rv6xx dpm
  drm/radeon: gcc fixes for radeon_atombios.c
  drm/radeon: enable UVD interrupts on CIK
  drm/radeon: fix init ordering for r600+
  drm/radeon/dpm: only need to reprogram uvd if uvd pg is enabled
  drm/radeon: check the return value of uvd_v1_0_start in uvd_v1_0_init
  drm/radeon: split out radeon_uvd_resume from uvd_v4_2_resume
  radeon kms: fix uninitialised hotplug work usage in r100_irq_process()
  drm/radeon/audio: set up the sads on DCE3.2 asics
  drm/radeon: fix handling of variable sized arrays for router objects
  ...

Conflicts:
drivers/gpu/drm/i915/i915_dma.c
drivers/gpu/drm/i915/i915_gem_dmabuf.c
drivers/gpu/drm/i915/intel_pm.c
drivers/gpu/drm/radeon/cik.c
drivers/gpu/drm/radeon/ni.c
drivers/gpu/drm/radeon/r600.c

33 files changed:
1  2 
drivers/gpu/drm/ast/ast_ttm.c
drivers/gpu/drm/cirrus/cirrus_ttm.c
drivers/gpu/drm/drm_edid.c
drivers/gpu/drm/gma500/psb_intel_sdvo.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_panel.c
drivers/gpu/drm/i915/intel_pm.c
drivers/gpu/drm/i915/intel_ringbuffer.c
drivers/gpu/drm/mgag200/mgag200_ttm.c
drivers/gpu/drm/nouveau/core/subdev/mc/base.c
drivers/gpu/drm/nouveau/dispnv04/crtc.c
drivers/gpu/drm/nouveau/nouveau_bo.c
drivers/gpu/drm/nouveau/nouveau_display.c
drivers/gpu/drm/radeon/atombios_dp.c
drivers/gpu/drm/radeon/cik.c
drivers/gpu/drm/radeon/cik_sdma.c
drivers/gpu/drm/radeon/evergreen.c
drivers/gpu/drm/radeon/ni.c
drivers/gpu/drm/radeon/ni_dma.c
drivers/gpu/drm/radeon/r100.c
drivers/gpu/drm/radeon/r600.c
drivers/gpu/drm/radeon/r600_dma.c
drivers/gpu/drm/radeon/radeon.h
drivers/gpu/drm/radeon/radeon_device.c
drivers/gpu/drm/radeon/radeon_display.c
drivers/gpu/drm/radeon/radeon_drv.c
drivers/gpu/drm/radeon/radeon_kms.c
drivers/gpu/drm/radeon/si.c
drivers/gpu/drm/radeon/uvd_v1_0.c
include/drm/drm_edid.h
include/drm/drm_pciids.h

@@@ -323,7 -321,9 +323,8 @@@ int ast_bo_create(struct drm_device *de
                return ret;
        }
  
 -      astbo->gem.driver_private = NULL;
        astbo->bo.bdev = &ast->ttm.bdev;
+       astbo->bo.bdev->dev_mapping = dev->dev_mapping;
  
        ast_ttm_placement(astbo, TTM_PL_FLAG_VRAM | TTM_PL_FLAG_SYSTEM);
  
@@@ -328,7 -326,9 +328,8 @@@ int cirrus_bo_create(struct drm_device 
                return ret;
        }
  
 -      cirrusbo->gem.driver_private = NULL;
        cirrusbo->bo.bdev = &cirrus->ttm.bdev;
+       cirrusbo->bo.bdev->dev_mapping = dev->dev_mapping;
  
        cirrus_ttm_placement(cirrusbo, TTM_PL_FLAG_VRAM | TTM_PL_FLAG_SYSTEM);
  
Simple merge
@@@ -1749,14 -1582,12 +1749,15 @@@ void i915_queue_hangcheck(struct drm_de
  void i915_handle_error(struct drm_device *dev, bool wedged);
  
  extern void intel_irq_init(struct drm_device *dev);
+ extern void intel_pm_init(struct drm_device *dev);
  extern void intel_hpd_init(struct drm_device *dev);
 -extern void intel_gt_init(struct drm_device *dev);
 -extern void intel_gt_sanitize(struct drm_device *dev);
 +extern void intel_pm_init(struct drm_device *dev);
  
 -void i915_error_state_free(struct kref *error_ref);
 +extern void intel_uncore_sanitize(struct drm_device *dev);
 +extern void intel_uncore_early_sanitize(struct drm_device *dev);
 +extern void intel_uncore_init(struct drm_device *dev);
 +extern void intel_uncore_clear_errors(struct drm_device *dev);
 +extern void intel_uncore_check_errors(struct drm_device *dev);
  
  void
  i915_enable_pipestat(drm_i915_private_t *dev_priv, int pipe, u32 mask);
Simple merge
@@@ -10702,4 -10196,17 +10720,16 @@@ intel_display_print_error_state(struct 
                err_printf(m, "  POS: %08x\n", error->cursor[i].position);
                err_printf(m, "  BASE: %08x\n", error->cursor[i].base);
        }
+       for (i = 0; i < error->num_transcoders; i++) {
+               err_printf(m, "  CPU transcoder: %c\n",
+                          transcoder_name(error->transcoder[i].cpu_transcoder));
+               err_printf(m, "  CONF: %08x\n", error->transcoder[i].conf);
+               err_printf(m, "  HTOTAL: %08x\n", error->transcoder[i].htotal);
+               err_printf(m, "  HBLANK: %08x\n", error->transcoder[i].hblank);
+               err_printf(m, "  HSYNC: %08x\n", error->transcoder[i].hsync);
+               err_printf(m, "  VTOTAL: %08x\n", error->transcoder[i].vtotal);
+               err_printf(m, "  VBLANK: %08x\n", error->transcoder[i].vblank);
+               err_printf(m, "  VSYNC: %08x\n", error->transcoder[i].vsync);
+       }
  }
 -#endif
Simple merge
Simple merge
@@@ -323,7 -321,9 +323,8 @@@ int mgag200_bo_create(struct drm_devic
                return ret;
        }
  
 -      mgabo->gem.driver_private = NULL;
        mgabo->bo.bdev = &mdev->ttm.bdev;
+       mgabo->bo.bdev->dev_mapping = dev->dev_mapping;
  
        mgag200_ttm_placement(mgabo, TTM_PL_FLAG_VRAM | TTM_PL_FLAG_SYSTEM);
  
Simple merge
Simple merge
Simple merge
index 0000000,8925185..b628606
mode 000000,100644..100644
--- /dev/null
@@@ -1,0 -1,785 +1,785 @@@
 -              rb_bufsz = drm_order(ring->ring_size / 4);
+ /*
+  * Copyright 2013 Advanced Micro Devices, Inc.
+  *
+  * Permission is hereby granted, free of charge, to any person obtaining a
+  * copy of this software and associated documentation files (the "Software"),
+  * to deal in the Software without restriction, including without limitation
+  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+  * and/or sell copies of the Software, and to permit persons to whom the
+  * Software is furnished to do so, subject to the following conditions:
+  *
+  * The above copyright notice and this permission notice shall be included in
+  * all copies or substantial portions of the Software.
+  *
+  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+  * OTHER DEALINGS IN THE SOFTWARE.
+  *
+  * Authors: Alex Deucher
+  */
+ #include <linux/firmware.h>
+ #include <drm/drmP.h>
+ #include "radeon.h"
+ #include "radeon_asic.h"
+ #include "cikd.h"
+ /* sdma */
+ #define CIK_SDMA_UCODE_SIZE 1050
+ #define CIK_SDMA_UCODE_VERSION 64
+ u32 cik_gpu_check_soft_reset(struct radeon_device *rdev);
+ /*
+  * sDMA - System DMA
+  * Starting with CIK, the GPU has new asynchronous
+  * DMA engines.  These engines are used for compute
+  * and gfx.  There are two DMA engines (SDMA0, SDMA1)
+  * and each one supports 1 ring buffer used for gfx
+  * and 2 queues used for compute.
+  *
+  * The programming model is very similar to the CP
+  * (ring buffer, IBs, etc.), but sDMA has it's own
+  * packet format that is different from the PM4 format
+  * used by the CP. sDMA supports copying data, writing
+  * embedded data, solid fills, and a number of other
+  * things.  It also has support for tiling/detiling of
+  * buffers.
+  */
+ /**
+  * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
+  *
+  * @rdev: radeon_device pointer
+  * @ib: IB object to schedule
+  *
+  * Schedule an IB in the DMA ring (CIK).
+  */
+ void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
+                             struct radeon_ib *ib)
+ {
+       struct radeon_ring *ring = &rdev->ring[ib->ring];
+       u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
+       if (rdev->wb.enabled) {
+               u32 next_rptr = ring->wptr + 5;
+               while ((next_rptr & 7) != 4)
+                       next_rptr++;
+               next_rptr += 4;
+               radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
+               radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
+               radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
+               radeon_ring_write(ring, 1); /* number of DWs to follow */
+               radeon_ring_write(ring, next_rptr);
+       }
+       /* IB packet must end on a 8 DW boundary */
+       while ((ring->wptr & 7) != 4)
+               radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
+       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
+       radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
+       radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
+       radeon_ring_write(ring, ib->length_dw);
+ }
+ /**
+  * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
+  *
+  * @rdev: radeon_device pointer
+  * @fence: radeon fence object
+  *
+  * Add a DMA fence packet to the ring to write
+  * the fence seq number and DMA trap packet to generate
+  * an interrupt if needed (CIK).
+  */
+ void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
+                             struct radeon_fence *fence)
+ {
+       struct radeon_ring *ring = &rdev->ring[fence->ring];
+       u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
+       u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
+                         SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
+       u32 ref_and_mask;
+       if (fence->ring == R600_RING_TYPE_DMA_INDEX)
+               ref_and_mask = SDMA0;
+       else
+               ref_and_mask = SDMA1;
+       /* write the fence */
+       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
+       radeon_ring_write(ring, addr & 0xffffffff);
+       radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+       radeon_ring_write(ring, fence->seq);
+       /* generate an interrupt */
+       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
+       /* flush HDP */
+       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
+       radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
+       radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
+       radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
+       radeon_ring_write(ring, ref_and_mask); /* MASK */
+       radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
+ }
+ /**
+  * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
+  *
+  * @rdev: radeon_device pointer
+  * @ring: radeon_ring structure holding ring information
+  * @semaphore: radeon semaphore object
+  * @emit_wait: wait or signal semaphore
+  *
+  * Add a DMA semaphore packet to the ring wait on or signal
+  * other rings (CIK).
+  */
+ void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
+                                 struct radeon_ring *ring,
+                                 struct radeon_semaphore *semaphore,
+                                 bool emit_wait)
+ {
+       u64 addr = semaphore->gpu_addr;
+       u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
+       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
+       radeon_ring_write(ring, addr & 0xfffffff8);
+       radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+ }
+ /**
+  * cik_sdma_gfx_stop - stop the gfx async dma engines
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Stop the gfx async dma ring buffers (CIK).
+  */
+ static void cik_sdma_gfx_stop(struct radeon_device *rdev)
+ {
+       u32 rb_cntl, reg_offset;
+       int i;
+       radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
+       for (i = 0; i < 2; i++) {
+               if (i == 0)
+                       reg_offset = SDMA0_REGISTER_OFFSET;
+               else
+                       reg_offset = SDMA1_REGISTER_OFFSET;
+               rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
+               rb_cntl &= ~SDMA_RB_ENABLE;
+               WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
+               WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
+       }
+ }
+ /**
+  * cik_sdma_rlc_stop - stop the compute async dma engines
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Stop the compute async dma queues (CIK).
+  */
+ static void cik_sdma_rlc_stop(struct radeon_device *rdev)
+ {
+       /* XXX todo */
+ }
+ /**
+  * cik_sdma_enable - stop the async dma engines
+  *
+  * @rdev: radeon_device pointer
+  * @enable: enable/disable the DMA MEs.
+  *
+  * Halt or unhalt the async dma engines (CIK).
+  */
+ void cik_sdma_enable(struct radeon_device *rdev, bool enable)
+ {
+       u32 me_cntl, reg_offset;
+       int i;
+       for (i = 0; i < 2; i++) {
+               if (i == 0)
+                       reg_offset = SDMA0_REGISTER_OFFSET;
+               else
+                       reg_offset = SDMA1_REGISTER_OFFSET;
+               me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
+               if (enable)
+                       me_cntl &= ~SDMA_HALT;
+               else
+                       me_cntl |= SDMA_HALT;
+               WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
+       }
+ }
+ /**
+  * cik_sdma_gfx_resume - setup and start the async dma engines
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Set up the gfx DMA ring buffers and enable them (CIK).
+  * Returns 0 for success, error for failure.
+  */
+ static int cik_sdma_gfx_resume(struct radeon_device *rdev)
+ {
+       struct radeon_ring *ring;
+       u32 rb_cntl, ib_cntl;
+       u32 rb_bufsz;
+       u32 reg_offset, wb_offset;
+       int i, r;
+       for (i = 0; i < 2; i++) {
+               if (i == 0) {
+                       ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+                       reg_offset = SDMA0_REGISTER_OFFSET;
+                       wb_offset = R600_WB_DMA_RPTR_OFFSET;
+               } else {
+                       ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
+                       reg_offset = SDMA1_REGISTER_OFFSET;
+                       wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
+               }
+               WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
+               WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
+               /* Set ring buffer size in dwords */
++              rb_bufsz = order_base_2(ring->ring_size / 4);
+               rb_cntl = rb_bufsz << 1;
+ #ifdef __BIG_ENDIAN
+               rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
+ #endif
+               WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
+               /* Initialize the ring buffer's read and write pointers */
+               WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
+               WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
+               /* set the wb address whether it's enabled or not */
+               WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
+                      upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
+               WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
+                      ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
+               if (rdev->wb.enabled)
+                       rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
+               WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
+               WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
+               ring->wptr = 0;
+               WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
+               ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
+               /* enable DMA RB */
+               WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
+               ib_cntl = SDMA_IB_ENABLE;
+ #ifdef __BIG_ENDIAN
+               ib_cntl |= SDMA_IB_SWAP_ENABLE;
+ #endif
+               /* enable DMA IBs */
+               WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
+               ring->ready = true;
+               r = radeon_ring_test(rdev, ring->idx, ring);
+               if (r) {
+                       ring->ready = false;
+                       return r;
+               }
+       }
+       radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
+       return 0;
+ }
+ /**
+  * cik_sdma_rlc_resume - setup and start the async dma engines
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Set up the compute DMA queues and enable them (CIK).
+  * Returns 0 for success, error for failure.
+  */
+ static int cik_sdma_rlc_resume(struct radeon_device *rdev)
+ {
+       /* XXX todo */
+       return 0;
+ }
+ /**
+  * cik_sdma_load_microcode - load the sDMA ME ucode
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Loads the sDMA0/1 ucode.
+  * Returns 0 for success, -EINVAL if the ucode is not available.
+  */
+ static int cik_sdma_load_microcode(struct radeon_device *rdev)
+ {
+       const __be32 *fw_data;
+       int i;
+       if (!rdev->sdma_fw)
+               return -EINVAL;
+       /* stop the gfx rings and rlc compute queues */
+       cik_sdma_gfx_stop(rdev);
+       cik_sdma_rlc_stop(rdev);
+       /* halt the MEs */
+       cik_sdma_enable(rdev, false);
+       /* sdma0 */
+       fw_data = (const __be32 *)rdev->sdma_fw->data;
+       WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
+       for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
+               WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
+       WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
+       /* sdma1 */
+       fw_data = (const __be32 *)rdev->sdma_fw->data;
+       WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
+       for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
+               WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
+       WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
+       WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
+       WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
+       return 0;
+ }
+ /**
+  * cik_sdma_resume - setup and start the async dma engines
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Set up the DMA engines and enable them (CIK).
+  * Returns 0 for success, error for failure.
+  */
+ int cik_sdma_resume(struct radeon_device *rdev)
+ {
+       int r;
+       /* Reset dma */
+       WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
+       RREG32(SRBM_SOFT_RESET);
+       udelay(50);
+       WREG32(SRBM_SOFT_RESET, 0);
+       RREG32(SRBM_SOFT_RESET);
+       r = cik_sdma_load_microcode(rdev);
+       if (r)
+               return r;
+       /* unhalt the MEs */
+       cik_sdma_enable(rdev, true);
+       /* start the gfx rings and rlc compute queues */
+       r = cik_sdma_gfx_resume(rdev);
+       if (r)
+               return r;
+       r = cik_sdma_rlc_resume(rdev);
+       if (r)
+               return r;
+       return 0;
+ }
+ /**
+  * cik_sdma_fini - tear down the async dma engines
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Stop the async dma engines and free the rings (CIK).
+  */
+ void cik_sdma_fini(struct radeon_device *rdev)
+ {
+       /* stop the gfx rings and rlc compute queues */
+       cik_sdma_gfx_stop(rdev);
+       cik_sdma_rlc_stop(rdev);
+       /* halt the MEs */
+       cik_sdma_enable(rdev, false);
+       radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
+       radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
+       /* XXX - compute dma queue tear down */
+ }
+ /**
+  * cik_copy_dma - copy pages using the DMA engine
+  *
+  * @rdev: radeon_device pointer
+  * @src_offset: src GPU address
+  * @dst_offset: dst GPU address
+  * @num_gpu_pages: number of GPU pages to xfer
+  * @fence: radeon fence object
+  *
+  * Copy GPU paging using the DMA engine (CIK).
+  * Used by the radeon ttm implementation to move pages if
+  * registered as the asic copy callback.
+  */
+ int cik_copy_dma(struct radeon_device *rdev,
+                uint64_t src_offset, uint64_t dst_offset,
+                unsigned num_gpu_pages,
+                struct radeon_fence **fence)
+ {
+       struct radeon_semaphore *sem = NULL;
+       int ring_index = rdev->asic->copy.dma_ring_index;
+       struct radeon_ring *ring = &rdev->ring[ring_index];
+       u32 size_in_bytes, cur_size_in_bytes;
+       int i, num_loops;
+       int r = 0;
+       r = radeon_semaphore_create(rdev, &sem);
+       if (r) {
+               DRM_ERROR("radeon: moving bo (%d).\n", r);
+               return r;
+       }
+       size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
+       num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
+       r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
+       if (r) {
+               DRM_ERROR("radeon: moving bo (%d).\n", r);
+               radeon_semaphore_free(rdev, &sem, NULL);
+               return r;
+       }
+       if (radeon_fence_need_sync(*fence, ring->idx)) {
+               radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
+                                           ring->idx);
+               radeon_fence_note_sync(*fence, ring->idx);
+       } else {
+               radeon_semaphore_free(rdev, &sem, NULL);
+       }
+       for (i = 0; i < num_loops; i++) {
+               cur_size_in_bytes = size_in_bytes;
+               if (cur_size_in_bytes > 0x1fffff)
+                       cur_size_in_bytes = 0x1fffff;
+               size_in_bytes -= cur_size_in_bytes;
+               radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
+               radeon_ring_write(ring, cur_size_in_bytes);
+               radeon_ring_write(ring, 0); /* src/dst endian swap */
+               radeon_ring_write(ring, src_offset & 0xffffffff);
+               radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
+               radeon_ring_write(ring, dst_offset & 0xfffffffc);
+               radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
+               src_offset += cur_size_in_bytes;
+               dst_offset += cur_size_in_bytes;
+       }
+       r = radeon_fence_emit(rdev, fence, ring->idx);
+       if (r) {
+               radeon_ring_unlock_undo(rdev, ring);
+               return r;
+       }
+       radeon_ring_unlock_commit(rdev, ring);
+       radeon_semaphore_free(rdev, &sem, *fence);
+       return r;
+ }
+ /**
+  * cik_sdma_ring_test - simple async dma engine test
+  *
+  * @rdev: radeon_device pointer
+  * @ring: radeon_ring structure holding ring information
+  *
+  * Test the DMA engine by writing using it to write an
+  * value to memory. (CIK).
+  * Returns 0 for success, error for failure.
+  */
+ int cik_sdma_ring_test(struct radeon_device *rdev,
+                      struct radeon_ring *ring)
+ {
+       unsigned i;
+       int r;
+       void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
+       u32 tmp;
+       if (!ptr) {
+               DRM_ERROR("invalid vram scratch pointer\n");
+               return -EINVAL;
+       }
+       tmp = 0xCAFEDEAD;
+       writel(tmp, ptr);
+       r = radeon_ring_lock(rdev, ring, 4);
+       if (r) {
+               DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
+               return r;
+       }
+       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
+       radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
+       radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
+       radeon_ring_write(ring, 1); /* number of DWs to follow */
+       radeon_ring_write(ring, 0xDEADBEEF);
+       radeon_ring_unlock_commit(rdev, ring);
+       for (i = 0; i < rdev->usec_timeout; i++) {
+               tmp = readl(ptr);
+               if (tmp == 0xDEADBEEF)
+                       break;
+               DRM_UDELAY(1);
+       }
+       if (i < rdev->usec_timeout) {
+               DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
+       } else {
+               DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
+                         ring->idx, tmp);
+               r = -EINVAL;
+       }
+       return r;
+ }
+ /**
+  * cik_sdma_ib_test - test an IB on the DMA engine
+  *
+  * @rdev: radeon_device pointer
+  * @ring: radeon_ring structure holding ring information
+  *
+  * Test a simple IB in the DMA ring (CIK).
+  * Returns 0 on success, error on failure.
+  */
+ int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
+ {
+       struct radeon_ib ib;
+       unsigned i;
+       int r;
+       void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
+       u32 tmp = 0;
+       if (!ptr) {
+               DRM_ERROR("invalid vram scratch pointer\n");
+               return -EINVAL;
+       }
+       tmp = 0xCAFEDEAD;
+       writel(tmp, ptr);
+       r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
+       if (r) {
+               DRM_ERROR("radeon: failed to get ib (%d).\n", r);
+               return r;
+       }
+       ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
+       ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
+       ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
+       ib.ptr[3] = 1;
+       ib.ptr[4] = 0xDEADBEEF;
+       ib.length_dw = 5;
+       r = radeon_ib_schedule(rdev, &ib, NULL);
+       if (r) {
+               radeon_ib_free(rdev, &ib);
+               DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
+               return r;
+       }
+       r = radeon_fence_wait(ib.fence, false);
+       if (r) {
+               DRM_ERROR("radeon: fence wait failed (%d).\n", r);
+               return r;
+       }
+       for (i = 0; i < rdev->usec_timeout; i++) {
+               tmp = readl(ptr);
+               if (tmp == 0xDEADBEEF)
+                       break;
+               DRM_UDELAY(1);
+       }
+       if (i < rdev->usec_timeout) {
+               DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
+       } else {
+               DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
+               r = -EINVAL;
+       }
+       radeon_ib_free(rdev, &ib);
+       return r;
+ }
+ /**
+  * cik_sdma_is_lockup - Check if the DMA engine is locked up
+  *
+  * @rdev: radeon_device pointer
+  * @ring: radeon_ring structure holding ring information
+  *
+  * Check if the async DMA engine is locked up (CIK).
+  * Returns true if the engine appears to be locked up, false if not.
+  */
+ bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
+ {
+       u32 reset_mask = cik_gpu_check_soft_reset(rdev);
+       u32 mask;
+       if (ring->idx == R600_RING_TYPE_DMA_INDEX)
+               mask = RADEON_RESET_DMA;
+       else
+               mask = RADEON_RESET_DMA1;
+       if (!(reset_mask & mask)) {
+               radeon_ring_lockup_update(ring);
+               return false;
+       }
+       /* force ring activities */
+       radeon_ring_force_activity(rdev, ring);
+       return radeon_ring_test_lockup(rdev, ring);
+ }
+ /**
+  * cik_sdma_vm_set_page - update the page tables using sDMA
+  *
+  * @rdev: radeon_device pointer
+  * @ib: indirect buffer to fill with commands
+  * @pe: addr of the page entry
+  * @addr: dst addr to write into pe
+  * @count: number of page entries to update
+  * @incr: increase next addr by incr bytes
+  * @flags: access flags
+  *
+  * Update the page tables using sDMA (CIK).
+  */
+ void cik_sdma_vm_set_page(struct radeon_device *rdev,
+                         struct radeon_ib *ib,
+                         uint64_t pe,
+                         uint64_t addr, unsigned count,
+                         uint32_t incr, uint32_t flags)
+ {
+       uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
+       uint64_t value;
+       unsigned ndw;
+       if (flags & RADEON_VM_PAGE_SYSTEM) {
+               while (count) {
+                       ndw = count * 2;
+                       if (ndw > 0xFFFFE)
+                               ndw = 0xFFFFE;
+                       /* for non-physically contiguous pages (system) */
+                       ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
+                       ib->ptr[ib->length_dw++] = pe;
+                       ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+                       ib->ptr[ib->length_dw++] = ndw;
+                       for (; ndw > 0; ndw -= 2, --count, pe += 8) {
+                               if (flags & RADEON_VM_PAGE_SYSTEM) {
+                                       value = radeon_vm_map_gart(rdev, addr);
+                                       value &= 0xFFFFFFFFFFFFF000ULL;
+                               } else if (flags & RADEON_VM_PAGE_VALID) {
+                                       value = addr;
+                               } else {
+                                       value = 0;
+                               }
+                               addr += incr;
+                               value |= r600_flags;
+                               ib->ptr[ib->length_dw++] = value;
+                               ib->ptr[ib->length_dw++] = upper_32_bits(value);
+                       }
+               }
+       } else {
+               while (count) {
+                       ndw = count;
+                       if (ndw > 0x7FFFF)
+                               ndw = 0x7FFFF;
+                       if (flags & RADEON_VM_PAGE_VALID)
+                               value = addr;
+                       else
+                               value = 0;
+                       /* for physically contiguous pages (vram) */
+                       ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
+                       ib->ptr[ib->length_dw++] = pe; /* dst addr */
+                       ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+                       ib->ptr[ib->length_dw++] = r600_flags; /* mask */
+                       ib->ptr[ib->length_dw++] = 0;
+                       ib->ptr[ib->length_dw++] = value; /* value */
+                       ib->ptr[ib->length_dw++] = upper_32_bits(value);
+                       ib->ptr[ib->length_dw++] = incr; /* increment size */
+                       ib->ptr[ib->length_dw++] = 0;
+                       ib->ptr[ib->length_dw++] = ndw; /* number of entries */
+                       pe += ndw * 8;
+                       addr += ndw * incr;
+                       count -= ndw;
+               }
+       }
+       while (ib->length_dw & 0x7)
+               ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
+ }
+ /**
+  * cik_dma_vm_flush - cik vm flush using sDMA
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Update the page table base and flush the VM TLB
+  * using sDMA (CIK).
+  */
+ void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
+ {
+       struct radeon_ring *ring = &rdev->ring[ridx];
+       u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
+                         SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
+       u32 ref_and_mask;
+       if (vm == NULL)
+               return;
+       if (ridx == R600_RING_TYPE_DMA_INDEX)
+               ref_and_mask = SDMA0;
+       else
+               ref_and_mask = SDMA1;
+       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
+       if (vm->id < 8) {
+               radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
+       } else {
+               radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
+       }
+       radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
+       /* update SH_MEM_* regs */
+       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
+       radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
+       radeon_ring_write(ring, VMID(vm->id));
+       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
+       radeon_ring_write(ring, SH_MEM_BASES >> 2);
+       radeon_ring_write(ring, 0);
+       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
+       radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
+       radeon_ring_write(ring, 0);
+       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
+       radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
+       radeon_ring_write(ring, 1);
+       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
+       radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
+       radeon_ring_write(ring, 0);
+       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
+       radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
+       radeon_ring_write(ring, VMID(0));
+       /* flush HDP */
+       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
+       radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
+       radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
+       radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
+       radeon_ring_write(ring, ref_and_mask); /* MASK */
+       radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
+       /* flush TLB */
+       radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
+       radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
+       radeon_ring_write(ring, 1 << vm->id);
+ }
Simple merge
Simple merge
index 0000000,0f3c0ba..dd6e968
mode 000000,100644..100644
--- /dev/null
@@@ -1,0 -1,338 +1,338 @@@
 -              rb_bufsz = drm_order(ring->ring_size / 4);
+ /*
+  * Copyright 2010 Advanced Micro Devices, Inc.
+  *
+  * Permission is hereby granted, free of charge, to any person obtaining a
+  * copy of this software and associated documentation files (the "Software"),
+  * to deal in the Software without restriction, including without limitation
+  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+  * and/or sell copies of the Software, and to permit persons to whom the
+  * Software is furnished to do so, subject to the following conditions:
+  *
+  * The above copyright notice and this permission notice shall be included in
+  * all copies or substantial portions of the Software.
+  *
+  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+  * OTHER DEALINGS IN THE SOFTWARE.
+  *
+  * Authors: Alex Deucher
+  */
+ #include <drm/drmP.h>
+ #include "radeon.h"
+ #include "radeon_asic.h"
+ #include "nid.h"
+ u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev);
+ /*
+  * DMA
+  * Starting with R600, the GPU has an asynchronous
+  * DMA engine.  The programming model is very similar
+  * to the 3D engine (ring buffer, IBs, etc.), but the
+  * DMA controller has it's own packet format that is
+  * different form the PM4 format used by the 3D engine.
+  * It supports copying data, writing embedded data,
+  * solid fills, and a number of other things.  It also
+  * has support for tiling/detiling of buffers.
+  * Cayman and newer support two asynchronous DMA engines.
+  */
+ /**
+  * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
+  *
+  * @rdev: radeon_device pointer
+  * @ib: IB object to schedule
+  *
+  * Schedule an IB in the DMA ring (cayman-SI).
+  */
+ void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
+                               struct radeon_ib *ib)
+ {
+       struct radeon_ring *ring = &rdev->ring[ib->ring];
+       if (rdev->wb.enabled) {
+               u32 next_rptr = ring->wptr + 4;
+               while ((next_rptr & 7) != 5)
+                       next_rptr++;
+               next_rptr += 3;
+               radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
+               radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
+               radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
+               radeon_ring_write(ring, next_rptr);
+       }
+       /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
+        * Pad as necessary with NOPs.
+        */
+       while ((ring->wptr & 7) != 5)
+               radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+       radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0));
+       radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
+       radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
+ }
+ /**
+  * cayman_dma_stop - stop the async dma engines
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Stop the async dma engines (cayman-SI).
+  */
+ void cayman_dma_stop(struct radeon_device *rdev)
+ {
+       u32 rb_cntl;
+       radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
+       /* dma0 */
+       rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
+       rb_cntl &= ~DMA_RB_ENABLE;
+       WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);
+       /* dma1 */
+       rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
+       rb_cntl &= ~DMA_RB_ENABLE;
+       WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);
+       rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
+       rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
+ }
+ /**
+  * cayman_dma_resume - setup and start the async dma engines
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Set up the DMA ring buffers and enable them. (cayman-SI).
+  * Returns 0 for success, error for failure.
+  */
+ int cayman_dma_resume(struct radeon_device *rdev)
+ {
+       struct radeon_ring *ring;
+       u32 rb_cntl, dma_cntl, ib_cntl;
+       u32 rb_bufsz;
+       u32 reg_offset, wb_offset;
+       int i, r;
+       /* Reset dma */
+       WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1);
+       RREG32(SRBM_SOFT_RESET);
+       udelay(50);
+       WREG32(SRBM_SOFT_RESET, 0);
+       for (i = 0; i < 2; i++) {
+               if (i == 0) {
+                       ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+                       reg_offset = DMA0_REGISTER_OFFSET;
+                       wb_offset = R600_WB_DMA_RPTR_OFFSET;
+               } else {
+                       ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
+                       reg_offset = DMA1_REGISTER_OFFSET;
+                       wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
+               }
+               WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
+               WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
+               /* Set ring buffer size in dwords */
++              rb_bufsz = order_base_2(ring->ring_size / 4);
+               rb_cntl = rb_bufsz << 1;
+ #ifdef __BIG_ENDIAN
+               rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
+ #endif
+               WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);
+               /* Initialize the ring buffer's read and write pointers */
+               WREG32(DMA_RB_RPTR + reg_offset, 0);
+               WREG32(DMA_RB_WPTR + reg_offset, 0);
+               /* set the wb address whether it's enabled or not */
+               WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
+                      upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
+               WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
+                      ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
+               if (rdev->wb.enabled)
+                       rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
+               WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);
+               /* enable DMA IBs */
+               ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
+ #ifdef __BIG_ENDIAN
+               ib_cntl |= DMA_IB_SWAP_ENABLE;
+ #endif
+               WREG32(DMA_IB_CNTL + reg_offset, ib_cntl);
+               dma_cntl = RREG32(DMA_CNTL + reg_offset);
+               dma_cntl &= ~CTXEMPTY_INT_ENABLE;
+               WREG32(DMA_CNTL + reg_offset, dma_cntl);
+               ring->wptr = 0;
+               WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);
+               ring->rptr = RREG32(DMA_RB_RPTR + reg_offset) >> 2;
+               WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);
+               ring->ready = true;
+               r = radeon_ring_test(rdev, ring->idx, ring);
+               if (r) {
+                       ring->ready = false;
+                       return r;
+               }
+       }
+       radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
+       return 0;
+ }
+ /**
+  * cayman_dma_fini - tear down the async dma engines
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Stop the async dma engines and free the rings (cayman-SI).
+  */
+ void cayman_dma_fini(struct radeon_device *rdev)
+ {
+       cayman_dma_stop(rdev);
+       radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
+       radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
+ }
+ /**
+  * cayman_dma_is_lockup - Check if the DMA engine is locked up
+  *
+  * @rdev: radeon_device pointer
+  * @ring: radeon_ring structure holding ring information
+  *
+  * Check if the async DMA engine is locked up.
+  * Returns true if the engine appears to be locked up, false if not.
+  */
+ bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
+ {
+       u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
+       u32 mask;
+       if (ring->idx == R600_RING_TYPE_DMA_INDEX)
+               mask = RADEON_RESET_DMA;
+       else
+               mask = RADEON_RESET_DMA1;
+       if (!(reset_mask & mask)) {
+               radeon_ring_lockup_update(ring);
+               return false;
+       }
+       /* force ring activities */
+       radeon_ring_force_activity(rdev, ring);
+       return radeon_ring_test_lockup(rdev, ring);
+ }
+ /**
+  * cayman_dma_vm_set_page - update the page tables using the DMA
+  *
+  * @rdev: radeon_device pointer
+  * @ib: indirect buffer to fill with commands
+  * @pe: addr of the page entry
+  * @addr: dst addr to write into pe
+  * @count: number of page entries to update
+  * @incr: increase next addr by incr bytes
+  * @flags: access flags
+  * @r600_flags: hw access flags 
+  *
+  * Update the page tables using the DMA (cayman/TN).
+  */
+ void cayman_dma_vm_set_page(struct radeon_device *rdev,
+                           struct radeon_ib *ib,
+                           uint64_t pe,
+                           uint64_t addr, unsigned count,
+                           uint32_t incr, uint32_t flags)
+ {
+       uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
+       uint64_t value;
+       unsigned ndw;
+       if ((flags & RADEON_VM_PAGE_SYSTEM) || (count == 1)) {
+               while (count) {
+                       ndw = count * 2;
+                       if (ndw > 0xFFFFE)
+                               ndw = 0xFFFFE;
+                       /* for non-physically contiguous pages (system) */
+                       ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw);
+                       ib->ptr[ib->length_dw++] = pe;
+                       ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
+                       for (; ndw > 0; ndw -= 2, --count, pe += 8) {
+                               if (flags & RADEON_VM_PAGE_SYSTEM) {
+                                       value = radeon_vm_map_gart(rdev, addr);
+                                       value &= 0xFFFFFFFFFFFFF000ULL;
+                               } else if (flags & RADEON_VM_PAGE_VALID) {
+                                       value = addr;
+                               } else {
+                                       value = 0;
+                               }
+                               addr += incr;
+                               value |= r600_flags;
+                               ib->ptr[ib->length_dw++] = value;
+                               ib->ptr[ib->length_dw++] = upper_32_bits(value);
+                       }
+               }
+       } else {
+               while (count) {
+                       ndw = count * 2;
+                       if (ndw > 0xFFFFE)
+                               ndw = 0xFFFFE;
+                       if (flags & RADEON_VM_PAGE_VALID)
+                               value = addr;
+                       else
+                               value = 0;
+                       /* for physically contiguous pages (vram) */
+                       ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
+                       ib->ptr[ib->length_dw++] = pe; /* dst addr */
+                       ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
+                       ib->ptr[ib->length_dw++] = r600_flags; /* mask */
+                       ib->ptr[ib->length_dw++] = 0;
+                       ib->ptr[ib->length_dw++] = value; /* value */
+                       ib->ptr[ib->length_dw++] = upper_32_bits(value);
+                       ib->ptr[ib->length_dw++] = incr; /* increment size */
+                       ib->ptr[ib->length_dw++] = 0;
+                       pe += ndw * 4;
+                       addr += (ndw / 2) * incr;
+                       count -= ndw / 2;
+               }
+       }
+       while (ib->length_dw & 0x7)
+               ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
+ }
+ void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
+ {
+       struct radeon_ring *ring = &rdev->ring[ridx];
+       if (vm == NULL)
+               return;
+       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
+       radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
+       radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
+       /* flush hdp cache */
+       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
+       radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
+       radeon_ring_write(ring, 1);
+       /* bits 0-7 are the VM contexts0-7 */
+       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
+       radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
+       radeon_ring_write(ring, 1 << vm->id);
+ }
Simple merge
Simple merge
index 0000000,bff0557..3b31745
mode 000000,100644..100644
--- /dev/null
@@@ -1,0 -1,497 +1,497 @@@
 -      rb_bufsz = drm_order(ring->ring_size / 4);
+ /*
+  * Copyright 2013 Advanced Micro Devices, Inc.
+  *
+  * Permission is hereby granted, free of charge, to any person obtaining a
+  * copy of this software and associated documentation files (the "Software"),
+  * to deal in the Software without restriction, including without limitation
+  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+  * and/or sell copies of the Software, and to permit persons to whom the
+  * Software is furnished to do so, subject to the following conditions:
+  *
+  * The above copyright notice and this permission notice shall be included in
+  * all copies or substantial portions of the Software.
+  *
+  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+  * OTHER DEALINGS IN THE SOFTWARE.
+  *
+  * Authors: Alex Deucher
+  */
+ #include <drm/drmP.h>
+ #include "radeon.h"
+ #include "radeon_asic.h"
+ #include "r600d.h"
+ u32 r600_gpu_check_soft_reset(struct radeon_device *rdev);
+ /*
+  * DMA
+  * Starting with R600, the GPU has an asynchronous
+  * DMA engine.  The programming model is very similar
+  * to the 3D engine (ring buffer, IBs, etc.), but the
+  * DMA controller has it's own packet format that is
+  * different form the PM4 format used by the 3D engine.
+  * It supports copying data, writing embedded data,
+  * solid fills, and a number of other things.  It also
+  * has support for tiling/detiling of buffers.
+  */
+ /**
+  * r600_dma_get_rptr - get the current read pointer
+  *
+  * @rdev: radeon_device pointer
+  * @ring: radeon ring pointer
+  *
+  * Get the current rptr from the hardware (r6xx+).
+  */
+ uint32_t r600_dma_get_rptr(struct radeon_device *rdev,
+                          struct radeon_ring *ring)
+ {
+       return (radeon_ring_generic_get_rptr(rdev, ring) & 0x3fffc) >> 2;
+ }
+ /**
+  * r600_dma_get_wptr - get the current write pointer
+  *
+  * @rdev: radeon_device pointer
+  * @ring: radeon ring pointer
+  *
+  * Get the current wptr from the hardware (r6xx+).
+  */
+ uint32_t r600_dma_get_wptr(struct radeon_device *rdev,
+                          struct radeon_ring *ring)
+ {
+       return (RREG32(ring->wptr_reg) & 0x3fffc) >> 2;
+ }
+ /**
+  * r600_dma_set_wptr - commit the write pointer
+  *
+  * @rdev: radeon_device pointer
+  * @ring: radeon ring pointer
+  *
+  * Write the wptr back to the hardware (r6xx+).
+  */
+ void r600_dma_set_wptr(struct radeon_device *rdev,
+                      struct radeon_ring *ring)
+ {
+       WREG32(ring->wptr_reg, (ring->wptr << 2) & 0x3fffc);
+ }
+ /**
+  * r600_dma_stop - stop the async dma engine
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Stop the async dma engine (r6xx-evergreen).
+  */
+ void r600_dma_stop(struct radeon_device *rdev)
+ {
+       u32 rb_cntl = RREG32(DMA_RB_CNTL);
+       radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
+       rb_cntl &= ~DMA_RB_ENABLE;
+       WREG32(DMA_RB_CNTL, rb_cntl);
+       rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
+ }
+ /**
+  * r600_dma_resume - setup and start the async dma engine
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Set up the DMA ring buffer and enable it. (r6xx-evergreen).
+  * Returns 0 for success, error for failure.
+  */
+ int r600_dma_resume(struct radeon_device *rdev)
+ {
+       struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+       u32 rb_cntl, dma_cntl, ib_cntl;
+       u32 rb_bufsz;
+       int r;
+       /* Reset dma */
+       if (rdev->family >= CHIP_RV770)
+               WREG32(SRBM_SOFT_RESET, RV770_SOFT_RESET_DMA);
+       else
+               WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA);
+       RREG32(SRBM_SOFT_RESET);
+       udelay(50);
+       WREG32(SRBM_SOFT_RESET, 0);
+       WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL, 0);
+       WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL, 0);
+       /* Set ring buffer size in dwords */
++      rb_bufsz = order_base_2(ring->ring_size / 4);
+       rb_cntl = rb_bufsz << 1;
+ #ifdef __BIG_ENDIAN
+       rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
+ #endif
+       WREG32(DMA_RB_CNTL, rb_cntl);
+       /* Initialize the ring buffer's read and write pointers */
+       WREG32(DMA_RB_RPTR, 0);
+       WREG32(DMA_RB_WPTR, 0);
+       /* set the wb address whether it's enabled or not */
+       WREG32(DMA_RB_RPTR_ADDR_HI,
+              upper_32_bits(rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFF);
+       WREG32(DMA_RB_RPTR_ADDR_LO,
+              ((rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFFFFFFFC));
+       if (rdev->wb.enabled)
+               rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
+       WREG32(DMA_RB_BASE, ring->gpu_addr >> 8);
+       /* enable DMA IBs */
+       ib_cntl = DMA_IB_ENABLE;
+ #ifdef __BIG_ENDIAN
+       ib_cntl |= DMA_IB_SWAP_ENABLE;
+ #endif
+       WREG32(DMA_IB_CNTL, ib_cntl);
+       dma_cntl = RREG32(DMA_CNTL);
+       dma_cntl &= ~CTXEMPTY_INT_ENABLE;
+       WREG32(DMA_CNTL, dma_cntl);
+       if (rdev->family >= CHIP_RV770)
+               WREG32(DMA_MODE, 1);
+       ring->wptr = 0;
+       WREG32(DMA_RB_WPTR, ring->wptr << 2);
+       ring->rptr = RREG32(DMA_RB_RPTR) >> 2;
+       WREG32(DMA_RB_CNTL, rb_cntl | DMA_RB_ENABLE);
+       ring->ready = true;
+       r = radeon_ring_test(rdev, R600_RING_TYPE_DMA_INDEX, ring);
+       if (r) {
+               ring->ready = false;
+               return r;
+       }
+       radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
+       return 0;
+ }
+ /**
+  * r600_dma_fini - tear down the async dma engine
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Stop the async dma engine and free the ring (r6xx-evergreen).
+  */
+ void r600_dma_fini(struct radeon_device *rdev)
+ {
+       r600_dma_stop(rdev);
+       radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
+ }
+ /**
+  * r600_dma_is_lockup - Check if the DMA engine is locked up
+  *
+  * @rdev: radeon_device pointer
+  * @ring: radeon_ring structure holding ring information
+  *
+  * Check if the async DMA engine is locked up.
+  * Returns true if the engine appears to be locked up, false if not.
+  */
+ bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
+ {
+       u32 reset_mask = r600_gpu_check_soft_reset(rdev);
+       if (!(reset_mask & RADEON_RESET_DMA)) {
+               radeon_ring_lockup_update(ring);
+               return false;
+       }
+       /* force ring activities */
+       radeon_ring_force_activity(rdev, ring);
+       return radeon_ring_test_lockup(rdev, ring);
+ }
+ /**
+  * r600_dma_ring_test - simple async dma engine test
+  *
+  * @rdev: radeon_device pointer
+  * @ring: radeon_ring structure holding ring information
+  *
+  * Test the DMA engine by writing using it to write an
+  * value to memory. (r6xx-SI).
+  * Returns 0 for success, error for failure.
+  */
+ int r600_dma_ring_test(struct radeon_device *rdev,
+                      struct radeon_ring *ring)
+ {
+       unsigned i;
+       int r;
+       void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
+       u32 tmp;
+       if (!ptr) {
+               DRM_ERROR("invalid vram scratch pointer\n");
+               return -EINVAL;
+       }
+       tmp = 0xCAFEDEAD;
+       writel(tmp, ptr);
+       r = radeon_ring_lock(rdev, ring, 4);
+       if (r) {
+               DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
+               return r;
+       }
+       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
+       radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
+       radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff);
+       radeon_ring_write(ring, 0xDEADBEEF);
+       radeon_ring_unlock_commit(rdev, ring);
+       for (i = 0; i < rdev->usec_timeout; i++) {
+               tmp = readl(ptr);
+               if (tmp == 0xDEADBEEF)
+                       break;
+               DRM_UDELAY(1);
+       }
+       if (i < rdev->usec_timeout) {
+               DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
+       } else {
+               DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
+                         ring->idx, tmp);
+               r = -EINVAL;
+       }
+       return r;
+ }
+ /**
+  * r600_dma_fence_ring_emit - emit a fence on the DMA ring
+  *
+  * @rdev: radeon_device pointer
+  * @fence: radeon fence object
+  *
+  * Add a DMA fence packet to the ring to write
+  * the fence seq number and DMA trap packet to generate
+  * an interrupt if needed (r6xx-r7xx).
+  */
+ void r600_dma_fence_ring_emit(struct radeon_device *rdev,
+                             struct radeon_fence *fence)
+ {
+       struct radeon_ring *ring = &rdev->ring[fence->ring];
+       u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
+       /* write the fence */
+       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0));
+       radeon_ring_write(ring, addr & 0xfffffffc);
+       radeon_ring_write(ring, (upper_32_bits(addr) & 0xff));
+       radeon_ring_write(ring, lower_32_bits(fence->seq));
+       /* generate an interrupt */
+       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0));
+ }
+ /**
+  * r600_dma_semaphore_ring_emit - emit a semaphore on the dma ring
+  *
+  * @rdev: radeon_device pointer
+  * @ring: radeon_ring structure holding ring information
+  * @semaphore: radeon semaphore object
+  * @emit_wait: wait or signal semaphore
+  *
+  * Add a DMA semaphore packet to the ring wait on or signal
+  * other rings (r6xx-SI).
+  */
+ void r600_dma_semaphore_ring_emit(struct radeon_device *rdev,
+                                 struct radeon_ring *ring,
+                                 struct radeon_semaphore *semaphore,
+                                 bool emit_wait)
+ {
+       u64 addr = semaphore->gpu_addr;
+       u32 s = emit_wait ? 0 : 1;
+       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SEMAPHORE, 0, s, 0));
+       radeon_ring_write(ring, addr & 0xfffffffc);
+       radeon_ring_write(ring, upper_32_bits(addr) & 0xff);
+ }
+ /**
+  * r600_dma_ib_test - test an IB on the DMA engine
+  *
+  * @rdev: radeon_device pointer
+  * @ring: radeon_ring structure holding ring information
+  *
+  * Test a simple IB in the DMA ring (r6xx-SI).
+  * Returns 0 on success, error on failure.
+  */
+ int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
+ {
+       struct radeon_ib ib;
+       unsigned i;
+       int r;
+       void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
+       u32 tmp = 0;
+       if (!ptr) {
+               DRM_ERROR("invalid vram scratch pointer\n");
+               return -EINVAL;
+       }
+       tmp = 0xCAFEDEAD;
+       writel(tmp, ptr);
+       r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
+       if (r) {
+               DRM_ERROR("radeon: failed to get ib (%d).\n", r);
+               return r;
+       }
+       ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1);
+       ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
+       ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff;
+       ib.ptr[3] = 0xDEADBEEF;
+       ib.length_dw = 4;
+       r = radeon_ib_schedule(rdev, &ib, NULL);
+       if (r) {
+               radeon_ib_free(rdev, &ib);
+               DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
+               return r;
+       }
+       r = radeon_fence_wait(ib.fence, false);
+       if (r) {
+               DRM_ERROR("radeon: fence wait failed (%d).\n", r);
+               return r;
+       }
+       for (i = 0; i < rdev->usec_timeout; i++) {
+               tmp = readl(ptr);
+               if (tmp == 0xDEADBEEF)
+                       break;
+               DRM_UDELAY(1);
+       }
+       if (i < rdev->usec_timeout) {
+               DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
+       } else {
+               DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
+               r = -EINVAL;
+       }
+       radeon_ib_free(rdev, &ib);
+       return r;
+ }
+ /**
+  * r600_dma_ring_ib_execute - Schedule an IB on the DMA engine
+  *
+  * @rdev: radeon_device pointer
+  * @ib: IB object to schedule
+  *
+  * Schedule an IB in the DMA ring (r6xx-r7xx).
+  */
+ void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
+ {
+       struct radeon_ring *ring = &rdev->ring[ib->ring];
+       if (rdev->wb.enabled) {
+               u32 next_rptr = ring->wptr + 4;
+               while ((next_rptr & 7) != 5)
+                       next_rptr++;
+               next_rptr += 3;
+               radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
+               radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
+               radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
+               radeon_ring_write(ring, next_rptr);
+       }
+       /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
+        * Pad as necessary with NOPs.
+        */
+       while ((ring->wptr & 7) != 5)
+               radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0));
+       radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
+       radeon_ring_write(ring, (ib->length_dw << 16) | (upper_32_bits(ib->gpu_addr) & 0xFF));
+ }
+ /**
+  * r600_copy_dma - copy pages using the DMA engine
+  *
+  * @rdev: radeon_device pointer
+  * @src_offset: src GPU address
+  * @dst_offset: dst GPU address
+  * @num_gpu_pages: number of GPU pages to xfer
+  * @fence: radeon fence object
+  *
+  * Copy GPU paging using the DMA engine (r6xx).
+  * Used by the radeon ttm implementation to move pages if
+  * registered as the asic copy callback.
+  */
+ int r600_copy_dma(struct radeon_device *rdev,
+                 uint64_t src_offset, uint64_t dst_offset,
+                 unsigned num_gpu_pages,
+                 struct radeon_fence **fence)
+ {
+       struct radeon_semaphore *sem = NULL;
+       int ring_index = rdev->asic->copy.dma_ring_index;
+       struct radeon_ring *ring = &rdev->ring[ring_index];
+       u32 size_in_dw, cur_size_in_dw;
+       int i, num_loops;
+       int r = 0;
+       r = radeon_semaphore_create(rdev, &sem);
+       if (r) {
+               DRM_ERROR("radeon: moving bo (%d).\n", r);
+               return r;
+       }
+       size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
+       num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFE);
+       r = radeon_ring_lock(rdev, ring, num_loops * 4 + 8);
+       if (r) {
+               DRM_ERROR("radeon: moving bo (%d).\n", r);
+               radeon_semaphore_free(rdev, &sem, NULL);
+               return r;
+       }
+       if (radeon_fence_need_sync(*fence, ring->idx)) {
+               radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
+                                           ring->idx);
+               radeon_fence_note_sync(*fence, ring->idx);
+       } else {
+               radeon_semaphore_free(rdev, &sem, NULL);
+       }
+       for (i = 0; i < num_loops; i++) {
+               cur_size_in_dw = size_in_dw;
+               if (cur_size_in_dw > 0xFFFE)
+                       cur_size_in_dw = 0xFFFE;
+               size_in_dw -= cur_size_in_dw;
+               radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
+               radeon_ring_write(ring, dst_offset & 0xfffffffc);
+               radeon_ring_write(ring, src_offset & 0xfffffffc);
+               radeon_ring_write(ring, (((upper_32_bits(dst_offset) & 0xff) << 16) |
+                                        (upper_32_bits(src_offset) & 0xff)));
+               src_offset += cur_size_in_dw * 4;
+               dst_offset += cur_size_in_dw * 4;
+       }
+       r = radeon_fence_emit(rdev, fence, ring->idx);
+       if (r) {
+               radeon_ring_unlock_undo(rdev, ring);
+               return r;
+       }
+       radeon_ring_unlock_commit(rdev, ring);
+       radeon_semaphore_free(rdev, &sem, *fence);
+       return r;
+ }
Simple merge
Simple merge
Simple merge
Simple merge
index 0000000,3426be9..7266805
mode 000000,100644..100644
--- /dev/null
@@@ -1,0 -1,436 +1,436 @@@
 -      rb_bufsz = drm_order(ring->ring_size);
+ /*
+  * Copyright 2013 Advanced Micro Devices, Inc.
+  *
+  * Permission is hereby granted, free of charge, to any person obtaining a
+  * copy of this software and associated documentation files (the "Software"),
+  * to deal in the Software without restriction, including without limitation
+  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+  * and/or sell copies of the Software, and to permit persons to whom the
+  * Software is furnished to do so, subject to the following conditions:
+  *
+  * The above copyright notice and this permission notice shall be included in
+  * all copies or substantial portions of the Software.
+  *
+  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+  * OTHER DEALINGS IN THE SOFTWARE.
+  *
+  * Authors: Christian König <christian.koenig@amd.com>
+  */
+ #include <drm/drmP.h>
+ #include "radeon.h"
+ #include "radeon_asic.h"
+ #include "r600d.h"
+ /**
+  * uvd_v1_0_get_rptr - get read pointer
+  *
+  * @rdev: radeon_device pointer
+  * @ring: radeon_ring pointer
+  *
+  * Returns the current hardware read pointer
+  */
+ uint32_t uvd_v1_0_get_rptr(struct radeon_device *rdev,
+                          struct radeon_ring *ring)
+ {
+       return RREG32(UVD_RBC_RB_RPTR);
+ }
+ /**
+  * uvd_v1_0_get_wptr - get write pointer
+  *
+  * @rdev: radeon_device pointer
+  * @ring: radeon_ring pointer
+  *
+  * Returns the current hardware write pointer
+  */
+ uint32_t uvd_v1_0_get_wptr(struct radeon_device *rdev,
+                          struct radeon_ring *ring)
+ {
+       return RREG32(UVD_RBC_RB_WPTR);
+ }
+ /**
+  * uvd_v1_0_set_wptr - set write pointer
+  *
+  * @rdev: radeon_device pointer
+  * @ring: radeon_ring pointer
+  *
+  * Commits the write pointer to the hardware
+  */
+ void uvd_v1_0_set_wptr(struct radeon_device *rdev,
+                      struct radeon_ring *ring)
+ {
+       WREG32(UVD_RBC_RB_WPTR, ring->wptr);
+ }
+ /**
+  * uvd_v1_0_init - start and test UVD block
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Initialize the hardware, boot up the VCPU and do some testing
+  */
+ int uvd_v1_0_init(struct radeon_device *rdev)
+ {
+       struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+       uint32_t tmp;
+       int r;
+       /* raise clocks while booting up the VCPU */
+       radeon_set_uvd_clocks(rdev, 53300, 40000);
+       r = uvd_v1_0_start(rdev);
+       if (r)
+               goto done;
+       ring->ready = true;
+       r = radeon_ring_test(rdev, R600_RING_TYPE_UVD_INDEX, ring);
+       if (r) {
+               ring->ready = false;
+               goto done;
+       }
+       r = radeon_ring_lock(rdev, ring, 10);
+       if (r) {
+               DRM_ERROR("radeon: ring failed to lock UVD ring (%d).\n", r);
+               goto done;
+       }
+       tmp = PACKET0(UVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL, 0);
+       radeon_ring_write(ring, tmp);
+       radeon_ring_write(ring, 0xFFFFF);
+       tmp = PACKET0(UVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL, 0);
+       radeon_ring_write(ring, tmp);
+       radeon_ring_write(ring, 0xFFFFF);
+       tmp = PACKET0(UVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL, 0);
+       radeon_ring_write(ring, tmp);
+       radeon_ring_write(ring, 0xFFFFF);
+       /* Clear timeout status bits */
+       radeon_ring_write(ring, PACKET0(UVD_SEMA_TIMEOUT_STATUS, 0));
+       radeon_ring_write(ring, 0x8);
+       radeon_ring_write(ring, PACKET0(UVD_SEMA_CNTL, 0));
+       radeon_ring_write(ring, 3);
+       radeon_ring_unlock_commit(rdev, ring);
+ done:
+       /* lower clocks again */
+       radeon_set_uvd_clocks(rdev, 0, 0);
+       if (!r)
+               DRM_INFO("UVD initialized successfully.\n");
+       return r;
+ }
+ /**
+  * uvd_v1_0_fini - stop the hardware block
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Stop the UVD block, mark ring as not ready any more
+  */
+ void uvd_v1_0_fini(struct radeon_device *rdev)
+ {
+       struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+       uvd_v1_0_stop(rdev);
+       ring->ready = false;
+ }
+ /**
+  * uvd_v1_0_start - start UVD block
+  *
+  * @rdev: radeon_device pointer
+  *
+  * Setup and start the UVD block
+  */
+ int uvd_v1_0_start(struct radeon_device *rdev)
+ {
+       struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+       uint32_t rb_bufsz;
+       int i, j, r;
+       /* disable byte swapping */
+       u32 lmi_swap_cntl = 0;
+       u32 mp_swap_cntl = 0;
+       /* disable clock gating */
+       WREG32(UVD_CGC_GATE, 0);
+       /* disable interrupt */
+       WREG32_P(UVD_MASTINT_EN, 0, ~(1 << 1));
+       /* Stall UMC and register bus before resetting VCPU */
+       WREG32_P(UVD_LMI_CTRL2, 1 << 8, ~(1 << 8));
+       WREG32_P(UVD_RB_ARB_CTRL, 1 << 3, ~(1 << 3));
+       mdelay(1);
+       /* put LMI, VCPU, RBC etc... into reset */
+       WREG32(UVD_SOFT_RESET, LMI_SOFT_RESET | VCPU_SOFT_RESET |
+              LBSI_SOFT_RESET | RBC_SOFT_RESET | CSM_SOFT_RESET |
+              CXW_SOFT_RESET | TAP_SOFT_RESET | LMI_UMC_SOFT_RESET);
+       mdelay(5);
+       /* take UVD block out of reset */
+       WREG32_P(SRBM_SOFT_RESET, 0, ~SOFT_RESET_UVD);
+       mdelay(5);
+       /* initialize UVD memory controller */
+       WREG32(UVD_LMI_CTRL, 0x40 | (1 << 8) | (1 << 13) |
+                            (1 << 21) | (1 << 9) | (1 << 20));
+ #ifdef __BIG_ENDIAN
+       /* swap (8 in 32) RB and IB */
+       lmi_swap_cntl = 0xa;
+       mp_swap_cntl = 0;
+ #endif
+       WREG32(UVD_LMI_SWAP_CNTL, lmi_swap_cntl);
+       WREG32(UVD_MP_SWAP_CNTL, mp_swap_cntl);
+       WREG32(UVD_MPC_SET_MUXA0, 0x40c2040);
+       WREG32(UVD_MPC_SET_MUXA1, 0x0);
+       WREG32(UVD_MPC_SET_MUXB0, 0x40c2040);
+       WREG32(UVD_MPC_SET_MUXB1, 0x0);
+       WREG32(UVD_MPC_SET_ALU, 0);
+       WREG32(UVD_MPC_SET_MUX, 0x88);
+       /* take all subblocks out of reset, except VCPU */
+       WREG32(UVD_SOFT_RESET, VCPU_SOFT_RESET);
+       mdelay(5);
+       /* enable VCPU clock */
+       WREG32(UVD_VCPU_CNTL,  1 << 9);
+       /* enable UMC */
+       WREG32_P(UVD_LMI_CTRL2, 0, ~(1 << 8));
+       /* boot up the VCPU */
+       WREG32(UVD_SOFT_RESET, 0);
+       mdelay(10);
+       WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3));
+       for (i = 0; i < 10; ++i) {
+               uint32_t status;
+               for (j = 0; j < 100; ++j) {
+                       status = RREG32(UVD_STATUS);
+                       if (status & 2)
+                               break;
+                       mdelay(10);
+               }
+               r = 0;
+               if (status & 2)
+                       break;
+               DRM_ERROR("UVD not responding, trying to reset the VCPU!!!\n");
+               WREG32_P(UVD_SOFT_RESET, VCPU_SOFT_RESET, ~VCPU_SOFT_RESET);
+               mdelay(10);
+               WREG32_P(UVD_SOFT_RESET, 0, ~VCPU_SOFT_RESET);
+               mdelay(10);
+               r = -1;
+       }
+       if (r) {
+               DRM_ERROR("UVD not responding, giving up!!!\n");
+               return r;
+       }
+       /* enable interrupt */
+       WREG32_P(UVD_MASTINT_EN, 3<<1, ~(3 << 1));
+       /* force RBC into idle state */
+       WREG32(UVD_RBC_RB_CNTL, 0x11010101);
+       /* Set the write pointer delay */
+       WREG32(UVD_RBC_RB_WPTR_CNTL, 0);
+       /* program the 4GB memory segment for rptr and ring buffer */
+       WREG32(UVD_LMI_EXT40_ADDR, upper_32_bits(ring->gpu_addr) |
+                                  (0x7 << 16) | (0x1 << 31));
+       /* Initialize the ring buffer's read and write pointers */
+       WREG32(UVD_RBC_RB_RPTR, 0x0);
+       ring->wptr = ring->rptr = RREG32(UVD_RBC_RB_RPTR);
+       WREG32(UVD_RBC_RB_WPTR, ring->wptr);
+       /* set the ring address */
+       WREG32(UVD_RBC_RB_BASE, ring->gpu_addr);
+       /* Set ring buffer size */
++      rb_bufsz = order_base_2(ring->ring_size);
+       rb_bufsz = (0x1 << 8) | rb_bufsz;
+       WREG32_P(UVD_RBC_RB_CNTL, rb_bufsz, ~0x11f1f);
+       return 0;
+ }
+ /**
+  * uvd_v1_0_stop - stop UVD block
+  *
+  * @rdev: radeon_device pointer
+  *
+  * stop the UVD block
+  */
+ void uvd_v1_0_stop(struct radeon_device *rdev)
+ {
+       /* force RBC into idle state */
+       WREG32(UVD_RBC_RB_CNTL, 0x11010101);
+       /* Stall UMC and register bus before resetting VCPU */
+       WREG32_P(UVD_LMI_CTRL2, 1 << 8, ~(1 << 8));
+       WREG32_P(UVD_RB_ARB_CTRL, 1 << 3, ~(1 << 3));
+       mdelay(1);
+       /* put VCPU into reset */
+       WREG32(UVD_SOFT_RESET, VCPU_SOFT_RESET);
+       mdelay(5);
+       /* disable VCPU clock */
+       WREG32(UVD_VCPU_CNTL, 0x0);
+       /* Unstall UMC and register bus */
+       WREG32_P(UVD_LMI_CTRL2, 0, ~(1 << 8));
+       WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3));
+ }
+ /**
+  * uvd_v1_0_ring_test - register write test
+  *
+  * @rdev: radeon_device pointer
+  * @ring: radeon_ring pointer
+  *
+  * Test if we can successfully write to the context register
+  */
+ int uvd_v1_0_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
+ {
+       uint32_t tmp = 0;
+       unsigned i;
+       int r;
+       WREG32(UVD_CONTEXT_ID, 0xCAFEDEAD);
+       r = radeon_ring_lock(rdev, ring, 3);
+       if (r) {
+               DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n",
+                         ring->idx, r);
+               return r;
+       }
+       radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0));
+       radeon_ring_write(ring, 0xDEADBEEF);
+       radeon_ring_unlock_commit(rdev, ring);
+       for (i = 0; i < rdev->usec_timeout; i++) {
+               tmp = RREG32(UVD_CONTEXT_ID);
+               if (tmp == 0xDEADBEEF)
+                       break;
+               DRM_UDELAY(1);
+       }
+       if (i < rdev->usec_timeout) {
+               DRM_INFO("ring test on %d succeeded in %d usecs\n",
+                        ring->idx, i);
+       } else {
+               DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
+                         ring->idx, tmp);
+               r = -EINVAL;
+       }
+       return r;
+ }
+ /**
+  * uvd_v1_0_semaphore_emit - emit semaphore command
+  *
+  * @rdev: radeon_device pointer
+  * @ring: radeon_ring pointer
+  * @semaphore: semaphore to emit commands for
+  * @emit_wait: true if we should emit a wait command
+  *
+  * Emit a semaphore command (either wait or signal) to the UVD ring.
+  */
+ void uvd_v1_0_semaphore_emit(struct radeon_device *rdev,
+                            struct radeon_ring *ring,
+                            struct radeon_semaphore *semaphore,
+                            bool emit_wait)
+ {
+       uint64_t addr = semaphore->gpu_addr;
+       radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0));
+       radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF);
+       radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0));
+       radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF);
+       radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0));
+       radeon_ring_write(ring, emit_wait ? 1 : 0);
+ }
+ /**
+  * uvd_v1_0_ib_execute - execute indirect buffer
+  *
+  * @rdev: radeon_device pointer
+  * @ib: indirect buffer to execute
+  *
+  * Write ring commands to execute the indirect buffer
+  */
+ void uvd_v1_0_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
+ {
+       struct radeon_ring *ring = &rdev->ring[ib->ring];
+       radeon_ring_write(ring, PACKET0(UVD_RBC_IB_BASE, 0));
+       radeon_ring_write(ring, ib->gpu_addr);
+       radeon_ring_write(ring, PACKET0(UVD_RBC_IB_SIZE, 0));
+       radeon_ring_write(ring, ib->length_dw);
+ }
+ /**
+  * uvd_v1_0_ib_test - test ib execution
+  *
+  * @rdev: radeon_device pointer
+  * @ring: radeon_ring pointer
+  *
+  * Test if we can successfully execute an IB
+  */
+ int uvd_v1_0_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
+ {
+       struct radeon_fence *fence = NULL;
+       int r;
+       r = radeon_set_uvd_clocks(rdev, 53300, 40000);
+       if (r) {
+               DRM_ERROR("radeon: failed to raise UVD clocks (%d).\n", r);
+               return r;
+       }
+       r = radeon_uvd_get_create_msg(rdev, ring->idx, 1, NULL);
+       if (r) {
+               DRM_ERROR("radeon: failed to get create msg (%d).\n", r);
+               goto error;
+       }
+       r = radeon_uvd_get_destroy_msg(rdev, ring->idx, 1, &fence);
+       if (r) {
+               DRM_ERROR("radeon: failed to get destroy ib (%d).\n", r);
+               goto error;
+       }
+       r = radeon_fence_wait(fence, false);
+       if (r) {
+               DRM_ERROR("radeon: fence wait failed (%d).\n", r);
+               goto error;
+       }
+       DRM_INFO("ib test on ring %d succeeded\n",  ring->idx);
+ error:
+       radeon_fence_unref(&fence);
+       radeon_set_uvd_clocks(rdev, 0, 0);
+       return r;
+ }
Simple merge
Simple merge