// SPDX-License-Identifier: GPL-2.0-only
/*
 * Tegra host1x driver
 *
 * Copyright (c) 2010-2013, NVIDIA Corporation.
 */

#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/io.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/of_device.h>
#include <linux/of.h>
#include <linux/pm_runtime.h>
#include <linux/slab.h>

#include <soc/tegra/common.h>

#define CREATE_TRACE_POINTS
#include <trace/events/host1x.h>
#undef CREATE_TRACE_POINTS

#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
#include <asm/dma-iommu.h>
#endif

#include "bus.h"
#include "channel.h"
#include "debug.h"
#include "dev.h"
#include "intr.h"

#include "hw/host1x01.h"
#include "hw/host1x02.h"
#include "hw/host1x04.h"
#include "hw/host1x05.h"
#include "hw/host1x06.h"
#include "hw/host1x07.h"
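
/*
 * Register accessors. host1x exposes up to three register apertures: the
 * hypervisor registers (only on SoCs where info->has_hypervisor is set),
 * the sync registers at info->sync_offset within the main aperture, and
 * the per-channel registers.
 */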
void host1x_hypervisor_writel(struct host1x *host1x, u32 v, u32 r)
{
	writel(v, host1x->hv_regs + r);
}

u32 host1x_hypervisor_readl(struct host1x *host1x, u32 r)
{
	return readl(host1x->hv_regs + r);
}

void host1x_sync_writel(struct host1x *host1x, u32 v, u32 r)
{
	void __iomem *sync_regs = host1x->regs + host1x->info->sync_offset;

	writel(v, sync_regs + r);
}

u32 host1x_sync_readl(struct host1x *host1x, u32 r)
{
	void __iomem *sync_regs = host1x->regs + host1x->info->sync_offset;

	return readl(sync_regs + r);
}

void host1x_ch_writel(struct host1x_channel *ch, u32 v, u32 r)
{
	writel(v, ch->regs + r);
}

u32 host1x_ch_readl(struct host1x_channel *ch, u32 r)
{
	return readl(ch->regs + r);
}

static const struct host1x_info host1x01_info = {
	.nb_channels = 8,
	.nb_pts = 32,
	.nb_mlocks = 16,
	.nb_bases = 8,
	.init = host1x01_init,
	.sync_offset = 0x3000,
	.dma_mask = DMA_BIT_MASK(32),
	.has_wide_gather = false,
	.has_hypervisor = false,
	.num_sid_entries = 0,
	.sid_table = NULL,
	.reserve_vblank_syncpts = true,
};

static const struct host1x_info host1x02_info = {
	.nb_channels = 9,
	.nb_pts = 32,
	.nb_mlocks = 16,
	.nb_bases = 12,
	.init = host1x02_init,
	.sync_offset = 0x3000,
	.dma_mask = DMA_BIT_MASK(32),
	.has_wide_gather = false,
	.has_hypervisor = false,
	.num_sid_entries = 0,
	.sid_table = NULL,
	.reserve_vblank_syncpts = true,
};

static const struct host1x_info host1x04_info = {
	.nb_channels = 12,
	.nb_pts = 192,
	.nb_mlocks = 16,
	.nb_bases = 64,
	.init = host1x04_init,
	.sync_offset = 0x2100,
	.dma_mask = DMA_BIT_MASK(34),
	.has_wide_gather = false,
	.has_hypervisor = false,
	.num_sid_entries = 0,
	.sid_table = NULL,
	.reserve_vblank_syncpts = false,
};

static const struct host1x_info host1x05_info = {
	.nb_channels = 14,
	.nb_pts = 192,
	.nb_mlocks = 16,
	.nb_bases = 64,
	.init = host1x05_init,
	.sync_offset = 0x2100,
	.dma_mask = DMA_BIT_MASK(34),
	.has_wide_gather = false,
	.has_hypervisor = false,
	.num_sid_entries = 0,
	.sid_table = NULL,
	.reserve_vblank_syncpts = false,
};

static const struct host1x_sid_entry tegra186_sid_table[] = {
	{ /* VIC */ .base = 0x1af0, .offset = 0x30, .limit = 0x34 },
	{ /* NVDEC */ .base = 0x1b00, .offset = 0x30, .limit = 0x34 },
};

static const struct host1x_info host1x06_info = {
	.nb_channels = 63,
	.nb_pts = 576,
	.nb_mlocks = 24,
	.nb_bases = 16,
	.init = host1x06_init,
	.sync_offset = 0x0,
	.dma_mask = DMA_BIT_MASK(40),
	.has_wide_gather = true,
	.has_hypervisor = true,
	.num_sid_entries = ARRAY_SIZE(tegra186_sid_table),
	.sid_table = tegra186_sid_table,
	.reserve_vblank_syncpts = false,
};

static const struct host1x_sid_entry tegra194_sid_table[] = {
	{ /* VIC */ .base = 0x1af0, .offset = 0x30, .limit = 0x34 },
	{ /* NVDEC */ .base = 0x1b00, .offset = 0x30, .limit = 0x34 },
	{ /* NVDEC1 */ .base = 0x1bc0, .offset = 0x30, .limit = 0x34 },
};

static const struct host1x_info host1x07_info = {
	.nb_channels = 63,
	.nb_pts = 704,
	.nb_mlocks = 32,
	.nb_bases = 0,
	.init = host1x07_init,
	.sync_offset = 0x0,
	.dma_mask = DMA_BIT_MASK(40),
	.has_wide_gather = true,
	.has_hypervisor = true,
	.num_sid_entries = ARRAY_SIZE(tegra194_sid_table),
	.sid_table = tegra194_sid_table,
	.reserve_vblank_syncpts = false,
};

static const struct of_device_id host1x_of_match[] = {
	{ .compatible = "nvidia,tegra194-host1x", .data = &host1x07_info, },
	{ .compatible = "nvidia,tegra186-host1x", .data = &host1x06_info, },
	{ .compatible = "nvidia,tegra210-host1x", .data = &host1x05_info, },
	{ .compatible = "nvidia,tegra124-host1x", .data = &host1x04_info, },
	{ .compatible = "nvidia,tegra114-host1x", .data = &host1x02_info, },
	{ .compatible = "nvidia,tegra30-host1x", .data = &host1x01_info, },
	{ .compatible = "nvidia,tegra20-host1x", .data = &host1x01_info, },
	{ },
};
MODULE_DEVICE_TABLE(of, host1x_of_match);
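
/*
 * Program the stream ID table into the hypervisor register aperture. For
 * each memory client, the entry's offset and limit values are written to a
 * pair of registers at entry->base. This is a no-op on SoCs without a
 * hypervisor aperture.
 */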
static void host1x_setup_sid_table(struct host1x *host)
{
	const struct host1x_info *info = host->info;
	unsigned int i;

	if (!info->has_hypervisor)
		return;

	for (i = 0; i < info->num_sid_entries; i++) {
		const struct host1x_sid_entry *entry = &info->sid_table[i];

		host1x_hypervisor_writel(host, entry->offset, entry->base);
		host1x_hypervisor_writel(host, entry->limit, entry->base + 4);
	}
}

static bool host1x_wants_iommu(struct host1x *host1x)
{
	/*
	 * If we support addressing a maximum of 32 bits of physical memory
	 * and if the host1x firewall is enabled, there's no need to enable
	 * IOMMU support. This can happen for example on Tegra20, Tegra30
	 * and Tegra114.
	 *
	 * Tegra124 and later can address up to 34 bits of physical memory and
	 * many platforms come equipped with more than 2 GiB of system memory,
	 * which requires crossing the 4 GiB boundary. But there's a catch: on
	 * SoCs before Tegra186 (i.e. Tegra124 and Tegra210), the host1x can
	 * only address up to 32 bits of memory in GATHER opcodes, which means
	 * that command buffers need to either be in the first 2 GiB of system
	 * memory (which could quickly lead to memory exhaustion), or command
	 * buffers need to be treated differently from other buffers (which is
	 * not possible with the current ABI).
	 *
	 * A third option is to use the IOMMU in these cases to make sure all
	 * buffers will be mapped into a 32-bit IOVA space that host1x can
	 * address. This allows all of the system memory to be used and works
	 * within the limitations of the host1x on these SoCs.
	 *
	 * In summary, enable the IOMMU by default on Tegra124 and later. For
	 * any of the earlier SoCs, only use the IOMMU for additional safety
	 * when the host1x firewall is disabled.
	 */
	if (host1x->info->dma_mask <= DMA_BIT_MASK(32)) {
		if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
			return false;
	}

	return true;
}
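
/*
 * Detach from any IOMMU mapping set up by the 32-bit ARM DMA code and, if
 * host1x_wants_iommu() says so, attach to an explicitly managed domain with
 * an IOVA range limited to what the hardware can address. Returns the
 * domain in use, NULL if the device ends up without an IOMMU, or an
 * ERR_PTR() on failure.
 */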
static struct iommu_domain *host1x_iommu_attach(struct host1x *host)
{
	struct iommu_domain *domain = iommu_get_domain_for_dev(host->dev);
	int err;

#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
	if (host->dev->archdata.mapping) {
		struct dma_iommu_mapping *mapping =
				to_dma_iommu_mapping(host->dev);
		arm_iommu_detach_device(host->dev);
		arm_iommu_release_mapping(mapping);

		domain = iommu_get_domain_for_dev(host->dev);
	}
#endif

	/*
	 * We may not always want to enable IOMMU support (for example if the
	 * host1x firewall is already enabled and we don't support addressing
	 * more than 32 bits of physical memory), so check for that first.
	 *
	 * Similarly, if host1x is already attached to an IOMMU (via the DMA
	 * API), don't try to attach again.
	 */
	if (!host1x_wants_iommu(host) || domain)
		return domain;

	host->group = iommu_group_get(host->dev);
	if (host->group) {
		struct iommu_domain_geometry *geometry;
		dma_addr_t start, end;
		unsigned long order;

		err = iova_cache_get();
		if (err < 0)
			goto put_group;

		host->domain = iommu_domain_alloc(&platform_bus_type);
		if (!host->domain) {
			err = -ENOMEM;
			goto put_cache;
		}

		err = iommu_attach_group(host->domain, host->group);
		if (err) {
			if (err == -ENODEV)
				err = 0;

			goto free_domain;
		}

		geometry = &host->domain->geometry;
		start = geometry->aperture_start & host->info->dma_mask;
		end = geometry->aperture_end & host->info->dma_mask;

		order = __ffs(host->domain->pgsize_bitmap);
		init_iova_domain(&host->iova, 1UL << order, start >> order);
		host->iova_end = end;

		domain = host->domain;
	}

	return domain;

free_domain:
	iommu_domain_free(host->domain);
	host->domain = NULL;
put_cache:
	iova_cache_put();
put_group:
	iommu_group_put(host->group);
	host->group = NULL;

	return ERR_PTR(err);
}
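
/*
 * Set up IOMMU translation (if wanted) and derive the effective DMA mask:
 * without an IOMMU and without wide GATHER support, push buffers must stay
 * within the 32 bits addressable by the GATHER opcode.
 */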
static int host1x_iommu_init(struct host1x *host)
{
	u64 mask = host->info->dma_mask;
	struct iommu_domain *domain;
	int err;

	domain = host1x_iommu_attach(host);
	if (IS_ERR(domain)) {
		err = PTR_ERR(domain);
		dev_err(host->dev, "failed to attach to IOMMU: %d\n", err);
		return err;
	}

	/*
	 * If we're not behind an IOMMU make sure we don't get push buffers
	 * that are allocated outside of the range addressable by the GATHER
	 * opcode.
	 *
	 * Newer generations of Tegra (Tegra186 and later) support a wide
	 * variant of the GATHER opcode that allows addressing more bits.
	 */
	if (!domain && !host->info->has_wide_gather)
		mask = DMA_BIT_MASK(32);

	err = dma_coerce_mask_and_coherent(host->dev, mask);
	if (err < 0) {
		dev_err(host->dev, "failed to set DMA mask: %d\n", err);
		return err;
	}

	return 0;
}

static void host1x_iommu_exit(struct host1x *host)
{
	if (host->domain) {
		put_iova_domain(&host->iova);
		iommu_detach_group(host->domain, host->group);

		iommu_domain_free(host->domain);
		host->domain = NULL;

		iova_cache_put();

		iommu_group_put(host->group);
		host->group = NULL;
	}
}
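
/*
 * On some SoCs the memory controller ("mc") reset must be handled along
 * with the host1x reset, so both are requested as an optional bulk set.
 * Only the "host1x" reset itself is mandatory.
 */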
static int host1x_get_resets(struct host1x *host)
{
	int err;

	host->resets[0].id = "mc";
	host->resets[1].id = "host1x";
	host->nresets = ARRAY_SIZE(host->resets);

	err = devm_reset_control_bulk_get_optional_exclusive_released(
				host->dev, host->nresets, host->resets);
	if (err) {
		dev_err(host->dev, "failed to get reset: %d\n", err);
		return err;
	}

	if (WARN_ON(!host->resets[1].rstc))
		return -ENOENT;

	return 0;
}
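
/*
 * Bring the controller up in stages: register apertures and the syncpoint
 * interrupt, then clock and resets, the optional IOMMU, channels,
 * syncpoints and interrupts, and finally runtime PM, debugfs and the
 * host1x bus devices. Failures unwind through the labels at the bottom.
 */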
static int host1x_probe(struct platform_device *pdev)
{
	struct host1x *host;
	struct resource *regs, *hv_regs = NULL;
	int syncpt_irq;
	int err;

	host = devm_kzalloc(&pdev->dev, sizeof(*host), GFP_KERNEL);
	if (!host)
		return -ENOMEM;

	host->info = of_device_get_match_data(&pdev->dev);

	if (host->info->has_hypervisor) {
		regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "vm");
		if (!regs) {
			dev_err(&pdev->dev, "failed to get vm registers\n");
			return -ENXIO;
		}

		hv_regs = platform_get_resource_byname(pdev, IORESOURCE_MEM,
						       "hypervisor");
		if (!hv_regs) {
			dev_err(&pdev->dev,
				"failed to get hypervisor registers\n");
			return -ENXIO;
		}
	} else {
		regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
		if (!regs) {
			dev_err(&pdev->dev, "failed to get registers\n");
			return -ENXIO;
		}
	}

	syncpt_irq = platform_get_irq(pdev, 0);
	if (syncpt_irq < 0)
		return syncpt_irq;

	mutex_init(&host->devices_lock);
	INIT_LIST_HEAD(&host->devices);
	INIT_LIST_HEAD(&host->list);
	host->dev = &pdev->dev;

	/* set common host1x device data */
	platform_set_drvdata(pdev, host);

	host->regs = devm_ioremap_resource(&pdev->dev, regs);
	if (IS_ERR(host->regs))
		return PTR_ERR(host->regs);

	if (host->info->has_hypervisor) {
		host->hv_regs = devm_ioremap_resource(&pdev->dev, hv_regs);
		if (IS_ERR(host->hv_regs))
			return PTR_ERR(host->hv_regs);
	}

	host->dev->dma_parms = &host->dma_parms;
	dma_set_max_seg_size(host->dev, UINT_MAX);

	if (host->info->init) {
		err = host->info->init(host);
		if (err)
			return err;
	}

	host->clk = devm_clk_get(&pdev->dev, NULL);
	if (IS_ERR(host->clk)) {
		err = PTR_ERR(host->clk);

		if (err != -EPROBE_DEFER)
			dev_err(&pdev->dev, "failed to get clock: %d\n", err);

		return err;
	}

	err = host1x_get_resets(host);
	if (err)
		return err;

	host1x_bo_cache_init(&host->cache);

	err = host1x_iommu_init(host);
	if (err < 0) {
		dev_err(&pdev->dev, "failed to setup IOMMU: %d\n", err);
		goto destroy_cache;
	}

	err = host1x_channel_list_init(&host->channel_list,
				       host->info->nb_channels);
	if (err) {
		dev_err(&pdev->dev, "failed to initialize channel list\n");
		goto iommu_exit;
	}

	err = host1x_syncpt_init(host);
	if (err) {
		dev_err(&pdev->dev, "failed to initialize syncpts\n");
		goto free_channels;
	}

	err = host1x_intr_init(host, syncpt_irq);
	if (err) {
		dev_err(&pdev->dev, "failed to initialize interrupts\n");
		goto deinit_syncpt;
	}

	pm_runtime_enable(&pdev->dev);

	err = devm_tegra_core_dev_init_opp_table_common(&pdev->dev);
	if (err)
		goto pm_disable;

	/* the driver isn't ready for dynamic runtime PM yet */
	err = pm_runtime_resume_and_get(&pdev->dev);
	if (err)
		goto pm_disable;

	host1x_debug_init(host);

	err = host1x_register(host);
	if (err < 0)
		goto deinit_debugfs;

	err = devm_of_platform_populate(&pdev->dev);
	if (err < 0)
		goto unregister;

	return 0;

unregister:
	host1x_unregister(host);
deinit_debugfs:
	host1x_debug_deinit(host);

	pm_runtime_put_sync_suspend(&pdev->dev);
pm_disable:
	pm_runtime_disable(&pdev->dev);

	host1x_intr_deinit(host);
deinit_syncpt:
	host1x_syncpt_deinit(host);
free_channels:
	host1x_channel_list_free(&host->channel_list);
iommu_exit:
	host1x_iommu_exit(host);
destroy_cache:
	host1x_bo_cache_destroy(&host->cache);

	return err;
}
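
/* Tear down everything set up by host1x_probe(), in reverse order. */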
static int host1x_remove(struct platform_device *pdev)
{
	struct host1x *host = platform_get_drvdata(pdev);

	host1x_unregister(host);
	host1x_debug_deinit(host);

	pm_runtime_force_suspend(&pdev->dev);

	host1x_intr_deinit(host);
	host1x_syncpt_deinit(host);
	host1x_channel_list_free(&host->channel_list);
	host1x_iommu_exit(host);
	host1x_bo_cache_destroy(&host->cache);

	return 0;
}
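
/*
 * Runtime suspend quiesces interrupt handling, saves syncpoint state and
 * asserts the resets before gating the clock. If asserting the resets
 * fails, the saved state is restored so the device keeps working.
 */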
static int __maybe_unused host1x_runtime_suspend(struct device *dev)
{
	struct host1x *host = dev_get_drvdata(dev);
	int err;

	host1x_intr_stop(host);
	host1x_syncpt_save(host);

	err = reset_control_bulk_assert(host->nresets, host->resets);
	if (err) {
		dev_err(dev, "failed to assert reset: %d\n", err);
		goto resume_host1x;
	}

	usleep_range(1000, 2000);

	clk_disable_unprepare(host->clk);
	reset_control_bulk_release(host->nresets, host->resets);

	return 0;

resume_host1x:
	host1x_setup_sid_table(host);
	host1x_syncpt_restore(host);
	host1x_intr_start(host);

	return err;
}
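
/*
 * Runtime resume mirrors suspend: reacquire the resets, ungate the clock,
 * deassert the resets and reprogram the hardware state (stream ID table,
 * syncpoint values, interrupt setup).
 */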
static int __maybe_unused host1x_runtime_resume(struct device *dev)
{
	struct host1x *host = dev_get_drvdata(dev);
	int err;

	err = reset_control_bulk_acquire(host->nresets, host->resets);
	if (err) {
		dev_err(dev, "failed to acquire reset: %d\n", err);
		return err;
	}

	err = clk_prepare_enable(host->clk);
	if (err) {
		dev_err(dev, "failed to enable clock: %d\n", err);
		goto release_reset;
	}

	err = reset_control_bulk_deassert(host->nresets, host->resets);
	if (err) {
		dev_err(dev, "failed to deassert reset: %d\n", err);
		goto disable_clk;
	}

	host1x_setup_sid_table(host);
	host1x_syncpt_restore(host);
	host1x_intr_start(host);

	return 0;

disable_clk:
	clk_disable_unprepare(host->clk);
release_reset:
	reset_control_bulk_release(host->nresets, host->resets);

	return err;
}

static const struct dev_pm_ops host1x_pm_ops = {
	SET_RUNTIME_PM_OPS(host1x_runtime_suspend, host1x_runtime_resume,
			   NULL)
	/* TODO: add system suspend/resume once the driver is ready for it */
};

static struct platform_driver tegra_host1x_driver = {
	.driver = {
		.name = "tegra-host1x",
		.of_match_table = host1x_of_match,
		.pm = &host1x_pm_ops,
	},
	.probe = host1x_probe,
	.remove = host1x_remove,
};

static struct platform_driver * const drivers[] = {
	&tegra_host1x_driver,
	&tegra_mipi_driver,
};
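
/*
 * The host1x bus type is registered before the platform drivers so that
 * host1x devices created while the drivers probe have a bus to attach to.
 */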
static int __init tegra_host1x_init(void)
{
	int err;

	err = bus_register(&host1x_bus_type);
	if (err < 0)
		return err;

	err = platform_register_drivers(drivers, ARRAY_SIZE(drivers));
	if (err < 0)
		bus_unregister(&host1x_bus_type);

	return err;
}
module_init(tegra_host1x_init);

static void __exit tegra_host1x_exit(void)
{
	platform_unregister_drivers(drivers, ARRAY_SIZE(drivers));
	bus_unregister(&host1x_bus_type);
}
module_exit(tegra_host1x_exit);

/**
 * host1x_get_dma_mask() - query the supported DMA mask for host1x
 * @host1x: host1x instance
 *
 * Note that this returns the supported DMA mask for host1x, which can differ
 * from the DMA mask actually applied to the device: when host1x is not
 * behind an IOMMU and lacks wide GATHER support, the applied mask is
 * restricted to 32 bits (see host1x_iommu_init()).
 */
u64 host1x_get_dma_mask(struct host1x *host1x)
{
	return host1x->info->dma_mask;
}
EXPORT_SYMBOL(host1x_get_dma_mask);

MODULE_AUTHOR("Thierry Reding <thierry.reding@avionic-design.de>");
MODULE_AUTHOR("Terje Bergstrom <tbergstrom@nvidia.com>");
MODULE_DESCRIPTION("Host1x driver for Tegra products");
MODULE_LICENSE("GPL");