// SPDX-License-Identifier: GPL-2.0

/* net/sched/sch_taprio.c        Time Aware Priority Scheduler
 *
 * Authors:     Vinicius Costa Gomes <vinicius.gomes@intel.com>
 *
 */

#include <linux/types.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/list.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/sch_generic.h>

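/* Expands to a gate mask with all bits set, so the BIT(tc) test in the
 * dequeue path passes for every traffic class.
 */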
#define TAPRIO_ALL_GATES_OPEN -1

struct sched_entry {
        struct list_head list;

        /* The instant that this entry "closes" and the next one
         * should open; the qdisc will make some effort so that no
         * packet leaves after this time.
         */
        ktime_t close_time;
        atomic_t budget;
        int index;
        u32 gate_mask;
        u32 interval;
        u8 command;
};

struct taprio_sched {
        struct Qdisc **qdiscs;
        struct Qdisc *root;
        s64 base_time;
        int clockid;
        int picos_per_byte; /* Using picoseconds because at 10Gbps+
                             * speeds the per-byte time is sub-nanosecond
                             */
        size_t num_entries;

        /* Protects the update side of the RCU protected current_entry */
        spinlock_t current_entry_lock;
        struct sched_entry __rcu *current_entry;
        struct list_head entries;
        ktime_t (*get_time)(void);
        struct hrtimer advance_timer;
};

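/* Enqueue only routes the skb to the child qdisc selected by its queue
 * mapping; gate enforcement happens later, on the dequeue path.
 */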
static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                          struct sk_buff **to_free)
{
        struct taprio_sched *q = qdisc_priv(sch);
        struct Qdisc *child;
        int queue;

        queue = skb_get_queue_mapping(skb);

        child = q->qdiscs[queue];
        if (unlikely(!child))
                return qdisc_drop(skb, sch, to_free);

        qdisc_qstats_backlog_inc(sch, skb);
        sch->q.qlen++;

        return qdisc_enqueue(skb, child, to_free);
}

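/* Peek walks the child qdiscs in queue order and returns the head skb
 * of the first non-empty child, but only if the gate for that skb's
 * traffic class is currently open.
 */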
static struct sk_buff *taprio_peek(struct Qdisc *sch)
{
        struct taprio_sched *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);
        struct sched_entry *entry;
        struct sk_buff *skb;
        u32 gate_mask;
        int i;

        rcu_read_lock();
        entry = rcu_dereference(q->current_entry);
        gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN;
        rcu_read_unlock();

        if (!gate_mask)
                return NULL;

        for (i = 0; i < dev->num_tx_queues; i++) {
                struct Qdisc *child = q->qdiscs[i];
                int prio;
                u8 tc;

                if (unlikely(!child))
                        continue;

                skb = child->ops->peek(child);
                if (!skb)
                        continue;

                prio = skb->priority;
                tc = netdev_get_prio_tc_map(dev, prio);

                if (!(gate_mask & BIT(tc)))
                        return NULL;

                return skb;
        }

        return NULL;
}

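/* Transmission time of a frame in nanoseconds: len bytes at
 * picos_per_byte, divided by 1000 to convert ps to ns. For example, a
 * 1500-byte frame at 1Gbps (8000 ps/byte) takes 12000 ns.
 */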
static inline int length_to_duration(struct taprio_sched *q, int len)
{
        return (len * q->picos_per_byte) / 1000;
}

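/* Dequeue hands out an skb only when three conditions hold: the gate
 * for its traffic class is open, the frame's transmission would finish
 * before the current entry closes (the "guard band" check), and enough
 * byte budget remains in the entry's interval.
 */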
static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
{
        struct taprio_sched *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);
        struct sched_entry *entry;
        struct sk_buff *skb;
        u32 gate_mask;
        int i;

        rcu_read_lock();
        entry = rcu_dereference(q->current_entry);
        /* if there's no entry, it means that the schedule didn't
         * start yet, so force all gates to be open; this is in
         * accordance with IEEE 802.1Qbv-2015 Section 8.6.9.4.5
         * "AdminGateStates"
         */
        gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN;
        rcu_read_unlock();

        if (!gate_mask)
                return NULL;

        for (i = 0; i < dev->num_tx_queues; i++) {
                struct Qdisc *child = q->qdiscs[i];
                ktime_t guard;
                int prio;
                int len;
                u8 tc;

                if (unlikely(!child))
                        continue;

                skb = child->ops->peek(child);
                if (!skb)
                        continue;

                prio = skb->priority;
                tc = netdev_get_prio_tc_map(dev, prio);

                if (!(gate_mask & BIT(tc)))
                        continue;

                len = qdisc_pkt_len(skb);
                guard = ktime_add_ns(q->get_time(),
                                     length_to_duration(q, len));

                /* In the case that there's no gate entry, there's no
                 * guard band ...
                 */
                if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
                    ktime_after(guard, entry->close_time))
                        return NULL;

                /* ... and no budget. */
                if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
                    atomic_sub_return(len, &entry->budget) < 0)
                        return NULL;

                skb = child->ops->dequeue(child);
                if (unlikely(!skb))
                        return NULL;

                qdisc_bstats_update(sch, skb);
                qdisc_qstats_backlog_dec(sch, skb);
                sch->q.qlen--;

                return skb;
        }

        return NULL;
}

static bool should_restart_cycle(const struct taprio_sched *q,
                                 const struct sched_entry *entry)
{
        WARN_ON(!entry);

        return list_is_last(&entry->list, &q->entries);
}

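/* hrtimer callback that advances the schedule to the next entry (or
 * wraps around to the first one), computes its close time and byte
 * budget, publishes it via RCU and rearms the timer for the new close
 * time.
 */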
static enum hrtimer_restart advance_sched(struct hrtimer *timer)
{
        struct taprio_sched *q = container_of(timer, struct taprio_sched,
                                              advance_timer);
        struct sched_entry *entry, *next;
        struct Qdisc *sch = q->root;
        ktime_t close_time;

        spin_lock(&q->current_entry_lock);
        entry = rcu_dereference_protected(q->current_entry,
                                          lockdep_is_held(&q->current_entry_lock));

        /* This is the case that it's the first time that the schedule
         * runs, so it only happens once per schedule. The first entry
         * is pre-calculated during the schedule initialization.
         */
        if (unlikely(!entry)) {
                next = list_first_entry(&q->entries, struct sched_entry,
                                        list);
                close_time = next->close_time;
                goto first_run;
        }

        if (should_restart_cycle(q, entry))
                next = list_first_entry(&q->entries, struct sched_entry,
                                        list);
        else
                next = list_next_entry(entry, list);

        close_time = ktime_add_ns(entry->close_time, next->interval);

        next->close_time = close_time;
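        /* The budget is the number of bytes that can be sent during the
         * interval: interval is in ns, multiply by 1000 to get ps and
         * divide by the link's ps-per-byte cost. E.g. a 100us interval
         * at 1Gbps (8000 ps/byte) yields a budget of 12500 bytes.
         */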
        atomic_set(&next->budget,
                   (next->interval * 1000) / q->picos_per_byte);

first_run:
        rcu_assign_pointer(q->current_entry, next);
        spin_unlock(&q->current_entry_lock);

        hrtimer_set_expires(&q->advance_timer, close_time);

        rcu_read_lock();
        __netif_schedule(sch);
        rcu_read_unlock();

        return HRTIMER_RESTART;
}

static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = {
        [TCA_TAPRIO_SCHED_ENTRY_INDEX]     = { .type = NLA_U32 },
        [TCA_TAPRIO_SCHED_ENTRY_CMD]       = { .type = NLA_U8 },
        [TCA_TAPRIO_SCHED_ENTRY_GATE_MASK] = { .type = NLA_U32 },
        [TCA_TAPRIO_SCHED_ENTRY_INTERVAL]  = { .type = NLA_U32 },
};

static const struct nla_policy entry_list_policy[TCA_TAPRIO_SCHED_MAX + 1] = {
        [TCA_TAPRIO_SCHED_ENTRY] = { .type = NLA_NESTED },
};

static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
        [TCA_TAPRIO_ATTR_PRIOMAP]              = {
                .len = sizeof(struct tc_mqprio_qopt)
        },
        [TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST]     = { .type = NLA_NESTED },
        [TCA_TAPRIO_ATTR_SCHED_BASE_TIME]      = { .type = NLA_S64 },
        [TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]   = { .type = NLA_NESTED },
        [TCA_TAPRIO_ATTR_SCHED_CLOCKID]        = { .type = NLA_S32 },
};

static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry,
                            struct netlink_ext_ack *extack)
{
        u32 interval = 0;

        if (tb[TCA_TAPRIO_SCHED_ENTRY_CMD])
                entry->command = nla_get_u8(
                        tb[TCA_TAPRIO_SCHED_ENTRY_CMD]);

        if (tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK])
                entry->gate_mask = nla_get_u32(
                        tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK]);

        if (tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL])
                interval = nla_get_u32(
                        tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]);

        if (interval == 0) {
                NL_SET_ERR_MSG(extack, "Invalid interval for schedule entry");
                return -EINVAL;
        }

        entry->interval = interval;

        return 0;
}

static int parse_sched_entry(struct nlattr *n, struct sched_entry *entry,
                             int index, struct netlink_ext_ack *extack)
{
        struct nlattr *tb[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { };
        int err;

        err = nla_parse_nested(tb, TCA_TAPRIO_SCHED_ENTRY_MAX, n,
                               entry_policy, NULL);
        if (err < 0) {
                NL_SET_ERR_MSG(extack, "Could not parse nested entry");
                return -EINVAL;
        }

        entry->index = index;

        return fill_sched_entry(tb, entry, extack);
}

/* Returns the number of entries in case of success */
static int parse_sched_single_entry(struct nlattr *n,
                                    struct taprio_sched *q,
                                    struct netlink_ext_ack *extack)
{
        struct nlattr *tb_entry[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { };
        struct nlattr *tb_list[TCA_TAPRIO_SCHED_MAX + 1] = { };
        struct sched_entry *entry;
        bool found = false;
        u32 index;
        int err;

        err = nla_parse_nested(tb_list, TCA_TAPRIO_SCHED_MAX,
                               n, entry_list_policy, NULL);
        if (err < 0) {
                NL_SET_ERR_MSG(extack, "Could not parse nested entry");
                return -EINVAL;
        }

        if (!tb_list[TCA_TAPRIO_SCHED_ENTRY]) {
                NL_SET_ERR_MSG(extack, "Single-entry must include an entry");
                return -EINVAL;
        }

        err = nla_parse_nested(tb_entry, TCA_TAPRIO_SCHED_ENTRY_MAX,
                               tb_list[TCA_TAPRIO_SCHED_ENTRY],
                               entry_policy, NULL);
        if (err < 0) {
                NL_SET_ERR_MSG(extack, "Could not parse nested entry");
                return -EINVAL;
        }

        if (!tb_entry[TCA_TAPRIO_SCHED_ENTRY_INDEX]) {
                NL_SET_ERR_MSG(extack, "Entry must specify an index");
                return -EINVAL;
        }

        index = nla_get_u32(tb_entry[TCA_TAPRIO_SCHED_ENTRY_INDEX]);
        if (index >= q->num_entries) {
                NL_SET_ERR_MSG(extack, "Index for single entry exceeds number of entries in schedule");
                return -EINVAL;
        }

        list_for_each_entry(entry, &q->entries, list) {
                if (entry->index == index) {
                        found = true;
                        break;
                }
        }

        if (!found) {
                NL_SET_ERR_MSG(extack, "Could not find entry");
                return -ENOENT;
        }

        err = fill_sched_entry(tb_entry, entry, extack);
        if (err < 0)
                return err;

        return q->num_entries;
}

static int parse_sched_list(struct nlattr *list,
                            struct taprio_sched *q,
                            struct netlink_ext_ack *extack)
{
        struct nlattr *n;
        int err, rem;
        int i = 0;

        if (!list)
                return -EINVAL;

        nla_for_each_nested(n, list, rem) {
                struct sched_entry *entry;

                if (nla_type(n) != TCA_TAPRIO_SCHED_ENTRY) {
                        NL_SET_ERR_MSG(extack, "Attribute is not of type 'entry'");
                        continue;
                }

                entry = kzalloc(sizeof(*entry), GFP_KERNEL);
                if (!entry) {
                        NL_SET_ERR_MSG(extack, "Not enough memory for entry");
                        return -ENOMEM;
                }

                err = parse_sched_entry(n, entry, i, extack);
                if (err < 0) {
                        kfree(entry);
                        return err;
                }

                list_add_tail(&entry->list, &q->entries);
                i++;
        }

        q->num_entries = i;

        return i;
}

/* Returns the number of entries in case of success */
static int parse_taprio_opt(struct nlattr **tb, struct taprio_sched *q,
                            struct netlink_ext_ack *extack)
{
        int err = 0;
        int clockid;

        if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST] &&
            tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY])
                return -EINVAL;

        if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] && q->num_entries == 0)
                return -EINVAL;

        if (q->clockid == -1 && !tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID])
                return -EINVAL;

        if (tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME])
                q->base_time = nla_get_s64(
                        tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]);

        if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
                clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]);

                /* We only support static clockids and we don't allow
                 * the clockid to be modified after the first init.
                 */
                if (clockid < 0 || (q->clockid != -1 && q->clockid != clockid))
                        return -EINVAL;

                q->clockid = clockid;
        }

        if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST])
                err = parse_sched_list(
                        tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST], q, extack);
        else if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY])
                err = parse_sched_single_entry(
                        tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY], q, extack);

        /* parse_sched_* return the number of entries in the schedule;
         * a schedule with zero entries is an error.
         */
        if (err == 0) {
                NL_SET_ERR_MSG(extack, "The schedule should contain at least one entry");
                return -EINVAL;
        }

        return err;
}

static int taprio_parse_mqprio_opt(struct net_device *dev,
                                   struct tc_mqprio_qopt *qopt,
                                   struct netlink_ext_ack *extack)
{
        int i, j;

        if (!qopt) {
                NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary");
                return -EINVAL;
        }

        /* Verify num_tc is not out of max range */
        if (qopt->num_tc > TC_MAX_QUEUE) {
                NL_SET_ERR_MSG(extack, "Number of traffic classes is outside valid range");
                return -EINVAL;
        }

        /* taprio imposes that traffic classes map 1:n to tx queues */
        if (qopt->num_tc > dev->num_tx_queues) {
                NL_SET_ERR_MSG(extack, "Number of traffic classes is greater than number of HW queues");
                return -EINVAL;
        }

        /* Verify priority mapping uses valid tcs */
        for (i = 0; i < TC_BITMASK + 1; i++) {
                if (qopt->prio_tc_map[i] >= qopt->num_tc) {
                        NL_SET_ERR_MSG(extack, "Invalid traffic class in priority to traffic class mapping");
                        return -EINVAL;
                }
        }

        for (i = 0; i < qopt->num_tc; i++) {
                unsigned int last = qopt->offset[i] + qopt->count[i];

                /* Verify the queue count is within the tx range; being
                 * equal to real_num_tx_queues means the last queue is in
                 * use.
                 */
                if (qopt->offset[i] >= dev->num_tx_queues ||
                    !qopt->count[i] ||
                    last > dev->real_num_tx_queues) {
                        NL_SET_ERR_MSG(extack, "Invalid queue in traffic class to queue mapping");
                        return -EINVAL;
                }

                /* Verify that the offset and counts do not overlap */
                for (j = i + 1; j < qopt->num_tc; j++) {
                        if (last > qopt->offset[j]) {
                                NL_SET_ERR_MSG(extack, "Detected overlap in the traffic class to queue mapping");
                                return -EINVAL;
                        }
                }
        }

        return 0;
}

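/* Returns the time at which the schedule should (re)start: the
 * configured base time if it is still in the future, otherwise the
 * beginning of the next full cycle, base + (n + 1) * cycle, where
 * n = (now - base) / cycle rounded down.
 */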
static ktime_t taprio_get_start_time(struct Qdisc *sch)
{
        struct taprio_sched *q = qdisc_priv(sch);
        struct sched_entry *entry;
        ktime_t now, base, cycle;
        s64 n;

        base = ns_to_ktime(q->base_time);
        cycle = 0;

        /* Calculate the cycle time by summing all the intervals. */
        list_for_each_entry(entry, &q->entries, list)
                cycle = ktime_add_ns(cycle, entry->interval);

        if (!cycle)
                return base;

        now = q->get_time();

        if (ktime_after(base, now))
                return base;

        /* Schedule the start time for the beginning of the next
         * cycle.
         */
        n = div64_s64(ktime_sub_ns(now, base), cycle);

        return ktime_add_ns(base, (n + 1) * cycle);
}

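/* Pre-computes the close time and budget of the first entry and arms
 * the advance timer for the start instant. current_entry is left NULL
 * so that the first advance_sched() run takes the "first_run" path and
 * dequeue keeps all gates open until then.
 */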
static void taprio_start_sched(struct Qdisc *sch, ktime_t start)
{
        struct taprio_sched *q = qdisc_priv(sch);
        struct sched_entry *first;
        unsigned long flags;

        spin_lock_irqsave(&q->current_entry_lock, flags);

        first = list_first_entry(&q->entries, struct sched_entry,
                                 list);

        first->close_time = ktime_add_ns(start, first->interval);
        atomic_set(&first->budget,
                   (first->interval * 1000) / q->picos_per_byte);
        rcu_assign_pointer(q->current_entry, NULL);

        spin_unlock_irqrestore(&q->current_entry_lock, flags);

        hrtimer_start(&q->advance_timer, start, HRTIMER_MODE_ABS);
}

static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
                         struct netlink_ext_ack *extack)
{
        struct nlattr *tb[TCA_TAPRIO_ATTR_MAX + 1] = { };
        struct taprio_sched *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);
        struct tc_mqprio_qopt *mqprio = NULL;
        struct ethtool_link_ksettings ecmd;
        int i, err, size;
        s64 link_speed;
        ktime_t start;

        err = nla_parse_nested(tb, TCA_TAPRIO_ATTR_MAX, opt,
                               taprio_policy, extack);
        if (err < 0)
                return err;

        err = -EINVAL;
        if (tb[TCA_TAPRIO_ATTR_PRIOMAP])
                mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]);

        err = taprio_parse_mqprio_opt(dev, mqprio, extack);
        if (err < 0)
                return err;

        /* A schedule with less than one entry is an error */
        size = parse_taprio_opt(tb, q, extack);
        if (size < 0)
                return size;

        hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS);
        q->advance_timer.function = advance_sched;

        switch (q->clockid) {
        case CLOCK_REALTIME:
                q->get_time = ktime_get_real;
                break;
        case CLOCK_MONOTONIC:
                q->get_time = ktime_get;
                break;
        case CLOCK_BOOTTIME:
                q->get_time = ktime_get_boottime;
                break;
        case CLOCK_TAI:
                q->get_time = ktime_get_clocktai;
                break;
        default:
                return -ENOTSUPP;
        }

        for (i = 0; i < dev->num_tx_queues; i++) {
                struct netdev_queue *dev_queue;
                struct Qdisc *qdisc;

                dev_queue = netdev_get_tx_queue(dev, i);
                qdisc = qdisc_create_dflt(dev_queue,
                                          &pfifo_qdisc_ops,
                                          TC_H_MAKE(TC_H_MAJ(sch->handle),
                                                    TC_H_MIN(i + 1)),
                                          extack);
                if (!qdisc)
                        return -ENOMEM;

                if (i < dev->real_num_tx_queues)
                        qdisc_hash_add(qdisc, false);

                q->qdiscs[i] = qdisc;
        }

        if (mqprio) {
                netdev_set_num_tc(dev, mqprio->num_tc);
                for (i = 0; i < mqprio->num_tc; i++)
                        netdev_set_tc_queue(dev, i,
                                            mqprio->count[i],
                                            mqprio->offset[i]);

                /* Always use supplied priority mappings */
                for (i = 0; i < TC_BITMASK + 1; i++)
                        netdev_set_prio_tc_map(dev, i,
                                               mqprio->prio_tc_map[i]);
        }

        if (!__ethtool_get_link_ksettings(dev, &ecmd))
                link_speed = ecmd.base.speed;
        else
                link_speed = SPEED_1000;

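        /* Picoseconds per byte: (10^12 ps/s * 8 bits/byte) divided by the
         * link speed in bits/s (link_speed is in Mbps). At 1Gbps this is
         * 8 * 10^12 / 10^9 = 8000 ps/byte, i.e. 8ns per byte.
         */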
        q->picos_per_byte = div64_s64(NSEC_PER_SEC * 1000LL * 8,
                                      link_speed * 1000 * 1000);

        start = taprio_get_start_time(sch);
        if (!start)
                return 0;

        taprio_start_sched(sch, start);

        return 0;
}

static void taprio_destroy(struct Qdisc *sch)
{
        struct taprio_sched *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);
        struct sched_entry *entry, *n;
        unsigned int i;

        hrtimer_cancel(&q->advance_timer);

        if (q->qdiscs) {
                for (i = 0; i < dev->num_tx_queues && q->qdiscs[i]; i++)
                        qdisc_put(q->qdiscs[i]);

                kfree(q->qdiscs);
        }
        q->qdiscs = NULL;

        netdev_set_num_tc(dev, 0);

        list_for_each_entry_safe(entry, n, &q->entries, list) {
                list_del(&entry->list);
                kfree(entry);
        }
}

static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
                       struct netlink_ext_ack *extack)
{
        struct taprio_sched *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);

        INIT_LIST_HEAD(&q->entries);
        spin_lock_init(&q->current_entry_lock);

        /* We may overwrite the configuration later */
        hrtimer_init(&q->advance_timer, CLOCK_TAI, HRTIMER_MODE_ABS);

        q->root = sch;

        /* We only support static clockids. Use an invalid value as default
         * and get the valid one on taprio_change().
         */
        q->clockid = -1;

        if (sch->parent != TC_H_ROOT)
                return -EOPNOTSUPP;

        if (!netif_is_multiqueue(dev))
                return -EOPNOTSUPP;

        /* pre-allocate qdiscs; attachment can't fail */
        q->qdiscs = kcalloc(dev->num_tx_queues,
                            sizeof(q->qdiscs[0]),
                            GFP_KERNEL);

        if (!q->qdiscs)
                return -ENOMEM;

        if (!opt)
                return -EINVAL;

        return taprio_change(sch, opt, extack);
}

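/* Class ids map 1:1 to tx queues, offset by one: class (cl) corresponds
 * to tx queue (cl - 1), and 0 is reserved as "not found".
 */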
static struct netdev_queue *taprio_queue_get(struct Qdisc *sch,
                                             unsigned long cl)
{
        struct net_device *dev = qdisc_dev(sch);
        unsigned long ntx = cl - 1;

        if (ntx >= dev->num_tx_queues)
                return NULL;

        return netdev_get_tx_queue(dev, ntx);
}

static int taprio_graft(struct Qdisc *sch, unsigned long cl,
                        struct Qdisc *new, struct Qdisc **old,
                        struct netlink_ext_ack *extack)
{
        struct taprio_sched *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);
        struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);

        if (!dev_queue)
                return -EINVAL;

        if (dev->flags & IFF_UP)
                dev_deactivate(dev);

        *old = q->qdiscs[cl - 1];
        q->qdiscs[cl - 1] = new;

        if (new)
                new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;

        if (dev->flags & IFF_UP)
                dev_activate(dev);

        return 0;
}

static int dump_entry(struct sk_buff *msg,
                      const struct sched_entry *entry)
{
        struct nlattr *item;

        item = nla_nest_start(msg, TCA_TAPRIO_SCHED_ENTRY);
        if (!item)
                return -ENOSPC;

        if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INDEX, entry->index))
                goto nla_put_failure;

        if (nla_put_u8(msg, TCA_TAPRIO_SCHED_ENTRY_CMD, entry->command))
                goto nla_put_failure;

        if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_GATE_MASK,
                        entry->gate_mask))
                goto nla_put_failure;

        if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INTERVAL,
                        entry->interval))
                goto nla_put_failure;

        return nla_nest_end(msg, item);

nla_put_failure:
        nla_nest_cancel(msg, item);
        return -1;
}

static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct taprio_sched *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);
        struct tc_mqprio_qopt opt = { 0 };
        struct nlattr *nest, *entry_list;
        struct sched_entry *entry;
        unsigned int i;

        opt.num_tc = netdev_get_num_tc(dev);
        memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));

        for (i = 0; i < netdev_get_num_tc(dev); i++) {
                opt.count[i] = dev->tc_to_txq[i].count;
                opt.offset[i] = dev->tc_to_txq[i].offset;
        }

        nest = nla_nest_start(skb, TCA_OPTIONS);
        if (!nest)
                return -ENOSPC;

        if (nla_put(skb, TCA_TAPRIO_ATTR_PRIOMAP, sizeof(opt), &opt))
                goto options_error;

        if (nla_put_s64(skb, TCA_TAPRIO_ATTR_SCHED_BASE_TIME,
                        q->base_time, TCA_TAPRIO_PAD))
                goto options_error;

        if (nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid))
                goto options_error;

        entry_list = nla_nest_start(skb, TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST);
        if (!entry_list)
                goto options_error;

        list_for_each_entry(entry, &q->entries, list) {
                if (dump_entry(skb, entry) < 0)
                        goto options_error;
        }

        nla_nest_end(skb, entry_list);

        return nla_nest_end(skb, nest);

options_error:
        nla_nest_cancel(skb, nest);
        return -1;
}

static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl)
{
        struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);

        if (!dev_queue)
                return NULL;

        return dev_queue->qdisc_sleeping;
}

static unsigned long taprio_find(struct Qdisc *sch, u32 classid)
{
        unsigned int ntx = TC_H_MIN(classid);

        if (!taprio_queue_get(sch, ntx))
                return 0;
        return ntx;
}

static int taprio_dump_class(struct Qdisc *sch, unsigned long cl,
                             struct sk_buff *skb, struct tcmsg *tcm)
{
        struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);

        tcm->tcm_parent = TC_H_ROOT;
        tcm->tcm_handle |= TC_H_MIN(cl);
        tcm->tcm_info = dev_queue->qdisc_sleeping->handle;

        return 0;
}

static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
                                   struct gnet_dump *d)
        __releases(d->lock)
        __acquires(d->lock)
{
        struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);

        sch = dev_queue->qdisc_sleeping;
        if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 ||
            gnet_stats_copy_queue(d, NULL, &sch->qstats, sch->q.qlen) < 0)
                return -1;
        return 0;
}

static void taprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
        struct net_device *dev = qdisc_dev(sch);
        unsigned long ntx;

        if (arg->stop)
                return;

        arg->count = arg->skip;
        for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) {
                if (arg->fn(sch, ntx + 1, arg) < 0) {
                        arg->stop = 1;
                        break;
                }
                arg->count++;
        }
}

static struct netdev_queue *taprio_select_queue(struct Qdisc *sch,
                                                struct tcmsg *tcm)
{
        return taprio_queue_get(sch, TC_H_MIN(tcm->tcm_parent));
}

static const struct Qdisc_class_ops taprio_class_ops = {
        .graft          = taprio_graft,
        .leaf           = taprio_leaf,
        .find           = taprio_find,
        .walk           = taprio_walk,
        .dump           = taprio_dump_class,
        .dump_stats     = taprio_dump_class_stats,
        .select_queue   = taprio_select_queue,
};

static struct Qdisc_ops taprio_qdisc_ops __read_mostly = {
        .cl_ops         = &taprio_class_ops,
        .id             = "taprio",
        .priv_size      = sizeof(struct taprio_sched),
        .init           = taprio_init,
        .destroy        = taprio_destroy,
        .peek           = taprio_peek,
        .dequeue        = taprio_dequeue,
        .enqueue        = taprio_enqueue,
        .dump           = taprio_dump,
        .owner          = THIS_MODULE,
};

static int __init taprio_module_init(void)
{
        return register_qdisc(&taprio_qdisc_ops);
}

static void __exit taprio_module_exit(void)
{
        unregister_qdisc(&taprio_qdisc_ops);
}

module_init(taprio_module_init);
module_exit(taprio_module_exit);
MODULE_LICENSE("GPL");