For each traffic class, firmware handles up to 4 * MTU amount of data
per burst cycle. Under heavy load, this small buffer size is a
bottleneck when buffering large TSO packets in <= 1500 MTU case.
Increase the burst buffer size to 8 * MTU when supported.
Also, keep the driver's traffic class configuration API similar to
the firmware API counterpart.
Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
* programmed with various parameters.
*/
struct ch_sched_params {
* programmed with various parameters.
*/
struct ch_sched_params {
- s8 type; /* packet or flow */
+ u8 type; /* packet or flow */
- s8 level; /* scheduler hierarchy level */
- s8 mode; /* per-class or per-flow */
- s8 rateunit; /* bit or packet rate */
- s8 ratemode; /* %port relative or kbps absolute */
- s8 channel; /* scheduler channel [0..N] */
- s8 class; /* scheduler class [0..N] */
- s32 minrate; /* minimum rate */
- s32 maxrate; /* maximum rate */
- s16 weight; /* percent weight */
- s16 pktsize; /* average packet size */
+ u8 level; /* scheduler hierarchy level */
+ u8 mode; /* per-class or per-flow */
+ u8 rateunit; /* bit or packet rate */
+ u8 ratemode; /* %port relative or kbps absolute */
+ u8 channel; /* scheduler channel [0..N] */
+ u8 class; /* scheduler class [0..N] */
+ u32 minrate; /* minimum rate */
+ u32 maxrate; /* maximum rate */
+ u16 weight; /* percent weight */
+ u16 pktsize; /* average packet size */
+ u16 burstsize; /* burst buffer size */
enum ctxt_type ctype, u32 *data);
int t4_sge_ctxt_rd_bd(struct adapter *adap, unsigned int cid,
enum ctxt_type ctype, u32 *data);
enum ctxt_type ctype, u32 *data);
int t4_sge_ctxt_rd_bd(struct adapter *adap, unsigned int cid,
enum ctxt_type ctype, u32 *data);
-int t4_sched_params(struct adapter *adapter, int type, int level, int mode,
- int rateunit, int ratemode, int channel, int class,
- int minrate, int maxrate, int weight, int pktsize);
+int t4_sched_params(struct adapter *adapter, u8 type, u8 level, u8 mode,
+ u8 rateunit, u8 ratemode, u8 channel, u8 class,
+ u32 minrate, u32 maxrate, u16 weight, u16 pktsize,
+ u16 burstsize);
void t4_sge_decode_idma_state(struct adapter *adapter, int state);
void t4_idma_monitor_init(struct adapter *adapter,
struct sge_idma_monitor_state *idma);
void t4_sge_decode_idma_state(struct adapter *adapter, int state);
void t4_idma_monitor_init(struct adapter *adapter,
struct sge_idma_monitor_state *idma);
SCHED_CLASS_RATEUNIT_BITS,
SCHED_CLASS_RATEMODE_ABS,
pi->tx_chan, class_id, 0,
SCHED_CLASS_RATEUNIT_BITS,
SCHED_CLASS_RATEMODE_ABS,
pi->tx_chan, class_id, 0,
- max_tx_rate * 1000, 0, pktsize);
+ max_tx_rate * 1000, 0, pktsize, 0);
if (ret) {
dev_err(adap->pdev_dev, "Err %d for Traffic Class config\n",
ret);
if (ret) {
dev_err(adap->pdev_dev, "Err %d for Traffic Class config\n",
ret);
p.u.params.minrate = div_u64(mqprio->min_rate[i] * 8, 1000);
p.u.params.maxrate = div_u64(mqprio->max_rate[i] * 8, 1000);
p.u.params.minrate = div_u64(mqprio->min_rate[i] * 8, 1000);
p.u.params.maxrate = div_u64(mqprio->max_rate[i] * 8, 1000);
+ /* Request larger burst buffer for smaller MTU, so
+ * that hardware can work on more data per burst
+ * cycle.
+ */
+ if (dev->mtu <= ETH_DATA_LEN)
+ p.u.params.burstsize = 8 * dev->mtu;
+
e = cxgb4_sched_class_alloc(dev, &p);
if (!e) {
ret = -ENOMEM;
e = cxgb4_sched_class_alloc(dev, &p);
if (!e) {
ret = -ENOMEM;
p->u.params.ratemode,
p->u.params.channel, e->idx,
p->u.params.minrate, p->u.params.maxrate,
p->u.params.ratemode,
p->u.params.channel, e->idx,
p->u.params.minrate, p->u.params.maxrate,
- p->u.params.weight, p->u.params.pktsize);
+ p->u.params.weight, p->u.params.pktsize,
+ p->u.params.burstsize);
break;
default:
err = -ENOTSUPP;
break;
default:
err = -ENOTSUPP;
-int t4_sched_params(struct adapter *adapter, int type, int level, int mode,
- int rateunit, int ratemode, int channel, int class,
- int minrate, int maxrate, int weight, int pktsize)
+int t4_sched_params(struct adapter *adapter, u8 type, u8 level, u8 mode,
+ u8 rateunit, u8 ratemode, u8 channel, u8 class,
+ u32 minrate, u32 maxrate, u16 weight, u16 pktsize,
+ u16 burstsize)
{
struct fw_sched_cmd cmd;
{
struct fw_sched_cmd cmd;
cmd.u.params.max = cpu_to_be32(maxrate);
cmd.u.params.weight = cpu_to_be16(weight);
cmd.u.params.pktsize = cpu_to_be16(pktsize);
cmd.u.params.max = cpu_to_be32(maxrate);
cmd.u.params.weight = cpu_to_be16(weight);
cmd.u.params.pktsize = cpu_to_be16(pktsize);
+ cmd.u.params.burstsize = cpu_to_be16(burstsize);
return t4_wr_mbox_meat(adapter, adapter->mbox, &cmd, sizeof(cmd),
NULL, 1);
return t4_wr_mbox_meat(adapter, adapter->mbox, &cmd, sizeof(cmd),
NULL, 1);