// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2019 Linaro Ltd.
 *
 * Author: Stanimir Varbanov <stanimir.varbanov@linaro.org>
 */
8 #include <linux/interconnect.h>
9 #include <linux/iopoll.h>
10 #include <linux/kernel.h>
11 #include <linux/pm_domain.h>
12 #include <linux/pm_opp.h>
13 #include <linux/pm_runtime.h>
14 #include <linux/types.h>
15 #include <media/v4l2-mem2mem.h>
18 #include "hfi_parser.h"
19 #include "hfi_venus_io.h"
20 #include "pm_helpers.h"
22 static bool legacy_binding;
24 static int core_clks_get(struct venus_core *core)
26 const struct venus_resources *res = core->res;
27 struct device *dev = core->dev;
30 for (i = 0; i < res->clks_num; i++) {
31 core->clks[i] = devm_clk_get(dev, res->clks[i]);
32 if (IS_ERR(core->clks[i]))
33 return PTR_ERR(core->clks[i]);
39 static int core_clks_enable(struct venus_core *core)
41 const struct venus_resources *res = core->res;
45 for (i = 0; i < res->clks_num; i++) {
46 ret = clk_prepare_enable(core->clks[i]);
54 clk_disable_unprepare(core->clks[i]);
59 static void core_clks_disable(struct venus_core *core)
61 const struct venus_resources *res = core->res;
62 unsigned int i = res->clks_num;
65 clk_disable_unprepare(core->clks[i]);
68 static int core_clks_set_rate(struct venus_core *core, unsigned long freq)
72 ret = dev_pm_opp_set_rate(core->dev, freq);
76 ret = clk_set_rate(core->vcodec0_clks[0], freq);
80 ret = clk_set_rate(core->vcodec1_clks[0], freq);
87 static int vcodec_clks_get(struct venus_core *core, struct device *dev,
88 struct clk **clks, const char * const *id)
90 const struct venus_resources *res = core->res;
93 for (i = 0; i < res->vcodec_clks_num; i++) {
96 clks[i] = devm_clk_get(dev, id[i]);
98 return PTR_ERR(clks[i]);
104 static int vcodec_clks_enable(struct venus_core *core, struct clk **clks)
106 const struct venus_resources *res = core->res;
110 for (i = 0; i < res->vcodec_clks_num; i++) {
111 ret = clk_prepare_enable(clks[i]);
119 clk_disable_unprepare(clks[i]);
124 static void vcodec_clks_disable(struct venus_core *core, struct clk **clks)
126 const struct venus_resources *res = core->res;
127 unsigned int i = res->vcodec_clks_num;
130 clk_disable_unprepare(clks[i]);
133 static u32 load_per_instance(struct venus_inst *inst)
137 if (!inst || !(inst->state >= INST_INIT && inst->state < INST_STOP))
140 mbs = (ALIGN(inst->width, 16) / 16) * (ALIGN(inst->height, 16) / 16);
142 return mbs * inst->fps;
145 static u32 load_per_type(struct venus_core *core, u32 session_type)
147 struct venus_inst *inst = NULL;
150 mutex_lock(&core->lock);
151 list_for_each_entry(inst, &core->instances, list) {
152 if (inst->session_type != session_type)
155 mbs_per_sec += load_per_instance(inst);
157 mutex_unlock(&core->lock);
162 static void mbs_to_bw(struct venus_inst *inst, u32 mbs, u32 *avg, u32 *peak)
164 const struct venus_resources *res = inst->core->res;
165 const struct bw_tbl *bw_tbl;
166 unsigned int num_rows, i;
174 if (inst->session_type == VIDC_SESSION_TYPE_ENC) {
175 num_rows = res->bw_tbl_enc_size;
176 bw_tbl = res->bw_tbl_enc;
177 } else if (inst->session_type == VIDC_SESSION_TYPE_DEC) {
178 num_rows = res->bw_tbl_dec_size;
179 bw_tbl = res->bw_tbl_dec;
184 if (!bw_tbl || num_rows == 0)
187 for (i = 0; i < num_rows; i++) {
188 if (mbs > bw_tbl[i].mbs_per_sec)
191 if (inst->dpb_fmt & HFI_COLOR_FORMAT_10_BIT_BASE) {
192 *avg = bw_tbl[i].avg_10bit;
193 *peak = bw_tbl[i].peak_10bit;
195 *avg = bw_tbl[i].avg;
196 *peak = bw_tbl[i].peak;
201 static int load_scale_bw(struct venus_core *core)
203 struct venus_inst *inst = NULL;
204 u32 mbs_per_sec, avg, peak, total_avg = 0, total_peak = 0;
206 mutex_lock(&core->lock);
207 list_for_each_entry(inst, &core->instances, list) {
208 mbs_per_sec = load_per_instance(inst);
209 mbs_to_bw(inst, mbs_per_sec, &avg, &peak);
213 mutex_unlock(&core->lock);
215 dev_dbg(core->dev, VDBGL "total: avg_bw: %u, peak_bw: %u\n",
216 total_avg, total_peak);
218 return icc_set_bw(core->video_path, total_avg, total_peak);
221 static int load_scale_v1(struct venus_inst *inst)
223 struct venus_core *core = inst->core;
224 const struct freq_tbl *table = core->res->freq_tbl;
225 unsigned int num_rows = core->res->freq_tbl_size;
226 unsigned long freq = table[0].freq;
227 struct device *dev = core->dev;
232 mbs_per_sec = load_per_type(core, VIDC_SESSION_TYPE_ENC) +
233 load_per_type(core, VIDC_SESSION_TYPE_DEC);
235 if (mbs_per_sec > core->res->max_load)
236 dev_warn(dev, "HW is overloaded, needed: %d max: %d\n",
237 mbs_per_sec, core->res->max_load);
239 if (!mbs_per_sec && num_rows > 1) {
240 freq = table[num_rows - 1].freq;
244 for (i = 0; i < num_rows; i++) {
245 if (mbs_per_sec > table[i].load)
247 freq = table[i].freq;
252 ret = core_clks_set_rate(core, freq);
254 dev_err(dev, "failed to set clock rate %lu (%d)\n",
259 ret = load_scale_bw(core);
261 dev_err(dev, "failed to set bandwidth (%d)\n",
269 static int core_get_v1(struct device *dev)
271 struct venus_core *core = dev_get_drvdata(dev);
273 return core_clks_get(core);
276 static int core_power_v1(struct device *dev, int on)
278 struct venus_core *core = dev_get_drvdata(dev);
282 ret = core_clks_enable(core);
284 core_clks_disable(core);
289 static const struct venus_pm_ops pm_ops_v1 = {
290 .core_get = core_get_v1,
291 .core_power = core_power_v1,
292 .load_scale = load_scale_v1,
296 vcodec_control_v3(struct venus_core *core, u32 session_type, bool enable)
300 if (session_type == VIDC_SESSION_TYPE_DEC)
301 ctrl = core->base + WRAPPER_VDEC_VCODEC_POWER_CONTROL;
303 ctrl = core->base + WRAPPER_VENC_VCODEC_POWER_CONTROL;
311 static int vdec_get_v3(struct device *dev)
313 struct venus_core *core = dev_get_drvdata(dev);
315 return vcodec_clks_get(core, dev, core->vcodec0_clks,
316 core->res->vcodec0_clks);
319 static int vdec_power_v3(struct device *dev, int on)
321 struct venus_core *core = dev_get_drvdata(dev);
324 vcodec_control_v3(core, VIDC_SESSION_TYPE_DEC, true);
327 ret = vcodec_clks_enable(core, core->vcodec0_clks);
329 vcodec_clks_disable(core, core->vcodec0_clks);
331 vcodec_control_v3(core, VIDC_SESSION_TYPE_DEC, false);
336 static int venc_get_v3(struct device *dev)
338 struct venus_core *core = dev_get_drvdata(dev);
340 return vcodec_clks_get(core, dev, core->vcodec1_clks,
341 core->res->vcodec1_clks);
344 static int venc_power_v3(struct device *dev, int on)
346 struct venus_core *core = dev_get_drvdata(dev);
349 vcodec_control_v3(core, VIDC_SESSION_TYPE_ENC, true);
352 ret = vcodec_clks_enable(core, core->vcodec1_clks);
354 vcodec_clks_disable(core, core->vcodec1_clks);
356 vcodec_control_v3(core, VIDC_SESSION_TYPE_ENC, false);
361 static const struct venus_pm_ops pm_ops_v3 = {
362 .core_get = core_get_v1,
363 .core_power = core_power_v1,
364 .vdec_get = vdec_get_v3,
365 .vdec_power = vdec_power_v3,
366 .venc_get = venc_get_v3,
367 .venc_power = venc_power_v3,
368 .load_scale = load_scale_v1,
371 static int vcodec_control_v4(struct venus_core *core, u32 coreid, bool enable)
373 void __iomem *ctrl, *stat;
377 if (coreid == VIDC_CORE_ID_1) {
378 ctrl = core->base + WRAPPER_VCODEC0_MMCC_POWER_CONTROL;
379 stat = core->base + WRAPPER_VCODEC0_MMCC_POWER_STATUS;
381 ctrl = core->base + WRAPPER_VCODEC1_MMCC_POWER_CONTROL;
382 stat = core->base + WRAPPER_VCODEC1_MMCC_POWER_STATUS;
388 ret = readl_poll_timeout(stat, val, val & BIT(1), 1, 100);
394 ret = readl_poll_timeout(stat, val, !(val & BIT(1)), 1, 100);
402 static int poweroff_coreid(struct venus_core *core, unsigned int coreid_mask)
406 if (coreid_mask & VIDC_CORE_ID_1) {
407 ret = vcodec_control_v4(core, VIDC_CORE_ID_1, true);
411 vcodec_clks_disable(core, core->vcodec0_clks);
413 ret = vcodec_control_v4(core, VIDC_CORE_ID_1, false);
417 ret = pm_runtime_put_sync(core->pmdomains[1]);
422 if (coreid_mask & VIDC_CORE_ID_2) {
423 ret = vcodec_control_v4(core, VIDC_CORE_ID_2, true);
427 vcodec_clks_disable(core, core->vcodec1_clks);
429 ret = vcodec_control_v4(core, VIDC_CORE_ID_2, false);
433 ret = pm_runtime_put_sync(core->pmdomains[2]);
441 static int poweron_coreid(struct venus_core *core, unsigned int coreid_mask)
445 if (coreid_mask & VIDC_CORE_ID_1) {
446 ret = pm_runtime_get_sync(core->pmdomains[1]);
450 ret = vcodec_control_v4(core, VIDC_CORE_ID_1, true);
454 ret = vcodec_clks_enable(core, core->vcodec0_clks);
458 ret = vcodec_control_v4(core, VIDC_CORE_ID_1, false);
463 if (coreid_mask & VIDC_CORE_ID_2) {
464 ret = pm_runtime_get_sync(core->pmdomains[2]);
468 ret = vcodec_control_v4(core, VIDC_CORE_ID_2, true);
472 ret = vcodec_clks_enable(core, core->vcodec1_clks);
476 ret = vcodec_control_v4(core, VIDC_CORE_ID_2, false);
485 min_loaded_core(struct venus_inst *inst, u32 *min_coreid, u32 *min_load)
487 u32 mbs_per_sec, load, core1_load = 0, core2_load = 0;
488 u32 cores_max = core_num_max(inst);
489 struct venus_core *core = inst->core;
490 struct venus_inst *inst_pos;
491 unsigned long vpp_freq;
494 mutex_lock(&core->lock);
496 list_for_each_entry(inst_pos, &core->instances, list) {
497 if (inst_pos == inst)
500 if (inst_pos->state != INST_START)
503 vpp_freq = inst_pos->clk_data.codec_freq_data->vpp_freq;
504 coreid = inst_pos->clk_data.core_id;
506 mbs_per_sec = load_per_instance(inst_pos);
507 load = mbs_per_sec * vpp_freq;
509 if ((coreid & VIDC_CORE_ID_3) == VIDC_CORE_ID_3) {
510 core1_load += load / 2;
511 core2_load += load / 2;
512 } else if (coreid & VIDC_CORE_ID_1) {
514 } else if (coreid & VIDC_CORE_ID_2) {
519 *min_coreid = core1_load <= core2_load ?
520 VIDC_CORE_ID_1 : VIDC_CORE_ID_2;
521 *min_load = min(core1_load, core2_load);
523 if (cores_max < VIDC_CORE_ID_2 || core->res->vcodec_num < 2) {
524 *min_coreid = VIDC_CORE_ID_1;
525 *min_load = core1_load;
528 mutex_unlock(&core->lock);
531 static int decide_core(struct venus_inst *inst)
533 const u32 ptype = HFI_PROPERTY_CONFIG_VIDEOCORES_USAGE;
534 struct venus_core *core = inst->core;
535 u32 min_coreid, min_load, inst_load;
536 struct hfi_videocores_usage_type cu;
537 unsigned long max_freq;
539 if (legacy_binding) {
540 if (inst->session_type == VIDC_SESSION_TYPE_DEC)
541 cu.video_core_enable_mask = VIDC_CORE_ID_1;
543 cu.video_core_enable_mask = VIDC_CORE_ID_2;
548 if (inst->clk_data.core_id != VIDC_CORE_ID_DEFAULT)
551 inst_load = load_per_instance(inst);
552 inst_load *= inst->clk_data.codec_freq_data->vpp_freq;
553 max_freq = core->res->freq_tbl[0].freq;
555 min_loaded_core(inst, &min_coreid, &min_load);
557 if ((inst_load + min_load) > max_freq) {
558 dev_warn(core->dev, "HW is overloaded, needed: %u max: %lu\n",
559 inst_load, max_freq);
563 inst->clk_data.core_id = min_coreid;
564 cu.video_core_enable_mask = min_coreid;
567 return hfi_session_set_property(inst, ptype, &cu);
570 static int acquire_core(struct venus_inst *inst)
572 struct venus_core *core = inst->core;
573 unsigned int coreid_mask = 0;
575 if (inst->core_acquired)
578 inst->core_acquired = true;
580 if (inst->clk_data.core_id & VIDC_CORE_ID_1) {
581 if (core->core0_usage_count++)
584 coreid_mask = VIDC_CORE_ID_1;
587 if (inst->clk_data.core_id & VIDC_CORE_ID_2) {
588 if (core->core1_usage_count++)
591 coreid_mask |= VIDC_CORE_ID_2;
594 return poweron_coreid(core, coreid_mask);
597 static int release_core(struct venus_inst *inst)
599 struct venus_core *core = inst->core;
600 unsigned int coreid_mask = 0;
603 if (!inst->core_acquired)
606 if (inst->clk_data.core_id & VIDC_CORE_ID_1) {
607 if (--core->core0_usage_count)
610 coreid_mask = VIDC_CORE_ID_1;
613 if (inst->clk_data.core_id & VIDC_CORE_ID_2) {
614 if (--core->core1_usage_count)
617 coreid_mask |= VIDC_CORE_ID_2;
620 ret = poweroff_coreid(core, coreid_mask);
625 inst->clk_data.core_id = VIDC_CORE_ID_DEFAULT;
626 inst->core_acquired = false;
630 static int coreid_power_v4(struct venus_inst *inst, int on)
632 struct venus_core *core = inst->core;
638 if (on == POWER_ON) {
639 ret = decide_core(inst);
643 mutex_lock(&core->lock);
644 ret = acquire_core(inst);
645 mutex_unlock(&core->lock);
647 mutex_lock(&core->lock);
648 ret = release_core(inst);
649 mutex_unlock(&core->lock);
655 static int vdec_get_v4(struct device *dev)
657 struct venus_core *core = dev_get_drvdata(dev);
662 return vcodec_clks_get(core, dev, core->vcodec0_clks,
663 core->res->vcodec0_clks);
666 static void vdec_put_v4(struct device *dev)
668 struct venus_core *core = dev_get_drvdata(dev);
674 for (i = 0; i < core->res->vcodec_clks_num; i++)
675 core->vcodec0_clks[i] = NULL;
678 static int vdec_power_v4(struct device *dev, int on)
680 struct venus_core *core = dev_get_drvdata(dev);
686 ret = vcodec_control_v4(core, VIDC_CORE_ID_1, true);
691 ret = vcodec_clks_enable(core, core->vcodec0_clks);
693 vcodec_clks_disable(core, core->vcodec0_clks);
695 vcodec_control_v4(core, VIDC_CORE_ID_1, false);
700 static int venc_get_v4(struct device *dev)
702 struct venus_core *core = dev_get_drvdata(dev);
707 return vcodec_clks_get(core, dev, core->vcodec1_clks,
708 core->res->vcodec1_clks);
711 static void venc_put_v4(struct device *dev)
713 struct venus_core *core = dev_get_drvdata(dev);
719 for (i = 0; i < core->res->vcodec_clks_num; i++)
720 core->vcodec1_clks[i] = NULL;
723 static int venc_power_v4(struct device *dev, int on)
725 struct venus_core *core = dev_get_drvdata(dev);
731 ret = vcodec_control_v4(core, VIDC_CORE_ID_2, true);
736 ret = vcodec_clks_enable(core, core->vcodec1_clks);
738 vcodec_clks_disable(core, core->vcodec1_clks);
740 vcodec_control_v4(core, VIDC_CORE_ID_2, false);
745 static int vcodec_domains_get(struct device *dev)
748 struct opp_table *opp_table;
749 struct device **opp_virt_dev;
750 struct venus_core *core = dev_get_drvdata(dev);
751 const struct venus_resources *res = core->res;
755 if (!res->vcodec_pmdomains_num)
758 for (i = 0; i < res->vcodec_pmdomains_num; i++) {
759 pd = dev_pm_domain_attach_by_name(dev,
760 res->vcodec_pmdomains[i]);
763 core->pmdomains[i] = pd;
766 core->pd_dl_venus = device_link_add(dev, core->pmdomains[0],
770 if (!core->pd_dl_venus)
774 if (!core->has_opp_table)
777 /* Attach the power domain for setting performance state */
778 opp_table = dev_pm_opp_attach_genpd(dev, res->opp_pmdomain, &opp_virt_dev);
779 if (IS_ERR(opp_table)) {
780 ret = PTR_ERR(opp_table);
784 core->opp_pmdomain = *opp_virt_dev;
785 core->opp_dl_venus = device_link_add(dev, core->opp_pmdomain,
789 if (!core->opp_dl_venus) {
797 dev_pm_opp_detach_genpd(core->opp_table);
799 if (core->pd_dl_venus) {
800 device_link_del(core->pd_dl_venus);
801 for (i = 0; i < res->vcodec_pmdomains_num; i++) {
802 if (IS_ERR_OR_NULL(core->pmdomains[i]))
804 dev_pm_domain_detach(core->pmdomains[i], true);
810 static void vcodec_domains_put(struct device *dev)
812 struct venus_core *core = dev_get_drvdata(dev);
813 const struct venus_resources *res = core->res;
816 if (!res->vcodec_pmdomains_num)
819 if (core->pd_dl_venus)
820 device_link_del(core->pd_dl_venus);
822 for (i = 0; i < res->vcodec_pmdomains_num; i++) {
823 if (IS_ERR_OR_NULL(core->pmdomains[i]))
825 dev_pm_domain_detach(core->pmdomains[i], true);
829 if (!core->has_opp_table)
832 if (core->opp_dl_venus)
833 device_link_del(core->opp_dl_venus);
835 dev_pm_opp_detach_genpd(core->opp_table);
838 static int core_get_v4(struct device *dev)
840 struct venus_core *core = dev_get_drvdata(dev);
841 const struct venus_resources *res = core->res;
844 ret = core_clks_get(core);
848 if (!res->vcodec_pmdomains_num)
849 legacy_binding = true;
851 dev_info(dev, "%s legacy binding\n", legacy_binding ? "" : "non");
853 ret = vcodec_clks_get(core, dev, core->vcodec0_clks, res->vcodec0_clks);
857 ret = vcodec_clks_get(core, dev, core->vcodec1_clks, res->vcodec1_clks);
864 core->opp_table = dev_pm_opp_set_clkname(dev, "core");
865 if (IS_ERR(core->opp_table))
866 return PTR_ERR(core->opp_table);
868 if (core->res->opp_pmdomain) {
869 ret = dev_pm_opp_of_add_table(dev);
871 core->has_opp_table = true;
872 } else if (ret != -ENODEV) {
873 dev_err(dev, "invalid OPP table in device tree\n");
874 dev_pm_opp_put_clkname(core->opp_table);
879 ret = vcodec_domains_get(dev);
881 if (core->has_opp_table)
882 dev_pm_opp_of_remove_table(dev);
883 dev_pm_opp_put_clkname(core->opp_table);
890 static void core_put_v4(struct device *dev)
892 struct venus_core *core = dev_get_drvdata(dev);
897 vcodec_domains_put(dev);
899 if (core->has_opp_table)
900 dev_pm_opp_of_remove_table(dev);
902 dev_pm_opp_put_clkname(core->opp_table);
906 static int core_power_v4(struct device *dev, int on)
908 struct venus_core *core = dev_get_drvdata(dev);
911 if (on == POWER_ON) {
912 ret = core_clks_enable(core);
914 /* Drop the performance state vote */
915 if (core->opp_pmdomain)
916 dev_pm_opp_set_rate(dev, 0);
918 core_clks_disable(core);
924 static unsigned long calculate_inst_freq(struct venus_inst *inst,
925 unsigned long filled_len)
927 unsigned long vpp_freq = 0, vsp_freq = 0;
928 u32 fps = (u32)inst->fps;
931 mbs_per_sec = load_per_instance(inst) / fps;
933 vpp_freq = mbs_per_sec * inst->clk_data.codec_freq_data->vpp_freq;
934 /* 21 / 20 is overhead factor */
935 vpp_freq += vpp_freq / 20;
936 vsp_freq = mbs_per_sec * inst->clk_data.codec_freq_data->vsp_freq;
938 /* 10 / 7 is overhead factor */
939 if (inst->session_type == VIDC_SESSION_TYPE_ENC)
940 vsp_freq += (inst->controls.enc.bitrate * 10) / 7;
942 vsp_freq += ((fps * filled_len * 8) * 10) / 7;
944 return max(vpp_freq, vsp_freq);
947 static int load_scale_v4(struct venus_inst *inst)
949 struct venus_core *core = inst->core;
950 const struct freq_tbl *table = core->res->freq_tbl;
951 unsigned int num_rows = core->res->freq_tbl_size;
952 struct device *dev = core->dev;
953 unsigned long freq = 0, freq_core1 = 0, freq_core2 = 0;
954 unsigned long filled_len = 0;
957 for (i = 0; i < inst->num_input_bufs; i++)
958 filled_len = max(filled_len, inst->payloads[i]);
960 if (inst->session_type == VIDC_SESSION_TYPE_DEC && !filled_len)
963 freq = calculate_inst_freq(inst, filled_len);
964 inst->clk_data.freq = freq;
966 mutex_lock(&core->lock);
967 list_for_each_entry(inst, &core->instances, list) {
968 if (inst->clk_data.core_id == VIDC_CORE_ID_1) {
969 freq_core1 += inst->clk_data.freq;
970 } else if (inst->clk_data.core_id == VIDC_CORE_ID_2) {
971 freq_core2 += inst->clk_data.freq;
972 } else if (inst->clk_data.core_id == VIDC_CORE_ID_3) {
973 freq_core1 += inst->clk_data.freq;
974 freq_core2 += inst->clk_data.freq;
977 mutex_unlock(&core->lock);
979 freq = max(freq_core1, freq_core2);
981 if (freq >= table[0].freq) {
982 freq = table[0].freq;
983 dev_warn(dev, "HW is overloaded, needed: %lu max: %lu\n",
984 freq, table[0].freq);
988 for (i = num_rows - 1 ; i >= 0; i--) {
989 if (freq <= table[i].freq) {
990 freq = table[i].freq;
997 ret = core_clks_set_rate(core, freq);
999 dev_err(dev, "failed to set clock rate %lu (%d)\n",
1004 ret = load_scale_bw(core);
1006 dev_err(dev, "failed to set bandwidth (%d)\n",
1014 static const struct venus_pm_ops pm_ops_v4 = {
1015 .core_get = core_get_v4,
1016 .core_put = core_put_v4,
1017 .core_power = core_power_v4,
1018 .vdec_get = vdec_get_v4,
1019 .vdec_put = vdec_put_v4,
1020 .vdec_power = vdec_power_v4,
1021 .venc_get = venc_get_v4,
1022 .venc_put = venc_put_v4,
1023 .venc_power = venc_power_v4,
1024 .coreid_power = coreid_power_v4,
1025 .load_scale = load_scale_v4,
1028 const struct venus_pm_ops *venus_pm_get(enum hfi_version version)
1031 case HFI_VERSION_1XX:
1034 case HFI_VERSION_3XX:
1036 case HFI_VERSION_4XX: