1189 lines
39 KiB
Diff
1189 lines
39 KiB
Diff
From 4a47b09deb67c3854ac102bcb18ef0df00aae437 Mon Sep 17 00:00:00 2001
|
|
From: Peter Jung <admin@ptr1337.dev>
|
|
Date: Wed, 3 Apr 2024 17:06:20 +0200
|
|
Subject: [PATCH 2/8] amd-pstate
|
|
|
|
Signed-off-by: Peter Jung <admin@ptr1337.dev>
|
|
---
|
|
.../admin-guide/kernel-parameters.txt | 5 +
|
|
Documentation/admin-guide/pm/amd-pstate.rst | 70 ++-
|
|
arch/x86/Kconfig | 5 +-
|
|
arch/x86/include/asm/msr-index.h | 2 +
|
|
drivers/acpi/cppc_acpi.c | 17 +-
|
|
drivers/acpi/processor_driver.c | 6 +
|
|
drivers/cpufreq/acpi-cpufreq.c | 2 -
|
|
drivers/cpufreq/amd-pstate-ut.c | 2 +-
|
|
drivers/cpufreq/amd-pstate.c | 499 +++++++++++++++---
|
|
include/acpi/cppc_acpi.h | 5 +
|
|
include/linux/amd-pstate.h | 32 +-
|
|
include/linux/cpufreq.h | 1 +
|
|
12 files changed, 560 insertions(+), 86 deletions(-)
|
|
|
|
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
|
|
index d2150bd3acc5..71ed7f1b0f9b 100644
|
|
--- a/Documentation/admin-guide/kernel-parameters.txt
|
|
+++ b/Documentation/admin-guide/kernel-parameters.txt
|
|
@@ -374,6 +374,11 @@
|
|
selects a performance level in this range and appropriate
|
|
to the current workload.
|
|
|
|
+ amd_prefcore=
|
|
+ [X86]
|
|
+ disable
|
|
+ Disable amd-pstate preferred core.
|
|
+
|
|
amijoy.map= [HW,JOY] Amiga joystick support
|
|
Map of devices attached to JOY0DAT and JOY1DAT
|
|
Format: <a>,<b>
|
|
diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst
|
|
index 9eb26014d34b..82fbd01da658 100644
|
|
--- a/Documentation/admin-guide/pm/amd-pstate.rst
|
|
+++ b/Documentation/admin-guide/pm/amd-pstate.rst
|
|
@@ -300,8 +300,8 @@ platforms. The AMD P-States mechanism is the more performance and energy
|
|
efficiency frequency management method on AMD processors.
|
|
|
|
|
|
-AMD Pstate Driver Operation Modes
|
|
-=================================
|
|
+``amd-pstate`` Driver Operation Modes
|
|
+======================================
|
|
|
|
``amd_pstate`` CPPC has 3 operation modes: autonomous (active) mode,
|
|
non-autonomous (passive) mode and guided autonomous (guided) mode.
|
|
@@ -353,6 +353,48 @@ is activated. In this mode, driver requests minimum and maximum performance
|
|
level and the platform autonomously selects a performance level in this range
|
|
and appropriate to the current workload.
|
|
|
|
+``amd-pstate`` Preferred Core
|
|
+=================================
|
|
+
|
|
+The core frequency is subjected to the process variation in semiconductors.
|
|
+Not all cores are able to reach the maximum frequency respecting the
|
|
+infrastructure limits. Consequently, AMD has redefined the concept of
|
|
+maximum frequency of a part. This means that a fraction of cores can reach
|
|
+maximum frequency. To find the best process scheduling policy for a given
|
|
+scenario, OS needs to know the core ordering informed by the platform through
|
|
+highest performance capability register of the CPPC interface.
|
|
+
|
|
+``amd-pstate`` preferred core enables the scheduler to prefer scheduling on
|
|
+cores that can achieve a higher frequency with lower voltage. The preferred
|
|
+core rankings can dynamically change based on the workload, platform conditions,
|
|
+thermals and ageing.
|
|
+
|
|
+The priority metric will be initialized by the ``amd-pstate`` driver. The ``amd-pstate``
|
|
+driver will also determine whether or not ``amd-pstate`` preferred core is
|
|
+supported by the platform.
|
|
+
|
|
+``amd-pstate`` driver will provide an initial core ordering when the system boots.
|
|
+The platform uses the CPPC interfaces to communicate the core ranking to the
|
|
+operating system and scheduler to make sure that OS is choosing the cores
|
|
+with highest performance firstly for scheduling the process. When ``amd-pstate``
|
|
+driver receives a message with the highest performance change, it will
|
|
+update the core ranking and set the cpu's priority.
|
|
+
|
|
+``amd-pstate`` Preferred Core Switch
|
|
+=====================================
|
|
+Kernel Parameters
|
|
+-----------------
|
|
+
|
|
+``amd-pstate`` preferred core has two states: enable and disable.
|
|
+Enable/disable states can be chosen by different kernel parameters.
|
|
+Default enable ``amd-pstate`` preferred core.
|
|
+
|
|
+``amd_prefcore=disable``
|
|
+
|
|
+For systems that support ``amd-pstate`` preferred core, the core rankings will
|
|
+always be advertised by the platform. But OS can choose to ignore that via the
|
|
+kernel parameter ``amd_prefcore=disable``.
|
|
+
|
|
User Space Interface in ``sysfs`` - General
|
|
===========================================
|
|
|
|
@@ -385,6 +427,30 @@ control its functionality at the system level. They are located in the
|
|
to the operation mode represented by that string - or to be
|
|
unregistered in the "disable" case.
|
|
|
|
+``prefcore``
|
|
+ Preferred core state of the driver: "enabled" or "disabled".
|
|
+
|
|
+ "enabled"
|
|
+ Enable the ``amd-pstate`` preferred core.
|
|
+
|
|
+ "disabled"
|
|
+ Disable the ``amd-pstate`` preferred core
|
|
+
|
|
+
|
|
+ This attribute is read-only to check the state of preferred core set
|
|
+ by the kernel parameter.
|
|
+
|
|
+``cpb_boost``
|
|
+ Specifies whether core performance boost is requested to be enabled or disabled.
|
|
+ If core performance boost is disabled while a core is in a boosted P-state, the
|
|
+ core automatically transitions to the highest performance non-boosted P-state.
|
|
+ AMD Core Performance Boost (CPB) is controlled by this new attribute file which
|
|
+ allows users to change the frequency boosting state of all cores. It supports both
|
|
+ ``active``, ``passive`` and ``guided`` mode control with the below values written to it.
|
|
+
|
|
+ "0" Disable Core Performance Boosting
|
|
+ "1" Enable Core Performance Boosting
|
|
+
|
|
``cpupower`` tool support for ``amd-pstate``
|
|
===============================================
|
|
|
|
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
|
|
index 184730705650..70732a76171f 100644
|
|
--- a/arch/x86/Kconfig
|
|
+++ b/arch/x86/Kconfig
|
|
@@ -1054,8 +1054,9 @@ config SCHED_MC
|
|
|
|
config SCHED_MC_PRIO
|
|
bool "CPU core priorities scheduler support"
|
|
- depends on SCHED_MC && CPU_SUP_INTEL
|
|
- select X86_INTEL_PSTATE
|
|
+ depends on SCHED_MC
|
|
+ select X86_INTEL_PSTATE if CPU_SUP_INTEL
|
|
+ select X86_AMD_PSTATE if CPU_SUP_AMD && ACPI
|
|
select CPU_FREQ
|
|
default y
|
|
help
|
|
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
|
|
index d1b5edaf6c34..bfe139eb75b6 100644
|
|
--- a/arch/x86/include/asm/msr-index.h
|
|
+++ b/arch/x86/include/asm/msr-index.h
|
|
@@ -744,6 +744,8 @@
|
|
#define MSR_K7_HWCR_IRPERF_EN BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT)
|
|
#define MSR_K7_FID_VID_CTL 0xc0010041
|
|
#define MSR_K7_FID_VID_STATUS 0xc0010042
|
|
+#define MSR_K7_HWCR_CPB_DIS_BIT 25
|
|
+#define MSR_K7_HWCR_CPB_DIS BIT_ULL(MSR_K7_HWCR_CPB_DIS_BIT)
|
|
|
|
/* K6 MSRs */
|
|
#define MSR_K6_WHCR 0xc0000082
|
|
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
|
|
index d155a86a8614..e23a84f4a50a 100644
|
|
--- a/drivers/acpi/cppc_acpi.c
|
|
+++ b/drivers/acpi/cppc_acpi.c
|
|
@@ -679,8 +679,10 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr)
|
|
|
|
if (!osc_sb_cppc2_support_acked) {
|
|
pr_debug("CPPC v2 _OSC not acked\n");
|
|
- if (!cpc_supported_by_cpu())
|
|
+ if (!cpc_supported_by_cpu()) {
|
|
+ pr_debug("CPPC is not supported by the CPU\n");
|
|
return -ENODEV;
|
|
+ }
|
|
}
|
|
|
|
/* Parse the ACPI _CPC table for this CPU. */
|
|
@@ -1157,6 +1159,19 @@ int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf)
|
|
return cppc_get_perf(cpunum, NOMINAL_PERF, nominal_perf);
|
|
}
|
|
|
|
+/**
|
|
+ * cppc_get_highest_perf - Get the highest performance register value.
|
|
+ * @cpunum: CPU from which to get highest performance.
|
|
+ * @highest_perf: Return address.
|
|
+ *
|
|
+ * Return: 0 for success, -EIO otherwise.
|
|
+ */
|
|
+int cppc_get_highest_perf(int cpunum, u64 *highest_perf)
|
|
+{
|
|
+ return cppc_get_perf(cpunum, HIGHEST_PERF, highest_perf);
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(cppc_get_highest_perf);
|
|
+
|
|
/**
|
|
* cppc_get_epp_perf - Get the epp register value.
|
|
* @cpunum: CPU from which to get epp preference value.
|
|
diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c
|
|
index 4bd16b3f0781..67db60eda370 100644
|
|
--- a/drivers/acpi/processor_driver.c
|
|
+++ b/drivers/acpi/processor_driver.c
|
|
@@ -27,6 +27,7 @@
|
|
#define ACPI_PROCESSOR_NOTIFY_PERFORMANCE 0x80
|
|
#define ACPI_PROCESSOR_NOTIFY_POWER 0x81
|
|
#define ACPI_PROCESSOR_NOTIFY_THROTTLING 0x82
|
|
+#define ACPI_PROCESSOR_NOTIFY_HIGEST_PERF_CHANGED 0x85
|
|
|
|
MODULE_AUTHOR("Paul Diefenbaugh");
|
|
MODULE_DESCRIPTION("ACPI Processor Driver");
|
|
@@ -83,6 +84,11 @@ static void acpi_processor_notify(acpi_handle handle, u32 event, void *data)
|
|
acpi_bus_generate_netlink_event(device->pnp.device_class,
|
|
dev_name(&device->dev), event, 0);
|
|
break;
|
|
+ case ACPI_PROCESSOR_NOTIFY_HIGEST_PERF_CHANGED:
|
|
+ cpufreq_update_limits(pr->id);
|
|
+ acpi_bus_generate_netlink_event(device->pnp.device_class,
|
|
+ dev_name(&device->dev), event, 0);
|
|
+ break;
|
|
default:
|
|
acpi_handle_debug(handle, "Unsupported event [0x%x]\n", event);
|
|
break;
|
|
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
|
|
index 37f1cdf46d29..2fc82831bddd 100644
|
|
--- a/drivers/cpufreq/acpi-cpufreq.c
|
|
+++ b/drivers/cpufreq/acpi-cpufreq.c
|
|
@@ -50,8 +50,6 @@ enum {
|
|
#define AMD_MSR_RANGE (0x7)
|
|
#define HYGON_MSR_RANGE (0x7)
|
|
|
|
-#define MSR_K7_HWCR_CPB_DIS (1ULL << 25)
|
|
-
|
|
struct acpi_cpufreq_data {
|
|
unsigned int resume;
|
|
unsigned int cpu_feature;
|
|
diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c
|
|
index f04ae67dda37..b3601b0e6dd3 100644
|
|
--- a/drivers/cpufreq/amd-pstate-ut.c
|
|
+++ b/drivers/cpufreq/amd-pstate-ut.c
|
|
@@ -226,7 +226,7 @@ static void amd_pstate_ut_check_freq(u32 index)
|
|
goto skip_test;
|
|
}
|
|
|
|
- if (cpudata->boost_supported) {
|
|
+ if (amd_pstate_global_params.cpb_boost) {
|
|
if ((policy->max == cpudata->max_freq) ||
|
|
(policy->max == cpudata->nominal_freq))
|
|
amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
|
|
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
|
|
index 07f341995439..651055df1710 100644
|
|
--- a/drivers/cpufreq/amd-pstate.c
|
|
+++ b/drivers/cpufreq/amd-pstate.c
|
|
@@ -37,6 +37,7 @@
|
|
#include <linux/uaccess.h>
|
|
#include <linux/static_call.h>
|
|
#include <linux/amd-pstate.h>
|
|
+#include <linux/topology.h>
|
|
|
|
#include <acpi/processor.h>
|
|
#include <acpi/cppc_acpi.h>
|
|
@@ -64,6 +65,10 @@ static struct cpufreq_driver amd_pstate_driver;
|
|
static struct cpufreq_driver amd_pstate_epp_driver;
|
|
static int cppc_state = AMD_PSTATE_UNDEFINED;
|
|
static bool cppc_enabled;
|
|
+static bool amd_pstate_prefcore = true;
|
|
+static struct quirk_entry *quirks;
|
|
+struct amd_pstate_global_params amd_pstate_global_params;
|
|
+EXPORT_SYMBOL_GPL(amd_pstate_global_params);
|
|
|
|
/*
|
|
* AMD Energy Preference Performance (EPP)
|
|
@@ -108,6 +113,41 @@ static unsigned int epp_values[] = {
|
|
|
|
typedef int (*cppc_mode_transition_fn)(int);
|
|
|
|
+static struct quirk_entry quirk_amd_7k62 = {
|
|
+ .nominal_freq = 2600,
|
|
+ .lowest_freq = 550,
|
|
+};
|
|
+
|
|
+static int __init dmi_matched_7k62_bios_bug(const struct dmi_system_id *dmi)
|
|
+{
|
|
+ /**
|
|
+ * match the broken bios for family 17h processor support CPPC V2
|
|
+ * broken BIOS lack of nominal_freq and lowest_freq capabilities
|
|
+ * definition in ACPI tables
|
|
+ */
|
|
+ if (boot_cpu_has(X86_FEATURE_ZEN2)) {
|
|
+ quirks = dmi->driver_data;
|
|
+ pr_info("Overriding nominal and lowest frequencies for %s\n", dmi->ident);
|
|
+ return 1;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static const struct dmi_system_id amd_pstate_quirks_table[] __initconst = {
|
|
+ {
|
|
+ .callback = dmi_matched_7k62_bios_bug,
|
|
+ .ident = "AMD EPYC 7K62",
|
|
+ .matches = {
|
|
+ DMI_MATCH(DMI_BIOS_VERSION, "5.14"),
|
|
+ DMI_MATCH(DMI_BIOS_RELEASE, "12/12/2019"),
|
|
+ },
|
|
+ .driver_data = &quirk_amd_7k62,
|
|
+ },
|
|
+ {}
|
|
+};
|
|
+MODULE_DEVICE_TABLE(dmi, amd_pstate_quirks_table);
|
|
+
|
|
static inline int get_mode_idx_from_str(const char *str, size_t size)
|
|
{
|
|
int i;
|
|
@@ -291,16 +331,20 @@ static int pstate_init_perf(struct amd_cpudata *cpudata)
|
|
{
|
|
u64 cap1;
|
|
u32 highest_perf;
|
|
+ struct cppc_perf_caps cppc_perf;
|
|
+ int ret;
|
|
|
|
- int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
|
|
+ ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
|
|
&cap1);
|
|
if (ret)
|
|
return ret;
|
|
|
|
- /*
|
|
- * TODO: Introduce AMD specific power feature.
|
|
- *
|
|
- * CPPC entry doesn't indicate the highest performance in some ASICs.
|
|
+ ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
|
|
+ if (ret)
|
|
+ return ret;
|
|
+
|
|
+ /* Some CPUs have different highest_perf from others, it is safer
|
|
+ * to read it than to assume some erroneous value, leading to performance issues.
|
|
*/
|
|
highest_perf = amd_get_highest_perf();
|
|
if (highest_perf > AMD_CPPC_HIGHEST_PERF(cap1))
|
|
@@ -311,7 +355,11 @@ static int pstate_init_perf(struct amd_cpudata *cpudata)
|
|
WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1));
|
|
WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1));
|
|
WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1));
|
|
+ WRITE_ONCE(cpudata->prefcore_ranking, AMD_CPPC_HIGHEST_PERF(cap1));
|
|
WRITE_ONCE(cpudata->min_limit_perf, AMD_CPPC_LOWEST_PERF(cap1));
|
|
+ WRITE_ONCE(cpudata->lowest_freq, cppc_perf.lowest_freq);
|
|
+ WRITE_ONCE(cpudata->nominal_freq, cppc_perf.nominal_freq);
|
|
+
|
|
return 0;
|
|
}
|
|
|
|
@@ -319,11 +367,15 @@ static int cppc_init_perf(struct amd_cpudata *cpudata)
|
|
{
|
|
struct cppc_perf_caps cppc_perf;
|
|
u32 highest_perf;
|
|
+ int ret;
|
|
|
|
- int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
|
|
+ ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
|
|
if (ret)
|
|
return ret;
|
|
|
|
+ /* Some CPUs have different highest_perf from others, it is safer
|
|
+ * to read it than to assume some erroneous value, leading to performance issues.
|
|
+ */
|
|
highest_perf = amd_get_highest_perf();
|
|
if (highest_perf > cppc_perf.highest_perf)
|
|
highest_perf = cppc_perf.highest_perf;
|
|
@@ -334,7 +386,10 @@ static int cppc_init_perf(struct amd_cpudata *cpudata)
|
|
WRITE_ONCE(cpudata->lowest_nonlinear_perf,
|
|
cppc_perf.lowest_nonlinear_perf);
|
|
WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf);
|
|
+ WRITE_ONCE(cpudata->prefcore_ranking, cppc_perf.highest_perf);
|
|
WRITE_ONCE(cpudata->min_limit_perf, cppc_perf.lowest_perf);
|
|
+ WRITE_ONCE(cpudata->lowest_freq, cppc_perf.lowest_freq);
|
|
+ WRITE_ONCE(cpudata->nominal_freq, cppc_perf.nominal_freq);
|
|
|
|
if (cppc_state == AMD_PSTATE_ACTIVE)
|
|
return 0;
|
|
@@ -430,7 +485,10 @@ static inline bool amd_pstate_sample(struct amd_cpudata *cpudata)
|
|
static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
|
|
u32 des_perf, u32 max_perf, bool fast_switch, int gov_flags)
|
|
{
|
|
+ unsigned long max_freq;
|
|
+ struct cpufreq_policy *policy = cpufreq_cpu_get(cpudata->cpu);
|
|
u64 prev = READ_ONCE(cpudata->cppc_req_cached);
|
|
+ u32 nominal_perf = READ_ONCE(cpudata->nominal_perf);
|
|
u64 value = prev;
|
|
|
|
min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf,
|
|
@@ -439,6 +497,9 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
|
|
cpudata->max_limit_perf);
|
|
des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
|
|
|
|
+ max_freq = READ_ONCE(cpudata->max_limit_freq);
|
|
+ policy->cur = div_u64(des_perf * max_freq, max_perf);
|
|
+
|
|
if ((cppc_state == AMD_PSTATE_GUIDED) && (gov_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) {
|
|
min_perf = des_perf;
|
|
des_perf = 0;
|
|
@@ -450,6 +511,10 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
|
|
value &= ~AMD_CPPC_DES_PERF(~0L);
|
|
value |= AMD_CPPC_DES_PERF(des_perf);
|
|
|
|
+ /* limit the max perf when core performance boost feature is disabled */
|
|
+ if (!amd_pstate_global_params.cpb_boost)
|
|
+ max_perf = min_t(unsigned long, nominal_perf, max_perf);
|
|
+
|
|
value &= ~AMD_CPPC_MAX_PERF(~0L);
|
|
value |= AMD_CPPC_MAX_PERF(max_perf);
|
|
|
|
@@ -477,12 +542,19 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy)
|
|
|
|
static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
|
|
{
|
|
- u32 max_limit_perf, min_limit_perf;
|
|
+ u32 max_limit_perf, min_limit_perf, lowest_perf;
|
|
struct amd_cpudata *cpudata = policy->driver_data;
|
|
|
|
max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq);
|
|
min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq);
|
|
|
|
+ lowest_perf = READ_ONCE(cpudata->lowest_perf);
|
|
+ if (min_limit_perf < lowest_perf)
|
|
+ min_limit_perf = lowest_perf;
|
|
+
|
|
+ if (max_limit_perf < min_limit_perf)
|
|
+ max_limit_perf = min_limit_perf;
|
|
+
|
|
WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf);
|
|
WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf);
|
|
WRITE_ONCE(cpudata->max_limit_freq, policy->max);
|
|
@@ -553,10 +625,9 @@ static void amd_pstate_adjust_perf(unsigned int cpu,
|
|
unsigned long capacity)
|
|
{
|
|
unsigned long max_perf, min_perf, des_perf,
|
|
- cap_perf, lowest_nonlinear_perf, max_freq;
|
|
+ cap_perf, lowest_nonlinear_perf;
|
|
struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
|
|
struct amd_cpudata *cpudata = policy->driver_data;
|
|
- unsigned int target_freq;
|
|
|
|
if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
|
|
amd_pstate_update_min_max_limit(policy);
|
|
@@ -564,7 +635,6 @@ static void amd_pstate_adjust_perf(unsigned int cpu,
|
|
|
|
cap_perf = READ_ONCE(cpudata->highest_perf);
|
|
lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
|
|
- max_freq = READ_ONCE(cpudata->max_freq);
|
|
|
|
des_perf = cap_perf;
|
|
if (target_perf < capacity)
|
|
@@ -582,8 +652,6 @@ static void amd_pstate_adjust_perf(unsigned int cpu,
|
|
max_perf = min_perf;
|
|
|
|
des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
|
|
- target_freq = div_u64(des_perf * max_freq, max_perf);
|
|
- policy->cur = target_freq;
|
|
|
|
amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true,
|
|
policy->governor->flags);
|
|
@@ -592,30 +660,30 @@ static void amd_pstate_adjust_perf(unsigned int cpu,
|
|
|
|
static int amd_get_min_freq(struct amd_cpudata *cpudata)
|
|
{
|
|
- struct cppc_perf_caps cppc_perf;
|
|
+ u32 lowest_freq;
|
|
|
|
- int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
|
|
- if (ret)
|
|
- return ret;
|
|
+ if (quirks && quirks->lowest_freq)
|
|
+ lowest_freq = quirks->lowest_freq;
|
|
+ else
|
|
+ lowest_freq = READ_ONCE(cpudata->lowest_freq);
|
|
|
|
/* Switch to khz */
|
|
- return cppc_perf.lowest_freq * 1000;
|
|
+ return lowest_freq * 1000;
|
|
}
|
|
|
|
static int amd_get_max_freq(struct amd_cpudata *cpudata)
|
|
{
|
|
- struct cppc_perf_caps cppc_perf;
|
|
u32 max_perf, max_freq, nominal_freq, nominal_perf;
|
|
u64 boost_ratio;
|
|
|
|
- int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
|
|
- if (ret)
|
|
- return ret;
|
|
-
|
|
- nominal_freq = cppc_perf.nominal_freq;
|
|
+ nominal_freq = READ_ONCE(cpudata->nominal_freq);
|
|
nominal_perf = READ_ONCE(cpudata->nominal_perf);
|
|
max_perf = READ_ONCE(cpudata->highest_perf);
|
|
|
|
+ /* when boost is off, the highest perf will be limited to nominal_perf */
|
|
+ if (!amd_pstate_global_params.cpb_boost)
|
|
+ max_perf = nominal_perf;
|
|
+
|
|
boost_ratio = div_u64(max_perf << SCHED_CAPACITY_SHIFT,
|
|
nominal_perf);
|
|
|
|
@@ -627,31 +695,25 @@ static int amd_get_max_freq(struct amd_cpudata *cpudata)
|
|
|
|
static int amd_get_nominal_freq(struct amd_cpudata *cpudata)
|
|
{
|
|
- struct cppc_perf_caps cppc_perf;
|
|
+ u32 nominal_freq;
|
|
|
|
- int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
|
|
- if (ret)
|
|
- return ret;
|
|
+ if (quirks && quirks->nominal_freq)
|
|
+ nominal_freq = quirks->nominal_freq;
|
|
+ else
|
|
+ nominal_freq = READ_ONCE(cpudata->nominal_freq);
|
|
|
|
- /* Switch to khz */
|
|
- return cppc_perf.nominal_freq * 1000;
|
|
+ return nominal_freq;
|
|
}
|
|
|
|
static int amd_get_lowest_nonlinear_freq(struct amd_cpudata *cpudata)
|
|
{
|
|
- struct cppc_perf_caps cppc_perf;
|
|
u32 lowest_nonlinear_freq, lowest_nonlinear_perf,
|
|
nominal_freq, nominal_perf;
|
|
u64 lowest_nonlinear_ratio;
|
|
|
|
- int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
|
|
- if (ret)
|
|
- return ret;
|
|
-
|
|
- nominal_freq = cppc_perf.nominal_freq;
|
|
+ nominal_freq = READ_ONCE(cpudata->nominal_freq);
|
|
nominal_perf = READ_ONCE(cpudata->nominal_perf);
|
|
-
|
|
- lowest_nonlinear_perf = cppc_perf.lowest_nonlinear_perf;
|
|
+ lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
|
|
|
|
lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << SCHED_CAPACITY_SHIFT,
|
|
nominal_perf);
|
|
@@ -662,48 +724,164 @@ static int amd_get_lowest_nonlinear_freq(struct amd_cpudata *cpudata)
|
|
return lowest_nonlinear_freq * 1000;
|
|
}
|
|
|
|
-static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state)
|
|
+static int amd_pstate_boost_init(struct amd_cpudata *cpudata)
|
|
{
|
|
- struct amd_cpudata *cpudata = policy->driver_data;
|
|
+ u64 boost_val;
|
|
int ret;
|
|
|
|
- if (!cpudata->boost_supported) {
|
|
- pr_err("Boost mode is not supported by this processor or SBIOS\n");
|
|
- return -EINVAL;
|
|
+ ret = rdmsrl_on_cpu(cpudata->cpu, MSR_K7_HWCR, &boost_val);
|
|
+ if (ret) {
|
|
+ pr_err_once("failed to read initial CPU boost state!\n");
|
|
+ return ret;
|
|
}
|
|
|
|
- if (state)
|
|
- policy->cpuinfo.max_freq = cpudata->max_freq;
|
|
- else
|
|
- policy->cpuinfo.max_freq = cpudata->nominal_freq;
|
|
+ amd_pstate_global_params.cpb_supported = !(boost_val & MSR_K7_HWCR_CPB_DIS);
|
|
+ amd_pstate_global_params.cpb_boost = amd_pstate_global_params.cpb_supported;
|
|
|
|
- policy->max = policy->cpuinfo.max_freq;
|
|
+ return ret;
|
|
+}
|
|
|
|
- ret = freq_qos_update_request(&cpudata->req[1],
|
|
- policy->cpuinfo.max_freq);
|
|
- if (ret < 0)
|
|
- return ret;
|
|
+static void amd_perf_ctl_reset(unsigned int cpu)
|
|
+{
|
|
+ wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0);
|
|
+}
|
|
|
|
- return 0;
|
|
+/*
|
|
+ * Set amd-pstate preferred core enable can't be done directly from cpufreq callbacks
|
|
+ * due to locking, so queue the work for later.
|
|
+ */
|
|
+static void amd_pstste_sched_prefcore_workfn(struct work_struct *work)
|
|
+{
|
|
+ sched_set_itmt_support();
|
|
}
|
|
+static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn);
|
|
|
|
-static void amd_pstate_boost_init(struct amd_cpudata *cpudata)
|
|
+/*
|
|
+ * Get the highest performance register value.
|
|
+ * @cpu: CPU from which to get highest performance.
|
|
+ * @highest_perf: Return address.
|
|
+ *
|
|
+ * Return: 0 for success, -EIO otherwise.
|
|
+ */
|
|
+static int amd_pstate_get_highest_perf(int cpu, u32 *highest_perf)
|
|
{
|
|
- u32 highest_perf, nominal_perf;
|
|
+ int ret;
|
|
|
|
- highest_perf = READ_ONCE(cpudata->highest_perf);
|
|
- nominal_perf = READ_ONCE(cpudata->nominal_perf);
|
|
+ if (boot_cpu_has(X86_FEATURE_CPPC)) {
|
|
+ u64 cap1;
|
|
|
|
- if (highest_perf <= nominal_perf)
|
|
+ ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &cap1);
|
|
+ if (ret)
|
|
+ return ret;
|
|
+ WRITE_ONCE(*highest_perf, AMD_CPPC_HIGHEST_PERF(cap1));
|
|
+ } else {
|
|
+ u64 cppc_highest_perf;
|
|
+
|
|
+ ret = cppc_get_highest_perf(cpu, &cppc_highest_perf);
|
|
+ if (ret)
|
|
+ return ret;
|
|
+ WRITE_ONCE(*highest_perf, cppc_highest_perf);
|
|
+ }
|
|
+
|
|
+ return (ret);
|
|
+}
|
|
+
|
|
+#define CPPC_MAX_PERF U8_MAX
|
|
+
|
|
+static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
|
|
+{
|
|
+ int ret, prio;
|
|
+ u32 highest_perf;
|
|
+
|
|
+ ret = amd_pstate_get_highest_perf(cpudata->cpu, &highest_perf);
|
|
+ if (ret)
|
|
return;
|
|
|
|
- cpudata->boost_supported = true;
|
|
- current_pstate_driver->boost_enabled = true;
|
|
+ cpudata->hw_prefcore = true;
|
|
+	/* check if CPPC preferred core feature is enabled */
|
|
+ if (highest_perf < CPPC_MAX_PERF)
|
|
+ prio = (int)highest_perf;
|
|
+ else {
|
|
+ pr_debug("AMD CPPC preferred core is unsupported!\n");
|
|
+ cpudata->hw_prefcore = false;
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ if (!amd_pstate_prefcore)
|
|
+ return;
|
|
+
|
|
+ /*
|
|
+ * The priorities can be set regardless of whether or not
|
|
+ * sched_set_itmt_support(true) has been called and it is valid to
|
|
+ * update them at any time after it has been called.
|
|
+ */
|
|
+ sched_set_itmt_core_prio(prio, cpudata->cpu);
|
|
+
|
|
+ schedule_work(&sched_prefcore_work);
|
|
}
|
|
|
|
-static void amd_perf_ctl_reset(unsigned int cpu)
|
|
+static void amd_pstate_update_limits(unsigned int cpu)
|
|
{
|
|
- wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0);
|
|
+ struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
|
|
+ struct amd_cpudata *cpudata = policy->driver_data;
|
|
+ u32 prev_high = 0, cur_high = 0;
|
|
+ int ret;
|
|
+ bool highest_perf_changed = false;
|
|
+
|
|
+ mutex_lock(&amd_pstate_driver_lock);
|
|
+ if ((!amd_pstate_prefcore) || (!cpudata->hw_prefcore))
|
|
+ goto free_cpufreq_put;
|
|
+
|
|
+ ret = amd_pstate_get_highest_perf(cpu, &cur_high);
|
|
+ if (ret)
|
|
+ goto free_cpufreq_put;
|
|
+
|
|
+ prev_high = READ_ONCE(cpudata->prefcore_ranking);
|
|
+ if (prev_high != cur_high) {
|
|
+ highest_perf_changed = true;
|
|
+ WRITE_ONCE(cpudata->prefcore_ranking, cur_high);
|
|
+
|
|
+ if (cur_high < CPPC_MAX_PERF)
|
|
+ sched_set_itmt_core_prio((int)cur_high, cpu);
|
|
+ }
|
|
+
|
|
+free_cpufreq_put:
|
|
+ cpufreq_cpu_put(policy);
|
|
+
|
|
+ if (!highest_perf_changed)
|
|
+ cpufreq_update_policy(cpu);
|
|
+
|
|
+ mutex_unlock(&amd_pstate_driver_lock);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * Get pstate transition delay time from ACPI tables that firmware set
|
|
+ * instead of using hardcode value directly.
|
|
+ */
|
|
+static u32 amd_pstate_get_transition_delay_us(unsigned int cpu)
|
|
+{
|
|
+ u32 transition_delay_ns;
|
|
+
|
|
+ transition_delay_ns = cppc_get_transition_latency(cpu);
|
|
+ if (transition_delay_ns == CPUFREQ_ETERNAL)
|
|
+ return AMD_PSTATE_TRANSITION_DELAY;
|
|
+
|
|
+ return transition_delay_ns / NSEC_PER_USEC;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * Get pstate transition latency value from ACPI tables that firmware set
|
|
+ * instead of using hardcode value directly.
|
|
+ */
|
|
+static u32 amd_pstate_get_transition_latency(unsigned int cpu)
|
|
+{
|
|
+ u32 transition_latency;
|
|
+
|
|
+ transition_latency = cppc_get_transition_latency(cpu);
|
|
+ if (transition_latency == CPUFREQ_ETERNAL)
|
|
+ return AMD_PSTATE_TRANSITION_LATENCY;
|
|
+
|
|
+ return transition_latency;
|
|
}
|
|
|
|
static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
|
|
@@ -727,24 +905,30 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
|
|
|
|
cpudata->cpu = policy->cpu;
|
|
|
|
+ amd_pstate_init_prefcore(cpudata);
|
|
+
|
|
ret = amd_pstate_init_perf(cpudata);
|
|
if (ret)
|
|
goto free_cpudata1;
|
|
|
|
+ /* initialize cpu cores boot state */
|
|
+ amd_pstate_boost_init(cpudata);
|
|
+
|
|
min_freq = amd_get_min_freq(cpudata);
|
|
- max_freq = amd_get_max_freq(cpudata);
|
|
nominal_freq = amd_get_nominal_freq(cpudata);
|
|
+ cpudata->nominal_freq = nominal_freq;
|
|
+ max_freq = amd_get_max_freq(cpudata);
|
|
lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata);
|
|
|
|
- if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) {
|
|
- dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n",
|
|
- min_freq, max_freq);
|
|
+ if (min_freq < 0 || max_freq < 0 || min_freq > max_freq || nominal_freq == 0) {
|
|
+ dev_err(dev, "min_freq(%d) or max_freq(%d) or nominal_freq(%d) is incorrect\n",
|
|
+ min_freq, max_freq, nominal_freq);
|
|
ret = -EINVAL;
|
|
goto free_cpudata1;
|
|
}
|
|
|
|
- policy->cpuinfo.transition_latency = AMD_PSTATE_TRANSITION_LATENCY;
|
|
- policy->transition_delay_us = AMD_PSTATE_TRANSITION_DELAY;
|
|
+ policy->cpuinfo.transition_latency = amd_pstate_get_transition_latency(policy->cpu);
|
|
+ policy->transition_delay_us = amd_pstate_get_transition_delay_us(policy->cpu);
|
|
|
|
policy->min = min_freq;
|
|
policy->max = max_freq;
|
|
@@ -777,12 +961,10 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
|
|
cpudata->min_freq = min_freq;
|
|
cpudata->max_limit_freq = max_freq;
|
|
cpudata->min_limit_freq = min_freq;
|
|
- cpudata->nominal_freq = nominal_freq;
|
|
cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq;
|
|
|
|
policy->driver_data = cpudata;
|
|
|
|
- amd_pstate_boost_init(cpudata);
|
|
if (!current_pstate_driver->adjust_perf)
|
|
current_pstate_driver->adjust_perf = amd_pstate_adjust_perf;
|
|
|
|
@@ -877,6 +1059,28 @@ static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy,
|
|
return sysfs_emit(buf, "%u\n", perf);
|
|
}
|
|
|
|
+static ssize_t show_amd_pstate_prefcore_ranking(struct cpufreq_policy *policy,
|
|
+ char *buf)
|
|
+{
|
|
+ u32 perf;
|
|
+ struct amd_cpudata *cpudata = policy->driver_data;
|
|
+
|
|
+ perf = READ_ONCE(cpudata->prefcore_ranking);
|
|
+
|
|
+ return sysfs_emit(buf, "%u\n", perf);
|
|
+}
|
|
+
|
|
+static ssize_t show_amd_pstate_hw_prefcore(struct cpufreq_policy *policy,
|
|
+ char *buf)
|
|
+{
|
|
+ bool hw_prefcore;
|
|
+ struct amd_cpudata *cpudata = policy->driver_data;
|
|
+
|
|
+ hw_prefcore = READ_ONCE(cpudata->hw_prefcore);
|
|
+
|
|
+ return sysfs_emit(buf, "%s\n", str_enabled_disabled(hw_prefcore));
|
|
+}
|
|
+
|
|
static ssize_t show_energy_performance_available_preferences(
|
|
struct cpufreq_policy *policy, char *buf)
|
|
{
|
|
@@ -1074,18 +1278,125 @@ static ssize_t status_store(struct device *a, struct device_attribute *b,
|
|
return ret < 0 ? ret : count;
|
|
}
|
|
|
|
+static ssize_t prefcore_show(struct device *dev,
|
|
+ struct device_attribute *attr, char *buf)
|
|
+{
|
|
+ return sysfs_emit(buf, "%s\n", str_enabled_disabled(amd_pstate_prefcore));
|
|
+}
|
|
+
|
|
+static int amd_cpu_boost_update(struct amd_cpudata *cpudata, u32 on)
|
|
+{
|
|
+ struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpudata->cpu);
|
|
+ struct cppc_perf_ctrls perf_ctrls;
|
|
+ u32 highest_perf, nominal_perf;
|
|
+ int ret;
|
|
+
|
|
+ if (!policy)
|
|
+ return -ENODATA;
|
|
+
|
|
+ highest_perf = READ_ONCE(cpudata->highest_perf);
|
|
+ nominal_perf = READ_ONCE(cpudata->nominal_perf);
|
|
+
|
|
+ if (boot_cpu_has(X86_FEATURE_CPPC)) {
|
|
+ u64 value = READ_ONCE(cpudata->cppc_req_cached);
|
|
+
|
|
+ value &= ~GENMASK_ULL(7, 0);
|
|
+ value |= on ? highest_perf : nominal_perf;
|
|
+ WRITE_ONCE(cpudata->cppc_req_cached, value);
|
|
+
|
|
+ wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
|
|
+
|
|
+ } else {
|
|
+ perf_ctrls.max_perf = on ? highest_perf : nominal_perf;
|
|
+ ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
|
|
+ if (ret) {
|
|
+ pr_debug("failed to set energy perf value (%d)\n", ret);
|
|
+ return ret;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (on)
|
|
+ policy->cpuinfo.max_freq = cpudata->max_freq;
|
|
+ else
|
|
+ policy->cpuinfo.max_freq = cpudata->nominal_freq * 1000;
|
|
+
|
|
+ policy->max = policy->cpuinfo.max_freq;
|
|
+
|
|
+ if (cppc_state == AMD_PSTATE_PASSIVE) {
|
|
+ ret = freq_qos_update_request(&cpudata->req[1],
|
|
+ policy->cpuinfo.max_freq);
|
|
+ }
|
|
+
|
|
+ cpufreq_cpu_release(policy);
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+static ssize_t cpb_boost_show(struct device *dev,
|
|
+ struct device_attribute *attr, char *buf)
|
|
+{
|
|
+ return sysfs_emit(buf, "%u\n", amd_pstate_global_params.cpb_boost);
|
|
+}
|
|
+
|
|
+static ssize_t cpb_boost_store(struct device *dev, struct device_attribute *b,
|
|
+ const char *buf, size_t count)
|
|
+{
|
|
+ bool new_state;
|
|
+ ssize_t ret;
|
|
+ int cpu;
|
|
+
|
|
+ mutex_lock(&amd_pstate_driver_lock);
|
|
+ if (!amd_pstate_global_params.cpb_supported) {
|
|
+ pr_err("Boost mode is not supported by this processor or SBIOS\n");
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ ret = kstrtobool(buf, &new_state);
|
|
+ if (ret)
|
|
+ return -EINVAL;
|
|
+
|
|
+ amd_pstate_global_params.cpb_boost = !!new_state;
|
|
+
|
|
+ for_each_online_cpu(cpu) {
|
|
+
|
|
+ struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
|
|
+ struct amd_cpudata *cpudata = policy->driver_data;
|
|
+
|
|
+ if (!cpudata) {
|
|
+ pr_err("cpudata is NULL\n");
|
|
+ ret = -ENODATA;
|
|
+ cpufreq_cpu_put(policy);
|
|
+ goto err_exit;
|
|
+ }
|
|
+
|
|
+ amd_cpu_boost_update(cpudata, amd_pstate_global_params.cpb_boost);
|
|
+ refresh_frequency_limits(policy);
|
|
+ cpufreq_cpu_put(policy);
|
|
+ }
|
|
+
|
|
+err_exit:
|
|
+ mutex_unlock(&amd_pstate_driver_lock);
|
|
+ return ret < 0 ? ret : count;
|
|
+}
|
|
+
|
|
cpufreq_freq_attr_ro(amd_pstate_max_freq);
|
|
cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
|
|
|
|
cpufreq_freq_attr_ro(amd_pstate_highest_perf);
|
|
+cpufreq_freq_attr_ro(amd_pstate_prefcore_ranking);
|
|
+cpufreq_freq_attr_ro(amd_pstate_hw_prefcore);
|
|
cpufreq_freq_attr_rw(energy_performance_preference);
|
|
cpufreq_freq_attr_ro(energy_performance_available_preferences);
|
|
static DEVICE_ATTR_RW(status);
|
|
+static DEVICE_ATTR_RO(prefcore);
|
|
+static DEVICE_ATTR_RW(cpb_boost);
|
|
|
|
static struct freq_attr *amd_pstate_attr[] = {
|
|
&amd_pstate_max_freq,
|
|
&amd_pstate_lowest_nonlinear_freq,
|
|
&amd_pstate_highest_perf,
|
|
+ &amd_pstate_prefcore_ranking,
|
|
+ &amd_pstate_hw_prefcore,
|
|
NULL,
|
|
};
|
|
|
|
@@ -1093,6 +1404,8 @@ static struct freq_attr *amd_pstate_epp_attr[] = {
|
|
&amd_pstate_max_freq,
|
|
&amd_pstate_lowest_nonlinear_freq,
|
|
&amd_pstate_highest_perf,
|
|
+ &amd_pstate_prefcore_ranking,
|
|
+ &amd_pstate_hw_prefcore,
|
|
&energy_performance_preference,
|
|
&energy_performance_available_preferences,
|
|
NULL,
|
|
@@ -1100,6 +1413,8 @@ static struct freq_attr *amd_pstate_epp_attr[] = {
|
|
|
|
static struct attribute *pstate_global_attributes[] = {
|
|
&dev_attr_status.attr,
|
|
+ &dev_attr_prefcore.attr,
|
|
+ &dev_attr_cpb_boost.attr,
|
|
NULL
|
|
};
|
|
|
|
@@ -1151,17 +1466,23 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
|
|
cpudata->cpu = policy->cpu;
|
|
cpudata->epp_policy = 0;
|
|
|
|
+ amd_pstate_init_prefcore(cpudata);
|
|
+
|
|
ret = amd_pstate_init_perf(cpudata);
|
|
if (ret)
|
|
goto free_cpudata1;
|
|
|
|
+ /* initialize cpu cores boost state */
|
|
+ amd_pstate_boost_init(cpudata);
|
|
+
|
|
min_freq = amd_get_min_freq(cpudata);
|
|
- max_freq = amd_get_max_freq(cpudata);
|
|
nominal_freq = amd_get_nominal_freq(cpudata);
|
|
+ cpudata->nominal_freq = nominal_freq;
|
|
+ max_freq = amd_get_max_freq(cpudata);
|
|
lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata);
|
|
- if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) {
|
|
- dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n",
|
|
- min_freq, max_freq);
|
|
+ if (min_freq < 0 || max_freq < 0 || min_freq > max_freq || nominal_freq == 0) {
|
|
+ dev_err(dev, "min_freq(%d) or max_freq(%d) or nominal_freq(%d) is incorrect\n",
|
|
+ min_freq, max_freq, nominal_freq);
|
|
ret = -EINVAL;
|
|
goto free_cpudata1;
|
|
}
|
|
@@ -1174,7 +1495,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
|
|
/* Initial processor data capability frequencies */
|
|
cpudata->max_freq = max_freq;
|
|
cpudata->min_freq = min_freq;
|
|
- cpudata->nominal_freq = nominal_freq;
|
|
cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq;
|
|
|
|
policy->driver_data = cpudata;
|
|
@@ -1205,7 +1525,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
|
|
return ret;
|
|
WRITE_ONCE(cpudata->cppc_cap1_cached, value);
|
|
}
|
|
- amd_pstate_boost_init(cpudata);
|
|
|
|
return 0;
|
|
|
|
@@ -1232,6 +1551,12 @@ static void amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
|
|
max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq);
|
|
min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq);
|
|
|
|
+ if (min_limit_perf < min_perf)
|
|
+ min_limit_perf = min_perf;
|
|
+
|
|
+ if (max_limit_perf < min_limit_perf)
|
|
+ max_limit_perf = min_limit_perf;
|
|
+
|
|
WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf);
|
|
WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf);
|
|
|
|
@@ -1294,6 +1619,12 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
|
|
|
|
amd_pstate_epp_update_limit(policy);
|
|
|
|
+ /*
|
|
+ * policy->cur is never updated with the amd_pstate_epp driver, but it
|
|
+ * is used as a stale frequency value. So, keep it within limits.
|
|
+ */
|
|
+ policy->cur = policy->min;
|
|
+
|
|
return 0;
|
|
}
|
|
|
|
@@ -1431,7 +1762,7 @@ static struct cpufreq_driver amd_pstate_driver = {
|
|
.exit = amd_pstate_cpu_exit,
|
|
.suspend = amd_pstate_cpu_suspend,
|
|
.resume = amd_pstate_cpu_resume,
|
|
- .set_boost = amd_pstate_set_boost,
|
|
+ .update_limits = amd_pstate_update_limits,
|
|
.name = "amd-pstate",
|
|
.attr = amd_pstate_attr,
|
|
};
|
|
@@ -1446,6 +1777,7 @@ static struct cpufreq_driver amd_pstate_epp_driver = {
|
|
.online = amd_pstate_epp_cpu_online,
|
|
.suspend = amd_pstate_epp_suspend,
|
|
.resume = amd_pstate_epp_resume,
|
|
+ .update_limits = amd_pstate_update_limits,
|
|
.name = "amd-pstate-epp",
|
|
.attr = amd_pstate_epp_attr,
|
|
};
|
|
@@ -1486,6 +1818,11 @@ static int __init amd_pstate_init(void)
|
|
if (cpufreq_get_current_driver())
|
|
return -EEXIST;
|
|
|
|
+ quirks = NULL;
|
|
+
|
|
+ /* check if this machine need CPPC quirks */
|
|
+ dmi_check_system(amd_pstate_quirks_table);
|
|
+
|
|
switch (cppc_state) {
|
|
case AMD_PSTATE_UNDEFINED:
|
|
/* Disable on the following configs by default:
|
|
@@ -1567,7 +1904,17 @@ static int __init amd_pstate_param(char *str)
|
|
|
|
return amd_pstate_set_driver(mode_idx);
|
|
}
|
|
+
|
|
+static int __init amd_prefcore_param(char *str)
|
|
+{
|
|
+ if (!strcmp(str, "disable"))
|
|
+ amd_pstate_prefcore = false;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
early_param("amd_pstate", amd_pstate_param);
|
|
+early_param("amd_prefcore", amd_prefcore_param);
|
|
|
|
MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>");
|
|
MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver");
|
|
diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h
|
|
index 3a0995f8bce8..930b6afba6f4 100644
|
|
--- a/include/acpi/cppc_acpi.h
|
|
+++ b/include/acpi/cppc_acpi.h
|
|
@@ -139,6 +139,7 @@ struct cppc_cpudata {
|
|
#ifdef CONFIG_ACPI_CPPC_LIB
|
|
extern int cppc_get_desired_perf(int cpunum, u64 *desired_perf);
|
|
extern int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf);
|
|
+extern int cppc_get_highest_perf(int cpunum, u64 *highest_perf);
|
|
extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs);
|
|
extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls);
|
|
extern int cppc_set_enable(int cpu, bool enable);
|
|
@@ -167,6 +168,10 @@ static inline int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf)
|
|
{
|
|
return -ENOTSUPP;
|
|
}
|
|
+static inline int cppc_get_highest_perf(int cpunum, u64 *highest_perf)
|
|
+{
|
|
+ return -ENOTSUPP;
|
|
+}
|
|
static inline int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs)
|
|
{
|
|
return -ENOTSUPP;
|
|
diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h
|
|
index 6ad02ad9c7b4..e89cf1249715 100644
|
|
--- a/include/linux/amd-pstate.h
|
|
+++ b/include/linux/amd-pstate.h
|
|
@@ -39,11 +39,16 @@ struct amd_aperf_mperf {
|
|
* @cppc_req_cached: cached performance request hints
|
|
* @highest_perf: the maximum performance an individual processor may reach,
|
|
* assuming ideal conditions
|
|
+ * For platforms that do not support the preferred core feature, the
|
|
+ * highest_perf may be configured with 166 or 255, to avoid max frequency
|
|
+ * calculated wrongly. We take the fixed value as the highest_perf.
|
|
* @nominal_perf: the maximum sustained performance level of the processor,
|
|
* assuming ideal operating conditions
|
|
* @lowest_nonlinear_perf: the lowest performance level at which nonlinear power
|
|
* savings are achieved
|
|
* @lowest_perf: the absolute lowest performance level of the processor
|
|
+ * @prefcore_ranking: the preferred core ranking, the higher value indicates a higher
|
|
+ * priority.
|
|
* @max_freq: the frequency that mapped to highest_perf
|
|
* @min_freq: the frequency that mapped to lowest_perf
|
|
* @nominal_freq: the frequency that mapped to nominal_perf
|
|
@@ -51,7 +56,9 @@ struct amd_aperf_mperf {
|
|
* @cur: Difference of Aperf/Mperf/tsc count between last and current sample
|
|
* @prev: Last Aperf/Mperf/tsc count value read from register
|
|
* @freq: current cpu frequency value
|
|
- * @boost_supported: check whether the Processor or SBIOS supports boost mode
|
|
+ * @hw_prefcore: check whether HW supports preferred core feature.
|
|
+ * Only when hw_prefcore and early prefcore param are true,
|
|
+ * AMD P-State driver supports preferred core feature.
|
|
* @epp_policy: Last saved policy used to set energy-performance preference
|
|
* @epp_cached: Cached CPPC energy-performance preference value
|
|
* @policy: Cpufreq policy value
|
|
@@ -70,6 +77,7 @@ struct amd_cpudata {
|
|
u32 nominal_perf;
|
|
u32 lowest_nonlinear_perf;
|
|
u32 lowest_perf;
|
|
+ u32 prefcore_ranking;
|
|
u32 min_limit_perf;
|
|
u32 max_limit_perf;
|
|
u32 min_limit_freq;
|
|
@@ -79,12 +87,13 @@ struct amd_cpudata {
|
|
u32 min_freq;
|
|
u32 nominal_freq;
|
|
u32 lowest_nonlinear_freq;
|
|
+ u32 lowest_freq;
|
|
|
|
struct amd_aperf_mperf cur;
|
|
struct amd_aperf_mperf prev;
|
|
|
|
u64 freq;
|
|
- bool boost_supported;
|
|
+ bool hw_prefcore;
|
|
|
|
/* EPP feature related attributes*/
|
|
s16 epp_policy;
|
|
@@ -114,4 +123,23 @@ static const char * const amd_pstate_mode_string[] = {
|
|
[AMD_PSTATE_GUIDED] = "guided",
|
|
NULL,
|
|
};
|
|
+
|
|
+struct quirk_entry {
|
|
+ u32 nominal_freq;
|
|
+ u32 lowest_freq;
|
|
+};
|
|
+
|
|
+/**
|
|
+ * struct amd_pstate_global_params - Global parameters, mostly tunable via sysfs.
|
|
+ * @cpb_boost: Whether or not to use boost CPU P-states.
|
|
+ * @cpb_supported: Whether or not CPU boost P-states are available
|
|
+ * based on the MSR_K7_HWCR bit[25] state
|
|
+ */
|
|
+struct amd_pstate_global_params {
|
|
+ bool cpb_boost;
|
|
+ bool cpb_supported;
|
|
+};
|
|
+
|
|
+extern struct amd_pstate_global_params amd_pstate_global_params;
|
|
+
|
|
#endif /* _LINUX_AMD_PSTATE_H */
|
|
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
|
|
index 320fab7d2e94..3129411fa978 100644
|
|
--- a/include/linux/cpufreq.h
|
|
+++ b/include/linux/cpufreq.h
|
|
@@ -263,6 +263,7 @@ static inline bool cpufreq_supports_freq_invariance(void)
|
|
return false;
|
|
}
|
|
static inline void disable_cpufreq(void) { }
|
|
+static inline void cpufreq_update_limits(unsigned int cpu) { }
|
|
#endif
|
|
|
|
#ifdef CONFIG_CPU_FREQ_STAT
|
|
--
|
|
2.44.0
|
|
|