diff options
author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2009-09-02 13:49:18 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-09-15 16:51:27 +0200 |
commit | 47fe38fcff0517e67d395c039d2e26d2de688a60 (patch) | |
tree | ada38f4e1aa644354b9de4c2cb128719e9e64a9e | |
parent | 5cbc19a983141729d716be17197028434127b376 (diff) | |
download | linux-3.10-47fe38fcff0517e67d395c039d2e26d2de688a60.tar.gz linux-3.10-47fe38fcff0517e67d395c039d2e26d2de688a60.tar.bz2 linux-3.10-47fe38fcff0517e67d395c039d2e26d2de688a60.zip |
x86: sched: Provide arch implementations using aperf/mperf
APERF/MPERF support for cpu_power.
APERF/MPERF is arch defined to be a relative scale of work capacity
per logical cpu, this is assumed to include SMT and Turbo mode.
APERF/MPERF are specified to both reset to 0 when either counter
wraps, which is highly inconvenient, since that'll give a blimp
when that happens. The manual specifies writing 0 to the counters
after each read, but that's 1) too expensive, and 2) destroys the
possibility of sharing these counters with other users, so we live
with the blimp - the other existing user does too.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | arch/x86/kernel/cpu/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/sched.c | 55 | ||||
-rw-r--r-- | include/linux/sched.h | 4 |
3 files changed, 60 insertions, 1 deletions
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index c1f253dac15..8dd30638fe4 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -13,7 +13,7 @@ CFLAGS_common.o := $(nostackp) obj-y := intel_cacheinfo.o addon_cpuid_features.o obj-y += proc.o capflags.o powerflags.o common.o -obj-y += vmware.o hypervisor.o +obj-y += vmware.o hypervisor.o sched.o obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o obj-$(CONFIG_X86_64) += bugs_64.o diff --git a/arch/x86/kernel/cpu/sched.c b/arch/x86/kernel/cpu/sched.c new file mode 100644 index 00000000000..6c00a8f3cce --- /dev/null +++ b/arch/x86/kernel/cpu/sched.c @@ -0,0 +1,55 @@ +#include <linux/sched.h> +#include <linux/math64.h> +#include <linux/percpu.h> +#include <linux/irqflags.h> + +#include <asm/cpufeature.h> +#include <asm/processor.h> + +#ifdef CONFIG_SMP + +static DEFINE_PER_CPU(struct aperfmperf, old_perf); + +static unsigned long scale_aperfmperf(void) +{ + struct aperfmperf val, *old = &__get_cpu_var(old_perf); + unsigned long ratio, flags; + + local_irq_save(flags); + get_aperfmperf(&val); + local_irq_restore(flags); + + ratio = calc_aperfmperf_ratio(old, &val); + *old = val; + + return ratio; +} + +unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu) +{ + /* + * do aperf/mperf on the cpu level because it includes things + * like turbo mode, which are relevant to full cores. + */ + if (boot_cpu_has(X86_FEATURE_APERFMPERF)) + return scale_aperfmperf(); + + /* + * maybe have something cpufreq here + */ + + return default_scale_freq_power(sd, cpu); +} + +unsigned long arch_scale_smt_power(struct sched_domain *sd, int cpu) +{ + /* + * aperf/mperf already includes the smt gain + */ + if (boot_cpu_has(X86_FEATURE_APERFMPERF)) + return SCHED_LOAD_SCALE; + + return default_scale_smt_power(sd, cpu); +} + +#endif diff --git a/include/linux/sched.h b/include/linux/sched.h index c30bf3d516d..fc4c0f9393d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -992,6 +992,9 @@ static inline int test_sd_parent(struct sched_domain *sd, int flag) return 0; } +unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu); +unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu); + #else /* CONFIG_SMP */ struct sched_domain_attr; @@ -1003,6 +1006,7 @@ partition_sched_domains(int ndoms_new, struct cpumask *doms_new, } #endif /* !CONFIG_SMP */ + struct io_context; /* See blkdev.h */ |