From 41df0d61c266998b8049df7fec119cd518a43aa1 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 12 May 2008 21:21:13 +0200 Subject: x86: Add performance variants of cpumask operators * Increase performance for systems with large count NR_CPUS by limiting the range of the cpumask operators that loop over the bits in a cpumask_t variable. This removes a large amount of wasted cpu cycles. * Add performance variants of the cpumask operators: int cpus_weight_nr(mask) Same using nr_cpu_ids instead of NR_CPUS int first_cpu_nr(mask) Number lowest set bit, or nr_cpu_ids int next_cpu_nr(cpu, mask) Next cpu past 'cpu', or nr_cpu_ids for_each_cpu_mask_nr(cpu, mask) for-loop cpu over mask using nr_cpu_ids * Modify following to use performance variants: #define num_online_cpus() cpus_weight_nr(cpu_online_map) #define num_possible_cpus() cpus_weight_nr(cpu_possible_map) #define num_present_cpus() cpus_weight_nr(cpu_present_map) #define for_each_possible_cpu(cpu) for_each_cpu_mask_nr((cpu), ...) #define for_each_online_cpu(cpu) for_each_cpu_mask_nr((cpu), ...) #define for_each_present_cpu(cpu) for_each_cpu_mask_nr((cpu), ...) * Comment added to include/linux/cpumask.h: Note: The alternate operations with the suffix "_nr" are used to limit the range of the loop to nr_cpu_ids instead of NR_CPUS when NR_CPUS > 64 for performance reasons. If NR_CPUS is <= 64 then most assembler bitmask operators execute faster with a constant range, so the operator will continue to use NR_CPUS. Another consideration is that nr_cpu_ids is initialized to NR_CPUS and isn't lowered until the possible cpus are discovered (including any disabled cpus). So early uses will span the entire range of NR_CPUS. (The net effect is that for systems with 64 or less CPU's there are no functional changes.) For inclusion into sched-devel/latest tree. Based on: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git + sched-devel/latest .../mingo/linux-2.6-sched-devel.git Cc: Paul Jackson Cc: Christoph Lameter Reviewed-by: Paul Jackson Reviewed-by: Christoph Lameter Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar --- lib/cpumask.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'lib') diff --git a/lib/cpumask.c b/lib/cpumask.c index bb4f76d3c3e..5f97dc25ef9 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -15,6 +15,15 @@ int __next_cpu(int n, const cpumask_t *srcp) } EXPORT_SYMBOL(__next_cpu); +#if NR_CPUS > 64 +int __next_cpu_nr(int n, const cpumask_t *srcp) +{ + return min_t(int, nr_cpu_ids, + find_next_bit(srcp->bits, nr_cpu_ids, n+1)); +} +EXPORT_SYMBOL(__next_cpu_nr); +#endif + int __any_online_cpu(const cpumask_t *mask) { int cpu; -- cgit v1.2.3 From 4755b9291204982ac908e069674d6e5eb45815b3 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 15 Jul 2008 14:14:35 -0700 Subject: cpumask: Optimize cpumask_of_cpu in lib/smp_processor_id.c * Optimize various places where a pointer to the cpumask_of_cpu value will result in reducing stack pressure. Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar --- lib/smp_processor_id.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c index 3b4dc098181..9fb2df0d8df 100644 --- a/lib/smp_processor_id.c +++ b/lib/smp_processor_id.c @@ -11,7 +11,7 @@ notrace unsigned int debug_smp_processor_id(void) { unsigned long preempt_count = preempt_count(); int this_cpu = raw_smp_processor_id(); - cpumask_t this_mask; + cpumask_of_cpu_ptr_declare(this_mask); if (likely(preempt_count)) goto out; @@ -23,9 +23,9 @@ notrace unsigned int debug_smp_processor_id(void) * Kernel threads bound to a single CPU can safely use * smp_processor_id(): */ - this_mask = cpumask_of_cpu(this_cpu); + cpumask_of_cpu_ptr_next(this_mask, cpu); - if (cpus_equal(current->cpus_allowed, this_mask)) + if (cpus_equal(current->cpus_allowed, *this_mask)) goto out; /* -- cgit v1.2.3 From 06f8d00e9eecb738c99b737ac38a585ea7583ad5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 18 Jul 2008 22:34:00 +0200 Subject: cpumask: Optimize cpumask_of_cpu in lib/smp_processor_id.c, fix fix typo. Signed-off-by: Ingo Molnar --- lib/smp_processor_id.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c index 9fb2df0d8df..c4381d9516f 100644 --- a/lib/smp_processor_id.c +++ b/lib/smp_processor_id.c @@ -23,7 +23,7 @@ notrace unsigned int debug_smp_processor_id(void) * Kernel threads bound to a single CPU can safely use * smp_processor_id(): */ - cpumask_of_cpu_ptr_next(this_mask, cpu); + cpumask_of_cpu_ptr_next(this_mask, this_cpu); if (cpus_equal(current->cpus_allowed, *this_mask)) goto out; -- cgit v1.2.3