posix-timers: RCU conversion

Ben Nagy reported a scalability problem with KVM/QEMU that hit a single spinlock (idr_lock) in the posix-timers code very hard on his 48-core machine. Even on a 16-CPU machine (2x4x2), a single test can show 98% of CPU time spent in ticket_spin_lock, called from lock_timer.
Ref: http://www.spinics.net/lists/kvm/msg51526.html
Switching to RCU is quite easy, since IDR is already RCU-ready. idr_lock should be taken only for an insert or delete, not for a lookup.
Benchmark on a 2x4x2 machine, 16 processes calling timer_gettime():
Before: real 1m18.669s, user 0m1.346s, sys 1m17.180s
After: real 0m3.296s, user 0m1.366s, sys 0m1.926s
Reported-by: Ben Nagy <ben@iagu.net> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Tested-by: Ben Nagy <ben@iagu.net> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Avi Kivity <avi@redhat.com> Cc: John Stultz <johnstul@us.ibm.com> Cc: Richard Cochran <richard.cochran@omicron.at> Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Cc: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
author: Eric Dumazet <eric.dumazet@gmail.com> 2011-05-24 11:12:58 +0200
committer: Thomas Gleixner <tglx@linutronix.de> 2011-05-24 12:10:51 +0200
commit: 8af088710d1eb3c980e0ef3779c8d47f3f217b48 (patch)
tree: e122a2e65684f0a40d263ba73afe3d54a2c5993a /kernel
parent: d762f4383100c2a87b1a3f2d678cd3b5425655b4 (diff)
download: kernel-common-8af088710d1eb3c980e0ef3779c8d47f3f217b48.tar.gz
kernel-common-8af088710d1eb3c980e0ef3779c8d47f3f217b48.tar.bz2
kernel-common-8af088710d1eb3c980e0ef3779c8d47f3f217b48.zip
1 files changed, 14 insertions, 11 deletions
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index a1b5edf1bf92..4556182527f3 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -491,6 +491,13 @@ static struct k_itimer * alloc_posix_timer(void)
 	return tmr;
 }
 
+static void k_itimer_rcu_free(struct rcu_head *head)
+{
+	struct k_itimer *tmr = container_of(head, struct k_itimer, it.rcu);
+
+	kmem_cache_free(posix_timers_cache, tmr);
+}
+
 #define IT_ID_SET	1
 #define IT_ID_NOT_SET	0
 static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
@@ -503,7 +510,7 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
 	}
 	put_pid(tmr->it_pid);
 	sigqueue_free(tmr->sigq);
-	kmem_cache_free(posix_timers_cache, tmr);
+	call_rcu(&tmr->it.rcu, k_itimer_rcu_free);
 }
 
 static struct k_clock *clockid_to_kclock(const clockid_t id)
@@ -631,22 +638,18 @@ out:
 static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags)
 {
 	struct k_itimer *timr;
-	/*
-	 * Watch out here.  We do a irqsave on the idr_lock and pass the
-	 * flags part over to the timer lock.  Must not let interrupts in
-	 * while we are moving the lock.
-	 */
-	spin_lock_irqsave(&idr_lock, *flags);
+
+	rcu_read_lock();
 	timr = idr_find(&posix_timers_id, (int)timer_id);
 	if (timr) {
-		spin_lock(&timr->it_lock);
+		spin_lock_irqsave(&timr->it_lock, *flags);
 		if (timr->it_signal == current->signal) {
-			spin_unlock(&idr_lock);
+			rcu_read_unlock();
 			return timr;
 		}
-		spin_unlock(&timr->it_lock);
+		spin_unlock_irqrestore(&timr->it_lock, *flags);
 	}
-	spin_unlock_irqrestore(&idr_lock, *flags);
+	rcu_read_unlock();
 
 	return NULL;
 }
author	Eric Dumazet <eric.dumazet@gmail.com>	2011-05-24 11:12:58 +0200
committer	Thomas Gleixner <tglx@linutronix.de>	2011-05-24 12:10:51 +0200
commit	8af088710d1eb3c980e0ef3779c8d47f3f217b48 (patch)
tree	e122a2e65684f0a40d263ba73afe3d54a2c5993a /kernel
parent	d762f4383100c2a87b1a3f2d678cd3b5425655b4 (diff)
download	kernel-common-8af088710d1eb3c980e0ef3779c8d47f3f217b48.tar.gz kernel-common-8af088710d1eb3c980e0ef3779c8d47f3f217b48.tar.bz2 kernel-common-8af088710d1eb3c980e0ef3779c8d47f3f217b48.zip