From b12a03ce4880bd13786a98db6de494a3e0123129 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 2 May 2011 16:48:57 +0200 Subject: hrtimers: Prepare for cancel on clock was set timers Make clock_was_set() unconditional and rename hres_timers_resume to hrtimers_resume. This is a preparatory patch for hrtimers which are cancelled when clock realtime was set. Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 16 ++---- kernel/hrtimer.c | 125 ++++++++++++++++++++++------------------------ kernel/time/timekeeping.c | 2 +- 3 files changed, 65 insertions(+), 78 deletions(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 62f500c724f..4135c88fe4f 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -148,9 +148,7 @@ struct hrtimer_clock_base { ktime_t resolution; ktime_t (*get_time)(void); ktime_t softirq_time; -#ifdef CONFIG_HIGH_RES_TIMERS ktime_t offset; -#endif }; enum hrtimer_base_type { @@ -256,8 +254,6 @@ static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer) #ifdef CONFIG_HIGH_RES_TIMERS struct clock_event_device; -extern void clock_was_set(void); -extern void hres_timers_resume(void); extern void hrtimer_interrupt(struct clock_event_device *dev); /* @@ -291,16 +287,8 @@ extern void hrtimer_peek_ahead_timers(void); # define MONOTONIC_RES_NSEC LOW_RES_NSEC # define KTIME_MONOTONIC_RES KTIME_LOW_RES -/* - * clock_was_set() is a NOP for non- high-resolution systems. The - * time-sorted order guarantees that a timer does not expire early and - * is expired in the next softirq when the clock was advanced. - */ -static inline void clock_was_set(void) { } static inline void hrtimer_peek_ahead_timers(void) { } -static inline void hres_timers_resume(void) { } - /* * In non high resolution mode the time reference is taken from * the base softirq time variable. @@ -316,11 +304,13 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer) } #endif +extern void clock_was_set(void); +extern void hrtimers_resume(void); + extern ktime_t ktime_get(void); extern ktime_t ktime_get_real(void); extern ktime_t ktime_get_boottime(void); - DECLARE_PER_CPU(struct tick_device, tick_cpu_device); diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index dbbbf7d4308..c145ed643bc 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -621,66 +621,6 @@ static int hrtimer_reprogram(struct hrtimer *timer, return res; } - -/* - * Retrigger next event is called after clock was set - * - * Called with interrupts disabled via on_each_cpu() - */ -static void retrigger_next_event(void *arg) -{ - struct hrtimer_cpu_base *base; - struct timespec realtime_offset, wtm, sleep; - - if (!hrtimer_hres_active()) - return; - - get_xtime_and_monotonic_and_sleep_offset(&realtime_offset, &wtm, - &sleep); - set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec); - - base = &__get_cpu_var(hrtimer_bases); - - /* Adjust CLOCK_REALTIME offset */ - raw_spin_lock(&base->lock); - base->clock_base[HRTIMER_BASE_REALTIME].offset = - timespec_to_ktime(realtime_offset); - base->clock_base[HRTIMER_BASE_BOOTTIME].offset = - timespec_to_ktime(sleep); - - hrtimer_force_reprogram(base, 0); - raw_spin_unlock(&base->lock); -} - -/* - * Clock realtime was set - * - * Change the offset of the realtime clock vs. the monotonic - * clock. - * - * We might have to reprogram the high resolution timer interrupt. On - * SMP we call the architecture specific code to retrigger _all_ high - * resolution timer interrupts. On UP we just disable interrupts and - * call the high resolution interrupt code. - */ -void clock_was_set(void) -{ - /* Retrigger the CPU local events everywhere */ - on_each_cpu(retrigger_next_event, NULL, 1); -} - -/* - * During resume we might have to reprogram the high resolution timer - * interrupt (on the local CPU): - */ -void hres_timers_resume(void) -{ - WARN_ONCE(!irqs_disabled(), - KERN_INFO "hres_timers_resume() called with IRQs enabled!"); - - retrigger_next_event(NULL); -} - /* * Initialize the high resolution related parts of cpu_base */ @@ -714,12 +654,14 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, return 0; } +static void retrigger_next_event(void *arg); + /* * Switch to high resolution mode */ static int hrtimer_switch_to_hres(void) { - int cpu = smp_processor_id(); + int i, cpu = smp_processor_id(); struct hrtimer_cpu_base *base = &per_cpu(hrtimer_bases, cpu); unsigned long flags; @@ -735,9 +677,8 @@ static int hrtimer_switch_to_hres(void) return 0; } base->hres_active = 1; - base->clock_base[HRTIMER_BASE_REALTIME].resolution = KTIME_HIGH_RES; - base->clock_base[HRTIMER_BASE_MONOTONIC].resolution = KTIME_HIGH_RES; - base->clock_base[HRTIMER_BASE_BOOTTIME].resolution = KTIME_HIGH_RES; + for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) + base->clock_base[i].resolution = KTIME_HIGH_RES; tick_setup_sched_timer(); @@ -764,6 +705,62 @@ static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } #endif /* CONFIG_HIGH_RES_TIMERS */ +/* + * Retrigger next event is called after clock was set + * + * Called with interrupts disabled via on_each_cpu() + */ +static void retrigger_next_event(void *arg) +{ + struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases); + struct timespec realtime_offset, xtim, wtm, sleep; + + if (!hrtimer_hres_active()) + return; + + get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep); + set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec); + + /* Adjust CLOCK_REALTIME offset */ + raw_spin_lock(&base->lock); + base->clock_base[HRTIMER_BASE_REALTIME].offset = + timespec_to_ktime(realtime_offset); + base->clock_base[HRTIMER_BASE_BOOTTIME].offset = + timespec_to_ktime(sleep); + + hrtimer_force_reprogram(base, 0); + raw_spin_unlock(&base->lock); +} + +/* + * Clock realtime was set + * + * Change the offset of the realtime clock vs. the monotonic + * clock. + * + * We might have to reprogram the high resolution timer interrupt. On + * SMP we call the architecture specific code to retrigger _all_ high + * resolution timer interrupts. On UP we just disable interrupts and + * call the high resolution interrupt code. + */ +void clock_was_set(void) +{ + /* Retrigger the CPU local events everywhere */ + on_each_cpu(retrigger_next_event, NULL, 1); +} + +/* + * During resume we might have to reprogram the high resolution timer + * interrupt (on the local CPU): + */ +void hrtimers_resume(void) +{ + WARN_ONCE(!irqs_disabled(), + KERN_INFO "hrtimers_resume() called with IRQs enabled!"); + + retrigger_next_event(NULL); +} + static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) { #ifdef CONFIG_TIMER_STATS diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 8e6a05a5915..a61b8fa2d39 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -680,7 +680,7 @@ static void timekeeping_resume(void) clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL); /* Resume hrtimers */ - hres_timers_resume(); + hrtimers_resume(); } static int timekeeping_suspend(void) -- cgit v1.2.3 From 99ee5315dac6211e972fa3f23bcc9a0343ff58c4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 27 Apr 2011 14:16:42 +0200 Subject: timerfd: Allow timers to be cancelled when clock was set Some applications must be aware of clock realtime being set backward. A simple example is a clock applet which arms a timer for the next minute display. If clock realtime is set backward then the applet displays a stale time for the amount of time which the clock was set backwards. Due to that applications poll the time because we don't have an interface. Extend the timerfd interface by adding a flag which puts the timer onto a different internal realtime clock. All timers on this clock are expired whenever the clock was set. The timerfd core records the monotonic offset when the timer is created. When the timer is armed, then the current offset is compared to the previous recorded offset. When it has changed, then timerfd_settime returns -ECANCELED. When a timer is read the offset is compared and if it changed -ECANCELED returned to user space. Periodic timers are not rearmed in the cancelation case. Signed-off-by: Thomas Gleixner Acked-by: John Stultz Cc: Chris Friesen Tested-by: Kay Sievers Cc: "Kirill A. Shutemov" Cc: Peter Zijlstra Cc: Davide Libenzi Reviewed-by: Alexander Shishkin Link: http://lkml.kernel.org/r/%3Calpine.LFD.2.02.1104271359580.3323%40ionos%3E Signed-off-by: Thomas Gleixner --- fs/timerfd.c | 57 ++++++++++++++++++++++++++++++++++++++++++----- include/linux/hrtimer.h | 2 ++ include/linux/time.h | 6 +++++ include/linux/timerfd.h | 3 ++- kernel/hrtimer.c | 36 +++++++++++++++++++++++++++++- kernel/time/timekeeping.c | 15 +++++++++++++ 6 files changed, 111 insertions(+), 8 deletions(-) diff --git a/fs/timerfd.c b/fs/timerfd.c index 8c4fc1425b3..7e14c9e7c4e 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -26,10 +26,12 @@ struct timerfd_ctx { struct hrtimer tmr; ktime_t tintv; + ktime_t moffs; wait_queue_head_t wqh; u64 ticks; int expired; int clockid; + bool might_cancel; }; /* @@ -59,24 +61,52 @@ static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; } -static void timerfd_setup(struct timerfd_ctx *ctx, int flags, - const struct itimerspec *ktmr) +static bool timerfd_canceled(struct timerfd_ctx *ctx) +{ + ktime_t moffs; + + if (!ctx->might_cancel) + return false; + + moffs = ktime_get_monotonic_offset(); + + if (moffs.tv64 == ctx->moffs.tv64) + return false; + + ctx->moffs = moffs; + return true; +} + +static int timerfd_setup(struct timerfd_ctx *ctx, int flags, + const struct itimerspec *ktmr) { enum hrtimer_mode htmode; ktime_t texp; + int clockid = ctx->clockid; htmode = (flags & TFD_TIMER_ABSTIME) ? HRTIMER_MODE_ABS: HRTIMER_MODE_REL; + ctx->might_cancel = false; + if (htmode == HRTIMER_MODE_ABS && ctx->clockid == CLOCK_REALTIME && + (flags & TFD_TIMER_CANCELON_SET)) { + clockid = CLOCK_REALTIME_COS; + ctx->might_cancel = true; + } + texp = timespec_to_ktime(ktmr->it_value); ctx->expired = 0; ctx->ticks = 0; ctx->tintv = timespec_to_ktime(ktmr->it_interval); - hrtimer_init(&ctx->tmr, ctx->clockid, htmode); + hrtimer_init(&ctx->tmr, clockid, htmode); hrtimer_set_expires(&ctx->tmr, texp); ctx->tmr.function = timerfd_tmrproc; - if (texp.tv64 != 0) + if (texp.tv64 != 0) { hrtimer_start(&ctx->tmr, texp, htmode); + if (timerfd_canceled(ctx)) + return -ECANCELED; + } + return 0; } static int timerfd_release(struct inode *inode, struct file *file) @@ -118,8 +148,21 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, res = -EAGAIN; else res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks); + if (ctx->ticks) { ticks = ctx->ticks; + + /* + * If clock has changed, we do not care about the + * ticks and we do not rearm the timer. Userspace must + * reevaluate anyway. + */ + if (timerfd_canceled(ctx)) { + ticks = 0; + ctx->expired = 0; + res = -ECANCELED; + } + if (ctx->expired && ctx->tintv.tv64) { /* * If tintv.tv64 != 0, this is a periodic timer that @@ -183,6 +226,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) init_waitqueue_head(&ctx->wqh); ctx->clockid = clockid; hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS); + ctx->moffs = ktime_get_monotonic_offset(); ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS)); @@ -199,6 +243,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, struct file *file; struct timerfd_ctx *ctx; struct itimerspec ktmr, kotmr; + int ret; if (copy_from_user(&ktmr, utmr, sizeof(ktmr))) return -EFAULT; @@ -240,14 +285,14 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, /* * Re-program the timer to the new value ... */ - timerfd_setup(ctx, flags, &ktmr); + ret = timerfd_setup(ctx, flags, &ktmr); spin_unlock_irq(&ctx->wqh.lock); fput(file); if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr))) return -EFAULT; - return 0; + return ret; } SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 4135c88fe4f..eda4ccde073 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -155,6 +155,7 @@ enum hrtimer_base_type { HRTIMER_BASE_REALTIME, HRTIMER_BASE_MONOTONIC, HRTIMER_BASE_BOOTTIME, + HRTIMER_BASE_REALTIME_COS, HRTIMER_MAX_CLOCK_BASES, }; @@ -310,6 +311,7 @@ extern void hrtimers_resume(void); extern ktime_t ktime_get(void); extern ktime_t ktime_get_real(void); extern ktime_t ktime_get_boottime(void); +extern ktime_t ktime_get_monotonic_offset(void); DECLARE_PER_CPU(struct tick_device, tick_cpu_device); diff --git a/include/linux/time.h b/include/linux/time.h index b3061782dec..a9242773eb2 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -302,6 +302,12 @@ struct itimerval { * The IDs of various hardware clocks: */ #define CLOCK_SGI_CYCLE 10 + +#ifdef __KERNEL__ +/* This clock is not exposed to user space */ +#define CLOCK_REALTIME_COS 15 +#endif + #define MAX_CLOCKS 16 #define CLOCKS_MASK (CLOCK_REALTIME | CLOCK_MONOTONIC) #define CLOCKS_MONO CLOCK_MONOTONIC diff --git a/include/linux/timerfd.h b/include/linux/timerfd.h index 2d0792983f8..e9571fc8f1a 100644 --- a/include/linux/timerfd.h +++ b/include/linux/timerfd.h @@ -19,6 +19,7 @@ * shared O_* flags. */ #define TFD_TIMER_ABSTIME (1 << 0) +#define TFD_TIMER_CANCELON_SET (1 << 1) #define TFD_CLOEXEC O_CLOEXEC #define TFD_NONBLOCK O_NONBLOCK @@ -26,6 +27,6 @@ /* Flags for timerfd_create. */ #define TFD_CREATE_FLAGS TFD_SHARED_FCNTL_FLAGS /* Flags for timerfd_settime. */ -#define TFD_SETTIME_FLAGS TFD_TIMER_ABSTIME +#define TFD_SETTIME_FLAGS (TFD_TIMER_ABSTIME | TFD_TIMER_CANCELON_SET) #endif /* _LINUX_TIMERFD_H */ diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index c145ed643bc..eabcbd78143 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -78,6 +78,11 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = .get_time = &ktime_get_boottime, .resolution = KTIME_LOW_RES, }, + { + .index = CLOCK_REALTIME_COS, + .get_time = &ktime_get_real, + .resolution = KTIME_LOW_RES, + }, } }; @@ -85,6 +90,7 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = { [CLOCK_REALTIME] = HRTIMER_BASE_REALTIME, [CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC, [CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME, + [CLOCK_REALTIME_COS] = HRTIMER_BASE_REALTIME_COS, }; static inline int hrtimer_clockid_to_base(clockid_t clock_id) @@ -110,6 +116,7 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base) base->clock_base[HRTIMER_BASE_REALTIME].softirq_time = xtim; base->clock_base[HRTIMER_BASE_MONOTONIC].softirq_time = mono; base->clock_base[HRTIMER_BASE_BOOTTIME].softirq_time = boot; + base->clock_base[HRTIMER_BASE_REALTIME_COS].softirq_time = xtim; } /* @@ -479,6 +486,8 @@ static inline void debug_deactivate(struct hrtimer *timer) trace_hrtimer_cancel(timer); } +static void hrtimer_expire_cancelable(struct hrtimer_cpu_base *cpu_base); + /* High resolution timer related functions */ #ifdef CONFIG_HIGH_RES_TIMERS @@ -715,9 +724,14 @@ static void retrigger_next_event(void *arg) struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases); struct timespec realtime_offset, xtim, wtm, sleep; - if (!hrtimer_hres_active()) + if (!hrtimer_hres_active()) { + raw_spin_lock(&base->lock); + hrtimer_expire_cancelable(base); + raw_spin_unlock(&base->lock); return; + } + /* Optimized out for !HIGH_RES */ get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep); set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec); @@ -727,6 +741,10 @@ static void retrigger_next_event(void *arg) timespec_to_ktime(realtime_offset); base->clock_base[HRTIMER_BASE_BOOTTIME].offset = timespec_to_ktime(sleep); + base->clock_base[HRTIMER_BASE_REALTIME_COS].offset = + timespec_to_ktime(realtime_offset); + + hrtimer_expire_cancelable(base); hrtimer_force_reprogram(base, 0); raw_spin_unlock(&base->lock); @@ -1222,6 +1240,22 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now) timer->state &= ~HRTIMER_STATE_CALLBACK; } +static void hrtimer_expire_cancelable(struct hrtimer_cpu_base *cpu_base) +{ + struct timerqueue_node *node; + struct hrtimer_clock_base *base; + ktime_t now = ktime_get_real(); + + base = &cpu_base->clock_base[HRTIMER_BASE_REALTIME_COS]; + + while ((node = timerqueue_getnext(&base->active))) { + struct hrtimer *timer; + + timer = container_of(node, struct hrtimer, node); + __run_hrtimer(timer, &now); + } +} + #ifdef CONFIG_HIGH_RES_TIMERS /* diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index a61b8fa2d39..342408cf68d 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1098,6 +1098,21 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim, } while (read_seqretry(&xtime_lock, seq)); } +/** + * ktime_get_monotonic_offset() - get wall_to_monotonic in ktime_t format + */ +ktime_t ktime_get_monotonic_offset(void) +{ + unsigned long seq; + struct timespec wtom; + + do { + seq = read_seqbegin(&xtime_lock); + wtom = wall_to_monotonic; + } while (read_seqretry(&xtime_lock, seq)); + return timespec_to_ktime(wtom); +} + /** * xtime_update() - advances the timekeeping infrastructure * @ticks: number of ticks, that have elapsed since the last call. -- cgit v1.2.3 From ce788f930b0cdf821de7ee8f84cfe8cf7fcb6311 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 4 May 2011 08:00:47 +0200 Subject: alarmtimer: Check return value of class_find_device() alarmtimer_late_init() uses class_find_device() to find a alarm capable rtc device. The match callback stores a pointer to the name in the char pointer handed in from the call site. alarmtimer_late_init() checks the char pointer for NULL, but the pointer is on the stack and not initialized to NULL before the call. So it can have random content when the match function did not identify a device, which leads to random access in the following rtc_open() call where the pointer is dereferenced Instead of relying on the char pointer, check the return value of class_find_device. If a device is found then the name pointer is valid as well. Reported-by: Ingo Molnar Cc: John Stultz Signed-off-by: Thomas Gleixner --- kernel/time/alarmtimer.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 9265014cb4d..e5db9b00751 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -669,11 +669,13 @@ static int __init has_wakealarm(struct device *dev, void *name_ptr) */ static int __init alarmtimer_init_late(void) { + struct device *dev; char *str; /* Find an rtc device and init the rtc_timer */ - class_find_device(rtc_class, NULL, &str, has_wakealarm); - if (str) + dev = class_find_device(rtc_class, NULL, &str, has_wakealarm); + /* If we have a device then str is valid. See has_wakealarm() */ + if (dev) rtcdev = rtc_class_open(str); if (!rtcdev) { printk(KERN_WARNING "No RTC device found, ALARM timers will" -- cgit v1.2.3 From 179eb03268aa1da03d90f1566ea85dc1478d3ae3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 4 May 2011 08:18:34 +0200 Subject: alarmtimer: Drop device refcount after rtc_open() class_find_device() takes a refcount on the rtc device. rtc_open() takes another one, so we can drop it after the rtc_open() call. Signed-off-by: Thomas Gleixner Cc: John Stultz --- kernel/time/alarmtimer.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index e5db9b00751..c6027fe9a4e 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -675,8 +675,14 @@ static int __init alarmtimer_init_late(void) /* Find an rtc device and init the rtc_timer */ dev = class_find_device(rtc_class, NULL, &str, has_wakealarm); /* If we have a device then str is valid. See has_wakealarm() */ - if (dev) + if (dev) { rtcdev = rtc_class_open(str); + /* + * Drop the reference we got in class_find_device, + * rtc_open takes its own. + */ + put_device(dev); + } if (!rtcdev) { printk(KERN_WARNING "No RTC device found, ALARM timers will" " not wake from suspend"); -- cgit v1.2.3 From 7372b0b122af0f6675f3ab65bfd91c8a438e0480 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 4 May 2011 15:09:27 -0700 Subject: clockevents: Move C3 stop test outside lock Avoid taking broadcast_lock in the idle path for systems where the timer doesn't stop in C3. [ tglx: Removed the stale label and added comment ] Signed-off-by: Andi Kleen Cc: Dave Kleikamp Cc: Chris Mason Cc: Peter Zijlstra Cc: Tim Chen Cc: lenb@kernel.org Cc: paulmck@us.ibm.com Link: http://lkml.kernel.org/r/%3C20110504234806.GF2925%40one.firstfloor.org%3E Signed-off-by: Thomas Gleixner --- kernel/time/tick-broadcast.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index da800ffa810..827e0f862da 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -456,23 +456,27 @@ void tick_broadcast_oneshot_control(unsigned long reason) unsigned long flags; int cpu; - raw_spin_lock_irqsave(&tick_broadcast_lock, flags); - /* * Periodic mode does not care about the enter/exit of power * states */ if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) - goto out; + return; - bc = tick_broadcast_device.evtdev; + /* + * We are called with preemtion disabled from the depth of the + * idle code, so we can't be moved away. + */ cpu = smp_processor_id(); td = &per_cpu(tick_cpu_device, cpu); dev = td->evtdev; if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) - goto out; + return; + + bc = tick_broadcast_device.evtdev; + raw_spin_lock_irqsave(&tick_broadcast_lock, flags); if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) { if (!cpumask_test_cpu(cpu, tick_get_broadcast_oneshot_mask())) { cpumask_set_cpu(cpu, tick_get_broadcast_oneshot_mask()); @@ -489,8 +493,6 @@ void tick_broadcast_oneshot_control(unsigned long reason) tick_program_event(dev->next_event, 1); } } - -out: raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } -- cgit v1.2.3 From 9ec2690758a5467f24beb301cca5098078073bba Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 May 2011 16:18:50 +0200 Subject: timerfd: Manage cancelable timers in timerfd Peter is concerned about the extra scan of CLOCK_REALTIME_COS in the timer interrupt. Yes, I did not think about it, because the solution was so elegant. I didn't like the extra list in timerfd when it was proposed some time ago, but with a rcu based list the list walk it's less horrible than the original global lock, which was held over the list iteration. Requested-by: Peter Zijlstra Signed-off-by: Thomas Gleixner Reviewed-by: Peter Zijlstra --- fs/timerfd.c | 105 ++++++++++++++++++++++++++++++++++-------------- include/linux/hrtimer.h | 6 ++- include/linux/time.h | 6 --- include/linux/timerfd.h | 4 +- kernel/hrtimer.c | 94 +++++++++++++++---------------------------- 5 files changed, 113 insertions(+), 102 deletions(-) diff --git a/fs/timerfd.c b/fs/timerfd.c index 7e14c9e7c4e..f67acbdda5e 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -22,6 +22,7 @@ #include #include #include +#include struct timerfd_ctx { struct hrtimer tmr; @@ -31,9 +32,14 @@ struct timerfd_ctx { u64 ticks; int expired; int clockid; + struct rcu_head rcu; + struct list_head clist; bool might_cancel; }; +static LIST_HEAD(cancel_list); +static DEFINE_SPINLOCK(cancel_lock); + /* * This gets called when the timer event triggers. We set the "expired" * flag, but we do not re-arm the timer (in case it's necessary, @@ -53,28 +59,69 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) return HRTIMER_NORESTART; } -static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) +/* + * Called when the clock was set to cancel the timers in the cancel + * list. + */ +void timerfd_clock_was_set(void) { - ktime_t remaining; + ktime_t moffs = ktime_get_monotonic_offset(); + struct timerfd_ctx *ctx; + unsigned long flags; - remaining = hrtimer_expires_remaining(&ctx->tmr); - return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; + rcu_read_lock(); + list_for_each_entry_rcu(ctx, &cancel_list, clist) { + if (!ctx->might_cancel) + continue; + spin_lock_irqsave(&ctx->wqh.lock, flags); + if (ctx->moffs.tv64 != moffs.tv64) { + ctx->moffs.tv64 = KTIME_MAX; + wake_up_locked(&ctx->wqh); + } + spin_unlock_irqrestore(&ctx->wqh.lock, flags); + } + rcu_read_unlock(); } -static bool timerfd_canceled(struct timerfd_ctx *ctx) +static void timerfd_remove_cancel(struct timerfd_ctx *ctx) { - ktime_t moffs; + if (ctx->might_cancel) { + ctx->might_cancel = false; + spin_lock(&cancel_lock); + list_del_rcu(&ctx->clist); + spin_unlock(&cancel_lock); + } +} - if (!ctx->might_cancel) +static bool timerfd_canceled(struct timerfd_ctx *ctx) +{ + if (!ctx->might_cancel || ctx->moffs.tv64 != KTIME_MAX) return false; + ctx->moffs = ktime_get_monotonic_offset(); + return true; +} - moffs = ktime_get_monotonic_offset(); +static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags) +{ + if (ctx->clockid == CLOCK_REALTIME && (flags & TFD_TIMER_ABSTIME) && + (flags & TFD_TIMER_CANCEL_ON_SET)) { + if (!ctx->might_cancel) { + ctx->might_cancel = true; + spin_lock(&cancel_lock); + list_add_rcu(&ctx->clist, &cancel_list); + spin_unlock(&cancel_lock); + } + } else if (ctx->might_cancel) { + timerfd_remove_cancel(ctx); + } +} - if (moffs.tv64 == ctx->moffs.tv64) - return false; +static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) +{ + ktime_t remaining; - ctx->moffs = moffs; - return true; + remaining = hrtimer_expires_remaining(&ctx->tmr); + return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; } static int timerfd_setup(struct timerfd_ctx *ctx, int flags, @@ -87,13 +134,6 @@ static int timerfd_setup(struct timerfd_ctx *ctx, int flags, htmode = (flags & TFD_TIMER_ABSTIME) ? HRTIMER_MODE_ABS: HRTIMER_MODE_REL; - ctx->might_cancel = false; - if (htmode == HRTIMER_MODE_ABS && ctx->clockid == CLOCK_REALTIME && - (flags & TFD_TIMER_CANCELON_SET)) { - clockid = CLOCK_REALTIME_COS; - ctx->might_cancel = true; - } - texp = timespec_to_ktime(ktmr->it_value); ctx->expired = 0; ctx->ticks = 0; @@ -113,8 +153,9 @@ static int timerfd_release(struct inode *inode, struct file *file) { struct timerfd_ctx *ctx = file->private_data; + timerfd_remove_cancel(ctx); hrtimer_cancel(&ctx->tmr); - kfree(ctx); + kfree_rcu(ctx, rcu); return 0; } @@ -149,20 +190,20 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, else res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks); + /* + * If clock has changed, we do not care about the + * ticks and we do not rearm the timer. Userspace must + * reevaluate anyway. + */ + if (timerfd_canceled(ctx)) { + ctx->ticks = 0; + ctx->expired = 0; + res = -ECANCELED; + } + if (ctx->ticks) { ticks = ctx->ticks; - /* - * If clock has changed, we do not care about the - * ticks and we do not rearm the timer. Userspace must - * reevaluate anyway. - */ - if (timerfd_canceled(ctx)) { - ticks = 0; - ctx->expired = 0; - res = -ECANCELED; - } - if (ctx->expired && ctx->tintv.tv64) { /* * If tintv.tv64 != 0, this is a periodic timer that @@ -258,6 +299,8 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, return PTR_ERR(file); ctx = file->private_data; + timerfd_setup_cancel(ctx, flags); + /* * We need to stop the existing timer before reprogramming * it to the new values. diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index eda4ccde073..925c8c01db7 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -155,7 +155,6 @@ enum hrtimer_base_type { HRTIMER_BASE_REALTIME, HRTIMER_BASE_MONOTONIC, HRTIMER_BASE_BOOTTIME, - HRTIMER_BASE_REALTIME_COS, HRTIMER_MAX_CLOCK_BASES, }; @@ -306,6 +305,11 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer) #endif extern void clock_was_set(void); +#ifdef CONFIG_TIMERFD +extern void timerfd_clock_was_set(void); +#else +static inline void timerfd_clock_was_set(void) { } +#endif extern void hrtimers_resume(void); extern ktime_t ktime_get(void); diff --git a/include/linux/time.h b/include/linux/time.h index a9242773eb2..b3061782dec 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -302,12 +302,6 @@ struct itimerval { * The IDs of various hardware clocks: */ #define CLOCK_SGI_CYCLE 10 - -#ifdef __KERNEL__ -/* This clock is not exposed to user space */ -#define CLOCK_REALTIME_COS 15 -#endif - #define MAX_CLOCKS 16 #define CLOCKS_MASK (CLOCK_REALTIME | CLOCK_MONOTONIC) #define CLOCKS_MONO CLOCK_MONOTONIC diff --git a/include/linux/timerfd.h b/include/linux/timerfd.h index e9571fc8f1a..d3b57fa1222 100644 --- a/include/linux/timerfd.h +++ b/include/linux/timerfd.h @@ -19,7 +19,7 @@ * shared O_* flags. */ #define TFD_TIMER_ABSTIME (1 << 0) -#define TFD_TIMER_CANCELON_SET (1 << 1) +#define TFD_TIMER_CANCEL_ON_SET (1 << 1) #define TFD_CLOEXEC O_CLOEXEC #define TFD_NONBLOCK O_NONBLOCK @@ -27,6 +27,6 @@ /* Flags for timerfd_create. */ #define TFD_CREATE_FLAGS TFD_SHARED_FCNTL_FLAGS /* Flags for timerfd_settime. */ -#define TFD_SETTIME_FLAGS (TFD_TIMER_ABSTIME | TFD_TIMER_CANCELON_SET) +#define TFD_SETTIME_FLAGS (TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET) #endif /* _LINUX_TIMERFD_H */ diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index eabcbd78143..26dd32f9f6b 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -78,11 +78,6 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = .get_time = &ktime_get_boottime, .resolution = KTIME_LOW_RES, }, - { - .index = CLOCK_REALTIME_COS, - .get_time = &ktime_get_real, - .resolution = KTIME_LOW_RES, - }, } }; @@ -90,7 +85,6 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = { [CLOCK_REALTIME] = HRTIMER_BASE_REALTIME, [CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC, [CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME, - [CLOCK_REALTIME_COS] = HRTIMER_BASE_REALTIME_COS, }; static inline int hrtimer_clockid_to_base(clockid_t clock_id) @@ -116,7 +110,6 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base) base->clock_base[HRTIMER_BASE_REALTIME].softirq_time = xtim; base->clock_base[HRTIMER_BASE_MONOTONIC].softirq_time = mono; base->clock_base[HRTIMER_BASE_BOOTTIME].softirq_time = boot; - base->clock_base[HRTIMER_BASE_REALTIME_COS].softirq_time = xtim; } /* @@ -486,8 +479,6 @@ static inline void debug_deactivate(struct hrtimer *timer) trace_hrtimer_cancel(timer); } -static void hrtimer_expire_cancelable(struct hrtimer_cpu_base *cpu_base); - /* High resolution timer related functions */ #ifdef CONFIG_HIGH_RES_TIMERS @@ -663,7 +654,33 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, return 0; } -static void retrigger_next_event(void *arg); +/* + * Retrigger next event is called after clock was set + * + * Called with interrupts disabled via on_each_cpu() + */ +static void retrigger_next_event(void *arg) +{ + struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases); + struct timespec realtime_offset, xtim, wtm, sleep; + + if (!hrtimer_hres_active()) + return; + + /* Optimized out for !HIGH_RES */ + get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep); + set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec); + + /* Adjust CLOCK_REALTIME offset */ + raw_spin_lock(&base->lock); + base->clock_base[HRTIMER_BASE_REALTIME].offset = + timespec_to_ktime(realtime_offset); + base->clock_base[HRTIMER_BASE_BOOTTIME].offset = + timespec_to_ktime(sleep); + + hrtimer_force_reprogram(base, 0); + raw_spin_unlock(&base->lock); +} /* * Switch to high resolution mode @@ -711,45 +728,10 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, return 0; } static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } +static inline void retrigger_next_event(void *arg) { } #endif /* CONFIG_HIGH_RES_TIMERS */ -/* - * Retrigger next event is called after clock was set - * - * Called with interrupts disabled via on_each_cpu() - */ -static void retrigger_next_event(void *arg) -{ - struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases); - struct timespec realtime_offset, xtim, wtm, sleep; - - if (!hrtimer_hres_active()) { - raw_spin_lock(&base->lock); - hrtimer_expire_cancelable(base); - raw_spin_unlock(&base->lock); - return; - } - - /* Optimized out for !HIGH_RES */ - get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep); - set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec); - - /* Adjust CLOCK_REALTIME offset */ - raw_spin_lock(&base->lock); - base->clock_base[HRTIMER_BASE_REALTIME].offset = - timespec_to_ktime(realtime_offset); - base->clock_base[HRTIMER_BASE_BOOTTIME].offset = - timespec_to_ktime(sleep); - base->clock_base[HRTIMER_BASE_REALTIME_COS].offset = - timespec_to_ktime(realtime_offset); - - hrtimer_expire_cancelable(base); - - hrtimer_force_reprogram(base, 0); - raw_spin_unlock(&base->lock); -} - /* * Clock realtime was set * @@ -763,8 +745,11 @@ static void retrigger_next_event(void *arg) */ void clock_was_set(void) { +#ifdef CONFIG_HIGHRES_TIMERS /* Retrigger the CPU local events everywhere */ on_each_cpu(retrigger_next_event, NULL, 1); +#endif + timerfd_clock_was_set(); } /* @@ -777,6 +762,7 @@ void hrtimers_resume(void) KERN_INFO "hrtimers_resume() called with IRQs enabled!"); retrigger_next_event(NULL); + timerfd_clock_was_set(); } static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) @@ -1240,22 +1226,6 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now) timer->state &= ~HRTIMER_STATE_CALLBACK; } -static void hrtimer_expire_cancelable(struct hrtimer_cpu_base *cpu_base) -{ - struct timerqueue_node *node; - struct hrtimer_clock_base *base; - ktime_t now = ktime_get_real(); - - base = &cpu_base->clock_base[HRTIMER_BASE_REALTIME_COS]; - - while ((node = timerqueue_getnext(&base->active))) { - struct hrtimer *timer; - - timer = container_of(node, struct hrtimer, node); - __run_hrtimer(timer, &now); - } -} - #ifdef CONFIG_HIGH_RES_TIMERS /* -- cgit v1.2.3 From f24444b01bf6c51c300fd3ffc73423383d747882 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 May 2011 13:02:58 +0200 Subject: hrtimers: Make struct hrtimer_cpu_base layout less stupid In the HIGHRES=y case we access the members at the end of struct hrtimer_cpu_base first and then the one at the beginning. Move the hrtimer data to front, so we have linear progressing access. Signed-off-by: Thomas Gleixner Reviewed-by: Peter Zijlstra --- include/linux/hrtimer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 925c8c01db7..cc5f5f51db1 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -174,7 +174,6 @@ enum hrtimer_base_type { */ struct hrtimer_cpu_base { raw_spinlock_t lock; - struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; #ifdef CONFIG_HIGH_RES_TIMERS ktime_t expires_next; int hres_active; @@ -184,6 +183,7 @@ struct hrtimer_cpu_base { unsigned long nr_hangs; ktime_t max_hang_time; #endif + struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; }; static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) -- cgit v1.2.3 From ab8177bc53e8ae3a3ba6d200ce2c2dae263f7ee5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 May 2011 13:05:15 +0200 Subject: hrtimers: Avoid touching inactive timer bases Instead of iterating over all possible timer bases avoid it by marking the active bases in the cpu base. Signed-off-by: Thomas Gleixner Reviewed-by: Peter Zijlstra --- include/linux/hrtimer.h | 7 +++++-- include/linux/thread_info.h | 2 +- kernel/hrtimer.c | 29 ++++++++++++++++++----------- kernel/posix-cpu-timers.c | 4 ++-- kernel/posix-timers.c | 2 +- kernel/time/alarmtimer.c | 4 ++-- 6 files changed, 29 insertions(+), 19 deletions(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index cc5f5f51db1..771c95802ed 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -143,7 +143,8 @@ struct hrtimer_sleeper { */ struct hrtimer_clock_base { struct hrtimer_cpu_base *cpu_base; - clockid_t index; + int index; + clockid_t clockid; struct timerqueue_head active; ktime_t resolution; ktime_t (*get_time)(void); @@ -162,7 +163,7 @@ enum hrtimer_base_type { * struct hrtimer_cpu_base - the per cpu clock bases * @lock: lock protecting the base and associated clock bases * and timers - * @clock_base: array of clock bases for this cpu + * @active_bases: Bitfield to mark bases with active timers * @expires_next: absolute time of the next event which was scheduled * via clock_set_next_event() * @hres_active: State of high resolution mode @@ -171,9 +172,11 @@ enum hrtimer_base_type { * @nr_retries: Total number of hrtimer interrupt retries * @nr_hangs: Total number of hrtimer interrupt hangs * @max_hang_time: Maximum time spent in hrtimer_interrupt + * @clock_base: array of clock bases for this cpu */ struct hrtimer_cpu_base { raw_spinlock_t lock; + unsigned long active_bases; #ifdef CONFIG_HIGH_RES_TIMERS ktime_t expires_next; int hres_active; diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 20fc303947d..8d03f079688 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -29,7 +29,7 @@ struct restart_block { } futex; /* For nanosleep */ struct { - clockid_t index; + clockid_t clockid; struct timespec __user *rmtp; #ifdef CONFIG_COMPAT struct compat_timespec __user *compat_rmtp; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 26dd32f9f6b..1b08f6d67f1 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -64,17 +64,20 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = .clock_base = { { - .index = CLOCK_REALTIME, + .index = HRTIMER_BASE_REALTIME, + .clockid = CLOCK_REALTIME, .get_time = &ktime_get_real, .resolution = KTIME_LOW_RES, }, { - .index = CLOCK_MONOTONIC, + .index = HRTIMER_BASE_MONOTONIC, + .clockid = CLOCK_MONOTONIC, .get_time = &ktime_get, .resolution = KTIME_LOW_RES, }, { - .index = CLOCK_BOOTTIME, + .index = HRTIMER_BASE_BOOTTIME, + .clockid = CLOCK_BOOTTIME, .get_time = &ktime_get_boottime, .resolution = KTIME_LOW_RES, }, @@ -196,7 +199,7 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, struct hrtimer_cpu_base *new_cpu_base; int this_cpu = smp_processor_id(); int cpu = hrtimer_get_target(this_cpu, pinned); - int basenum = hrtimer_clockid_to_base(base->index); + int basenum = base->index; again: new_cpu_base = &per_cpu(hrtimer_bases, cpu); @@ -857,6 +860,7 @@ static int enqueue_hrtimer(struct hrtimer *timer, debug_activate(timer); timerqueue_add(&base->active, &timer->node); + base->cpu_base->active_bases |= 1 << base->index; /* * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the @@ -898,6 +902,8 @@ static void __remove_hrtimer(struct hrtimer *timer, #endif } timerqueue_del(&base->active, &timer->node); + if (!timerqueue_getnext(&base->active)) + base->cpu_base->active_bases &= ~(1 << base->index); out: timer->state = newstate; } @@ -1235,7 +1241,6 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now) void hrtimer_interrupt(struct clock_event_device *dev) { struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); - struct hrtimer_clock_base *base; ktime_t expires_next, now, entry_time, delta; int i, retries = 0; @@ -1257,12 +1262,15 @@ retry: */ cpu_base->expires_next.tv64 = KTIME_MAX; - base = cpu_base->clock_base; - for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { - ktime_t basenow; + struct hrtimer_clock_base *base; struct timerqueue_node *node; + ktime_t basenow; + + if (!(cpu_base->active_bases & (1 << i))) + continue; + base = cpu_base->clock_base + i; basenow = ktime_add(now, base->offset); while ((node = timerqueue_getnext(&base->active))) { @@ -1295,7 +1303,6 @@ retry: __run_hrtimer(timer, &basenow); } - base++; } /* @@ -1526,7 +1533,7 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart) struct timespec __user *rmtp; int ret = 0; - hrtimer_init_on_stack(&t.timer, restart->nanosleep.index, + hrtimer_init_on_stack(&t.timer, restart->nanosleep.clockid, HRTIMER_MODE_ABS); hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires); @@ -1578,7 +1585,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, restart = ¤t_thread_info()->restart_block; restart->fn = hrtimer_nanosleep_restart; - restart->nanosleep.index = t.timer.base->index; + restart->nanosleep.clockid = t.timer.base->clockid; restart->nanosleep.rmtp = rmtp; restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer); diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 0791b13df7b..58f405b581e 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -1514,7 +1514,7 @@ static int posix_cpu_nsleep(const clockid_t which_clock, int flags, return -EFAULT; restart_block->fn = posix_cpu_nsleep_restart; - restart_block->nanosleep.index = which_clock; + restart_block->nanosleep.clockid = which_clock; restart_block->nanosleep.rmtp = rmtp; restart_block->nanosleep.expires = timespec_to_ns(rqtp); } @@ -1523,7 +1523,7 @@ static int posix_cpu_nsleep(const clockid_t which_clock, int flags, static long posix_cpu_nsleep_restart(struct restart_block *restart_block) { - clockid_t which_clock = restart_block->nanosleep.index; + clockid_t which_clock = restart_block->nanosleep.clockid; struct timespec t; struct itimerspec it; int error; diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index e5498d7405c..a1b5edf1bf9 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -1056,7 +1056,7 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, */ long clock_nanosleep_restart(struct restart_block *restart_block) { - clockid_t which_clock = restart_block->nanosleep.index; + clockid_t which_clock = restart_block->nanosleep.clockid; struct k_clock *kc = clockid_to_kclock(which_clock); if (WARN_ON_ONCE(!kc || !kc->nsleep_restart)) diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index c6027fe9a4e..2d966244ea6 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -494,7 +494,7 @@ static int update_rmtp(ktime_t exp, enum alarmtimer_type type, */ static long __sched alarm_timer_nsleep_restart(struct restart_block *restart) { - enum alarmtimer_type type = restart->nanosleep.index; + enum alarmtimer_type type = restart->nanosleep.clockid; ktime_t exp; struct timespec __user *rmtp; struct alarm alarm; @@ -573,7 +573,7 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags, restart = ¤t_thread_info()->restart_block; restart->fn = alarm_timer_nsleep_restart; - restart->nanosleep.index = type; + restart->nanosleep.clockid = type; restart->nanosleep.expires = exp.tv64; restart->nanosleep.rmtp = rmtp; ret = -ERESTART_RESTARTBLOCK; -- cgit v1.2.3 From 68fa61c026057a39d6ccb850aa8785043afbee02 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 May 2011 23:14:04 +0200 Subject: hrtimers: Reorder clock bases The ordering of the clock bases is historical due to the CLOCK_REALTIME and CLOCK_MONOTONIC constants. Now the hrtimer bases have their own enumeration due to the gap between CLOCK_MONOTONIC and CLOCK_BOOTTIME. So we can be more clever as most timers end up on the CLOCK_MONOTONIC base due to the virtue of POSIX declaring that relative CLOCK_REALTIME timers are not affected by time changes. In desktop environments this is slowly changing as applications switch to absolute timers, but I've observed empty CLOCK_REALTIME bases often enough. There is no performance penalty or overhead when CLOCK_REALTIME timers are active, but in case they are not we don't skip over a full cache line. Signed-off-by: Thomas Gleixner Reviewed-by: Peter Zijlstra --- include/linux/hrtimer.h | 2 +- kernel/hrtimer.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 771c95802ed..51932e5acf7 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -153,8 +153,8 @@ struct hrtimer_clock_base { }; enum hrtimer_base_type { - HRTIMER_BASE_REALTIME, HRTIMER_BASE_MONOTONIC, + HRTIMER_BASE_REALTIME, HRTIMER_BASE_BOOTTIME, HRTIMER_MAX_CLOCK_BASES, }; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 1b08f6d67f1..c541ee527ec 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -63,18 +63,18 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = .clock_base = { - { - .index = HRTIMER_BASE_REALTIME, - .clockid = CLOCK_REALTIME, - .get_time = &ktime_get_real, - .resolution = KTIME_LOW_RES, - }, { .index = HRTIMER_BASE_MONOTONIC, .clockid = CLOCK_MONOTONIC, .get_time = &ktime_get, .resolution = KTIME_LOW_RES, }, + { + .index = HRTIMER_BASE_REALTIME, + .clockid = CLOCK_REALTIME, + .get_time = &ktime_get_real, + .resolution = KTIME_LOW_RES, + }, { .index = HRTIMER_BASE_BOOTTIME, .clockid = CLOCK_BOOTTIME, -- cgit v1.2.3