Creation of Cybook 2416 (actually Gen4) repository

This commit is contained in:
mlt
2009-12-18 17:10:00 +00:00
committed by godzil
commit 76f20f4d40
13791 changed files with 6812321 additions and 0 deletions

25
kernel/time/Kconfig Normal file
View File

@@ -0,0 +1,25 @@
#
# Timer subsystem related configuration options
#
config TICK_ONESHOT
bool
default n
config NO_HZ
bool "Tickless System (Dynamic Ticks)"
depends on GENERIC_TIME && GENERIC_CLOCKEVENTS
select TICK_ONESHOT
help
This option enables a tickless system: timer interrupts will
only trigger on an as-needed basis both when the system is
busy and when the system is idle.
config HIGH_RES_TIMERS
bool "High Resolution Timer Support"
depends on GENERIC_TIME && GENERIC_CLOCKEVENTS
select TICK_ONESHOT
help
This option enables high resolution timer support. If your
hardware is not capable then this option only increases
the size of the kernel image.

8
kernel/time/Makefile Normal file
View File

@@ -0,0 +1,8 @@
obj-y += ntp.o clocksource.o jiffies.o timer_list.o
obj-$(CONFIG_GENERIC_CLOCKEVENTS) += clockevents.o
obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o
obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += tick-broadcast.o
obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o
obj-$(CONFIG_TICK_ONESHOT) += tick-sched.o
obj-$(CONFIG_TIMER_STATS) += timer_stats.o

276
kernel/time/clockevents.c Normal file
View File

@@ -0,0 +1,276 @@
/*
* linux/kernel/time/clockevents.c
*
* This file contains functions which manage clock event devices.
*
* Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
*
* This code is licenced under the GPL version 2. For details see
* kernel-base/COPYING.
*/
#include <linux/clockchips.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/sysdev.h>
/* The registered clock event devices */
static LIST_HEAD(clockevent_devices);
static LIST_HEAD(clockevents_released);
/* Notification for clock events */
static RAW_NOTIFIER_HEAD(clockevents_chain);
/* Protection for the above */
static DEFINE_SPINLOCK(clockevents_lock);
/**
* clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds
* @latch: value to convert
* @evt: pointer to clock event device descriptor
*
* Math helper, returns latch value converted to nanoseconds (bound checked)
*/
unsigned long clockevent_delta2ns(unsigned long latch,
struct clock_event_device *evt)
{
u64 clc = ((u64) latch << evt->shift);
do_div(clc, evt->mult);
if (clc < 1000)
clc = 1000;
if (clc > LONG_MAX)
clc = LONG_MAX;
return (unsigned long) clc;
}
/**
* clockevents_set_mode - set the operating mode of a clock event device
* @dev: device to modify
* @mode: new mode
*
* Must be called with interrupts disabled !
*/
void clockevents_set_mode(struct clock_event_device *dev,
enum clock_event_mode mode)
{
if (dev->mode != mode) {
dev->set_mode(mode, dev);
dev->mode = mode;
}
}
/**
* clockevents_program_event - Reprogram the clock event device.
* @expires: absolute expiry time (monotonic clock)
*
* Returns 0 on success, -ETIME when the event is in the past.
*/
int clockevents_program_event(struct clock_event_device *dev, ktime_t expires,
ktime_t now)
{
unsigned long long clc;
int64_t delta;
delta = ktime_to_ns(ktime_sub(expires, now));
if (delta <= 0)
return -ETIME;
dev->next_event = expires;
if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN)
return 0;
if (delta > dev->max_delta_ns)
delta = dev->max_delta_ns;
if (delta < dev->min_delta_ns)
delta = dev->min_delta_ns;
clc = delta * dev->mult;
clc >>= dev->shift;
return dev->set_next_event((unsigned long) clc, dev);
}
/**
* clockevents_register_notifier - register a clock events change listener
*/
int clockevents_register_notifier(struct notifier_block *nb)
{
int ret;
spin_lock(&clockevents_lock);
ret = raw_notifier_chain_register(&clockevents_chain, nb);
spin_unlock(&clockevents_lock);
return ret;
}
/**
* clockevents_unregister_notifier - unregister a clock events change listener
*/
void clockevents_unregister_notifier(struct notifier_block *nb)
{
spin_lock(&clockevents_lock);
raw_notifier_chain_unregister(&clockevents_chain, nb);
spin_unlock(&clockevents_lock);
}
/*
* Notify about a clock event change. Called with clockevents_lock
* held.
*/
static void clockevents_do_notify(unsigned long reason, void *dev)
{
raw_notifier_call_chain(&clockevents_chain, reason, dev);
}
/*
* Called after a notify add to make devices availble which were
* released from the notifier call.
*/
static void clockevents_notify_released(void)
{
struct clock_event_device *dev;
while (!list_empty(&clockevents_released)) {
dev = list_entry(clockevents_released.next,
struct clock_event_device, list);
list_del(&dev->list);
list_add(&dev->list, &clockevent_devices);
clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev);
}
}
/**
* clockevents_register_device - register a clock event device
* @dev: device to register
*/
void clockevents_register_device(struct clock_event_device *dev)
{
BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
spin_lock(&clockevents_lock);
list_add(&dev->list, &clockevent_devices);
clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev);
clockevents_notify_released();
spin_unlock(&clockevents_lock);
}
/*
* Noop handler when we shut down an event device
*/
static void clockevents_handle_noop(struct clock_event_device *dev)
{
}
/**
* clockevents_exchange_device - release and request clock devices
* @old: device to release (can be NULL)
* @new: device to request (can be NULL)
*
* Called from the notifier chain. clockevents_lock is held already
*/
void clockevents_exchange_device(struct clock_event_device *old,
struct clock_event_device *new)
{
unsigned long flags;
local_irq_save(flags);
/*
* Caller releases a clock event device. We queue it into the
* released list and do a notify add later.
*/
if (old) {
old->event_handler = clockevents_handle_noop;
clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED);
list_del(&old->list);
list_add(&old->list, &clockevents_released);
}
if (new) {
BUG_ON(new->mode != CLOCK_EVT_MODE_UNUSED);
clockevents_set_mode(new, CLOCK_EVT_MODE_SHUTDOWN);
}
local_irq_restore(flags);
}
/**
* clockevents_request_device
*/
struct clock_event_device *clockevents_request_device(unsigned int features,
cpumask_t cpumask)
{
struct clock_event_device *cur, *dev = NULL;
struct list_head *tmp;
spin_lock(&clockevents_lock);
list_for_each(tmp, &clockevent_devices) {
cur = list_entry(tmp, struct clock_event_device, list);
if ((cur->features & features) == features &&
cpus_equal(cpumask, cur->cpumask)) {
if (!dev || dev->rating < cur->rating)
dev = cur;
}
}
clockevents_exchange_device(NULL, dev);
spin_unlock(&clockevents_lock);
return dev;
}
/**
* clockevents_release_device
*/
void clockevents_release_device(struct clock_event_device *dev)
{
spin_lock(&clockevents_lock);
clockevents_exchange_device(dev, NULL);
clockevents_notify_released();
spin_unlock(&clockevents_lock);
}
/**
* clockevents_notify - notification about relevant events
*/
void clockevents_notify(unsigned long reason, void *arg)
{
spin_lock(&clockevents_lock);
clockevents_do_notify(reason, arg);
switch (reason) {
case CLOCK_EVT_NOTIFY_CPU_DEAD:
/*
* Unregister the clock event devices which were
* released from the users in the notify chain.
*/
while (!list_empty(&clockevents_released)) {
struct clock_event_device *dev;
dev = list_entry(clockevents_released.next,
struct clock_event_device, list);
list_del(&dev->list);
}
break;
default:
break;
}
spin_unlock(&clockevents_lock);
}
EXPORT_SYMBOL_GPL(clockevents_notify);

522
kernel/time/clocksource.c Normal file
View File

@@ -0,0 +1,522 @@
/*
* linux/kernel/time/clocksource.c
*
* This file contains the functions which manage clocksource drivers.
*
* Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* TODO WishList:
* o Allow clocksource drivers to be unregistered
* o get rid of clocksource_jiffies extern
*/
#include <linux/clocksource.h>
#include <linux/sysdev.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
#include <linux/tick.h>
/* XXX - Would like a better way for initializing curr_clocksource */
extern struct clocksource clocksource_jiffies;
/*[Clocksource internal variables]---------
* curr_clocksource:
* currently selected clocksource. Initialized to clocksource_jiffies.
* next_clocksource:
* pending next selected clocksource.
* clocksource_list:
* linked list with the registered clocksources
* clocksource_lock:
* protects manipulations to curr_clocksource and next_clocksource
* and the clocksource_list
* override_name:
* Name of the user-specified clocksource.
*/
static struct clocksource *curr_clocksource = &clocksource_jiffies;
static struct clocksource *next_clocksource;
static struct clocksource *clocksource_override;
static LIST_HEAD(clocksource_list);
static DEFINE_SPINLOCK(clocksource_lock);
static char override_name[32];
static int finished_booting;
/* clocksource_done_booting - Called near the end of core bootup
*
* Hack to avoid lots of clocksource churn at boot time.
* We use fs_initcall because we want this to start before
* device_initcall but after subsys_initcall.
*/
static int __init clocksource_done_booting(void)
{
finished_booting = 1;
return 0;
}
fs_initcall(clocksource_done_booting);
#ifdef CONFIG_CLOCKSOURCE_WATCHDOG
static LIST_HEAD(watchdog_list);
static struct clocksource *watchdog;
static struct timer_list watchdog_timer;
static DEFINE_SPINLOCK(watchdog_lock);
static cycle_t watchdog_last;
static int watchdog_resumed;
/*
* Interval: 0.5sec Treshold: 0.0625s
*/
#define WATCHDOG_INTERVAL (HZ >> 1)
#define WATCHDOG_TRESHOLD (NSEC_PER_SEC >> 4)
static void clocksource_ratewd(struct clocksource *cs, int64_t delta)
{
if (delta > -WATCHDOG_TRESHOLD && delta < WATCHDOG_TRESHOLD)
return;
printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n",
cs->name, delta);
cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG);
clocksource_change_rating(cs, 0);
cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
list_del(&cs->wd_list);
}
static void clocksource_watchdog(unsigned long data)
{
struct clocksource *cs, *tmp;
cycle_t csnow, wdnow;
int64_t wd_nsec, cs_nsec;
int resumed;
spin_lock(&watchdog_lock);
resumed = watchdog_resumed;
if (unlikely(resumed))
watchdog_resumed = 0;
wdnow = watchdog->read();
wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask);
watchdog_last = wdnow;
list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) {
csnow = cs->read();
if (unlikely(resumed)) {
cs->wd_last = csnow;
continue;
}
/* Initialized ? */
if (!(cs->flags & CLOCK_SOURCE_WATCHDOG)) {
if ((cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
(watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
/*
* We just marked the clocksource as
* highres-capable, notify the rest of the
* system as well so that we transition
* into high-res mode:
*/
tick_clock_notify();
}
cs->flags |= CLOCK_SOURCE_WATCHDOG;
cs->wd_last = csnow;
} else {
cs_nsec = cyc2ns(cs, (csnow - cs->wd_last) & cs->mask);
cs->wd_last = csnow;
/* Check the delta. Might remove from the list ! */
clocksource_ratewd(cs, cs_nsec - wd_nsec);
}
}
if (!list_empty(&watchdog_list)) {
__mod_timer(&watchdog_timer,
watchdog_timer.expires + WATCHDOG_INTERVAL);
}
spin_unlock(&watchdog_lock);
}
static void clocksource_resume_watchdog(void)
{
spin_lock(&watchdog_lock);
watchdog_resumed = 1;
spin_unlock(&watchdog_lock);
}
static void clocksource_check_watchdog(struct clocksource *cs)
{
struct clocksource *cse;
unsigned long flags;
spin_lock_irqsave(&watchdog_lock, flags);
if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
int started = !list_empty(&watchdog_list);
list_add(&cs->wd_list, &watchdog_list);
if (!started && watchdog) {
watchdog_last = watchdog->read();
watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
add_timer(&watchdog_timer);
}
} else {
if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
if (!watchdog || cs->rating > watchdog->rating) {
if (watchdog)
del_timer(&watchdog_timer);
watchdog = cs;
init_timer(&watchdog_timer);
watchdog_timer.function = clocksource_watchdog;
/* Reset watchdog cycles */
list_for_each_entry(cse, &watchdog_list, wd_list)
cse->flags &= ~CLOCK_SOURCE_WATCHDOG;
/* Start if list is not empty */
if (!list_empty(&watchdog_list)) {
watchdog_last = watchdog->read();
watchdog_timer.expires =
jiffies + WATCHDOG_INTERVAL;
add_timer(&watchdog_timer);
}
}
}
spin_unlock_irqrestore(&watchdog_lock, flags);
}
#else
static void clocksource_check_watchdog(struct clocksource *cs)
{
if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
}
static inline void clocksource_resume_watchdog(void) { }
#endif
/**
* clocksource_resume - resume the clocksource(s)
*/
void clocksource_resume(void)
{
struct list_head *tmp;
unsigned long flags;
spin_lock_irqsave(&clocksource_lock, flags);
list_for_each(tmp, &clocksource_list) {
struct clocksource *cs;
cs = list_entry(tmp, struct clocksource, list);
if (cs->resume)
cs->resume();
}
clocksource_resume_watchdog();
spin_unlock_irqrestore(&clocksource_lock, flags);
}
/**
* clocksource_get_next - Returns the selected clocksource
*
*/
struct clocksource *clocksource_get_next(void)
{
unsigned long flags;
spin_lock_irqsave(&clocksource_lock, flags);
if (next_clocksource && finished_booting) {
curr_clocksource = next_clocksource;
next_clocksource = NULL;
}
spin_unlock_irqrestore(&clocksource_lock, flags);
return curr_clocksource;
}
/**
* select_clocksource - Selects the best registered clocksource.
*
* Private function. Must hold clocksource_lock when called.
*
* Select the clocksource with the best rating, or the clocksource,
* which is selected by userspace override.
*/
static struct clocksource *select_clocksource(void)
{
struct clocksource *next;
if (list_empty(&clocksource_list))
return NULL;
if (clocksource_override)
next = clocksource_override;
else
next = list_entry(clocksource_list.next, struct clocksource,
list);
if (next == curr_clocksource)
return NULL;
return next;
}
/*
* Enqueue the clocksource sorted by rating
*/
static int clocksource_enqueue(struct clocksource *c)
{
struct list_head *tmp, *entry = &clocksource_list;
list_for_each(tmp, &clocksource_list) {
struct clocksource *cs;
cs = list_entry(tmp, struct clocksource, list);
if (cs == c)
return -EBUSY;
/* Keep track of the place, where to insert */
if (cs->rating >= c->rating)
entry = tmp;
}
list_add(&c->list, entry);
if (strlen(c->name) == strlen(override_name) &&
!strcmp(c->name, override_name))
clocksource_override = c;
return 0;
}
/**
* clocksource_register - Used to install new clocksources
* @t: clocksource to be registered
*
* Returns -EBUSY if registration fails, zero otherwise.
*/
int clocksource_register(struct clocksource *c)
{
unsigned long flags;
int ret;
spin_lock_irqsave(&clocksource_lock, flags);
ret = clocksource_enqueue(c);
if (!ret)
next_clocksource = select_clocksource();
spin_unlock_irqrestore(&clocksource_lock, flags);
if (!ret)
clocksource_check_watchdog(c);
return ret;
}
EXPORT_SYMBOL(clocksource_register);
/**
* clocksource_change_rating - Change the rating of a registered clocksource
*
*/
void clocksource_change_rating(struct clocksource *cs, int rating)
{
unsigned long flags;
spin_lock_irqsave(&clocksource_lock, flags);
list_del(&cs->list);
cs->rating = rating;
clocksource_enqueue(cs);
next_clocksource = select_clocksource();
spin_unlock_irqrestore(&clocksource_lock, flags);
}
#ifdef CONFIG_SYSFS
/**
* sysfs_show_current_clocksources - sysfs interface for current clocksource
* @dev: unused
* @buf: char buffer to be filled with clocksource list
*
* Provides sysfs interface for listing current clocksource.
*/
static ssize_t
sysfs_show_current_clocksources(struct sys_device *dev, char *buf)
{
char *curr = buf;
spin_lock_irq(&clocksource_lock);
curr += sprintf(curr, "%s ", curr_clocksource->name);
spin_unlock_irq(&clocksource_lock);
curr += sprintf(curr, "\n");
return curr - buf;
}
/**
* sysfs_override_clocksource - interface for manually overriding clocksource
* @dev: unused
* @buf: name of override clocksource
* @count: length of buffer
*
* Takes input from sysfs interface for manually overriding the default
* clocksource selction.
*/
static ssize_t sysfs_override_clocksource(struct sys_device *dev,
const char *buf, size_t count)
{
struct clocksource *ovr = NULL;
struct list_head *tmp;
size_t ret = count;
int len;
/* strings from sysfs write are not 0 terminated! */
if (count >= sizeof(override_name))
return -EINVAL;
/* strip of \n: */
if (buf[count-1] == '\n')
count--;
spin_lock_irq(&clocksource_lock);
if (count > 0)
memcpy(override_name, buf, count);
override_name[count] = 0;
len = strlen(override_name);
if (len) {
ovr = clocksource_override;
/* try to select it: */
list_for_each(tmp, &clocksource_list) {
struct clocksource *cs;
cs = list_entry(tmp, struct clocksource, list);
if (strlen(cs->name) == len &&
!strcmp(cs->name, override_name))
ovr = cs;
}
}
/* Reselect, when the override name has changed */
if (ovr != clocksource_override) {
clocksource_override = ovr;
next_clocksource = select_clocksource();
}
spin_unlock_irq(&clocksource_lock);
return ret;
}
/**
* sysfs_show_available_clocksources - sysfs interface for listing clocksource
* @dev: unused
* @buf: char buffer to be filled with clocksource list
*
* Provides sysfs interface for listing registered clocksources
*/
static ssize_t
sysfs_show_available_clocksources(struct sys_device *dev, char *buf)
{
struct list_head *tmp;
char *curr = buf;
spin_lock_irq(&clocksource_lock);
list_for_each(tmp, &clocksource_list) {
struct clocksource *src;
src = list_entry(tmp, struct clocksource, list);
curr += sprintf(curr, "%s ", src->name);
}
spin_unlock_irq(&clocksource_lock);
curr += sprintf(curr, "\n");
return curr - buf;
}
/*
* Sysfs setup bits:
*/
static SYSDEV_ATTR(current_clocksource, 0600, sysfs_show_current_clocksources,
sysfs_override_clocksource);
static SYSDEV_ATTR(available_clocksource, 0600,
sysfs_show_available_clocksources, NULL);
static struct sysdev_class clocksource_sysclass = {
set_kset_name("clocksource"),
};
static struct sys_device device_clocksource = {
.id = 0,
.cls = &clocksource_sysclass,
};
static int __init init_clocksource_sysfs(void)
{
int error = sysdev_class_register(&clocksource_sysclass);
if (!error)
error = sysdev_register(&device_clocksource);
if (!error)
error = sysdev_create_file(
&device_clocksource,
&attr_current_clocksource);
if (!error)
error = sysdev_create_file(
&device_clocksource,
&attr_available_clocksource);
return error;
}
device_initcall(init_clocksource_sysfs);
#endif /* CONFIG_SYSFS */
/**
* boot_override_clocksource - boot clock override
* @str: override name
*
* Takes a clocksource= boot argument and uses it
* as the clocksource override name.
*/
static int __init boot_override_clocksource(char* str)
{
unsigned long flags;
spin_lock_irqsave(&clocksource_lock, flags);
if (str)
strlcpy(override_name, str, sizeof(override_name));
spin_unlock_irqrestore(&clocksource_lock, flags);
return 1;
}
__setup("clocksource=", boot_override_clocksource);
/**
* boot_override_clock - Compatibility layer for deprecated boot option
* @str: override name
*
* DEPRECATED! Takes a clock= boot argument and uses it
* as the clocksource override name
*/
static int __init boot_override_clock(char* str)
{
if (!strcmp(str, "pmtmr")) {
printk("Warning: clock=pmtmr is deprecated. "
"Use clocksource=acpi_pm.\n");
return boot_override_clocksource("acpi_pm");
}
printk("Warning! clock= boot option is deprecated. "
"Use clocksource=xyz\n");
return boot_override_clocksource(str);
}
__setup("clock=", boot_override_clock);

72
kernel/time/jiffies.c Normal file
View File

@@ -0,0 +1,72 @@
/***********************************************************************
* linux/kernel/time/jiffies.c
*
* This file contains the jiffies based clocksource.
*
* Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
************************************************************************/
#include <linux/clocksource.h>
#include <linux/jiffies.h>
#include <linux/init.h>
/* The Jiffies based clocksource is the lowest common
* denominator clock source which should function on
* all systems. It has the same coarse resolution as
* the timer interrupt frequency HZ and it suffers
* inaccuracies caused by missed or lost timer
* interrupts and the inability for the timer
* interrupt hardware to accuratly tick at the
* requested HZ value. It is also not reccomended
* for "tick-less" systems.
*/
#define NSEC_PER_JIFFY ((u32)((((u64)NSEC_PER_SEC)<<8)/ACTHZ))
/* Since jiffies uses a simple NSEC_PER_JIFFY multiplier
* conversion, the .shift value could be zero. However
* this would make NTP adjustments impossible as they are
* in units of 1/2^.shift. Thus we use JIFFIES_SHIFT to
* shift both the nominator and denominator the same
* amount, and give ntp adjustments in units of 1/2^8
*
* The value 8 is somewhat carefully chosen, as anything
* larger can result in overflows. NSEC_PER_JIFFY grows as
* HZ shrinks, so values greater then 8 overflow 32bits when
* HZ=100.
*/
#define JIFFIES_SHIFT 8
static cycle_t jiffies_read(void)
{
return (cycle_t) jiffies;
}
struct clocksource clocksource_jiffies = {
.name = "jiffies",
.rating = 1, /* lowest valid rating*/
.read = jiffies_read,
.mask = 0xffffffff, /*32bits*/
.mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */
.shift = JIFFIES_SHIFT,
};
static int __init init_jiffies_clocksource(void)
{
return clocksource_register(&clocksource_jiffies);
}
core_initcall(init_jiffies_clocksource);

360
kernel/time/ntp.c Normal file
View File

@@ -0,0 +1,360 @@
/*
* linux/kernel/time/ntp.c
*
* NTP state machine interfaces and logic.
*
* This code was mainly moved from kernel/timer.c and kernel/time.c
* Please see those files for relevant copyright info and historical
* changelogs.
*/
#include <linux/mm.h>
#include <linux/time.h>
#include <linux/timex.h>
#include <asm/div64.h>
#include <asm/timex.h>
/*
* Timekeeping variables
*/
unsigned long tick_usec = TICK_USEC; /* USER_HZ period (usec) */
unsigned long tick_nsec; /* ACTHZ period (nsec) */
static u64 tick_length, tick_length_base;
#define MAX_TICKADJ 500 /* microsecs */
#define MAX_TICKADJ_SCALED (((u64)(MAX_TICKADJ * NSEC_PER_USEC) << \
TICK_LENGTH_SHIFT) / NTP_INTERVAL_FREQ)
/*
* phase-lock loop variables
*/
/* TIME_ERROR prevents overwriting the CMOS clock */
static int time_state = TIME_OK; /* clock synchronization status */
int time_status = STA_UNSYNC; /* clock status bits */
static s64 time_offset; /* time adjustment (ns) */
static long time_constant = 2; /* pll time constant */
long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */
long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */
long time_freq; /* frequency offset (scaled ppm)*/
static long time_reftime; /* time at last adjustment (s) */
long time_adjust;
#define CLOCK_TICK_OVERFLOW (LATCH * HZ - CLOCK_TICK_RATE)
#define CLOCK_TICK_ADJUST (((s64)CLOCK_TICK_OVERFLOW * NSEC_PER_SEC) / \
(s64)CLOCK_TICK_RATE)
static void ntp_update_frequency(void)
{
u64 second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ)
<< TICK_LENGTH_SHIFT;
second_length += (s64)CLOCK_TICK_ADJUST << TICK_LENGTH_SHIFT;
second_length += (s64)time_freq << (TICK_LENGTH_SHIFT - SHIFT_NSEC);
tick_length_base = second_length;
do_div(second_length, HZ);
tick_nsec = second_length >> TICK_LENGTH_SHIFT;
do_div(tick_length_base, NTP_INTERVAL_FREQ);
}
/**
* ntp_clear - Clears the NTP state variables
*
* Must be called while holding a write on the xtime_lock
*/
void ntp_clear(void)
{
time_adjust = 0; /* stop active adjtime() */
time_status |= STA_UNSYNC;
time_maxerror = NTP_PHASE_LIMIT;
time_esterror = NTP_PHASE_LIMIT;
ntp_update_frequency();
tick_length = tick_length_base;
time_offset = 0;
}
/*
* this routine handles the overflow of the microsecond field
*
* The tricky bits of code to handle the accurate clock support
* were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
* They were originally developed for SUN and DEC kernels.
* All the kudos should go to Dave for this stuff.
*/
void second_overflow(void)
{
long time_adj;
/* Bump the maxerror field */
time_maxerror += MAXFREQ >> SHIFT_USEC;
if (time_maxerror > NTP_PHASE_LIMIT) {
time_maxerror = NTP_PHASE_LIMIT;
time_status |= STA_UNSYNC;
}
/*
* Leap second processing. If in leap-insert state at the end of the
* day, the system clock is set back one second; if in leap-delete
* state, the system clock is set ahead one second. The microtime()
* routine or external clock driver will insure that reported time is
* always monotonic. The ugly divides should be replaced.
*/
switch (time_state) {
case TIME_OK:
if (time_status & STA_INS)
time_state = TIME_INS;
else if (time_status & STA_DEL)
time_state = TIME_DEL;
break;
case TIME_INS:
if (xtime.tv_sec % 86400 == 0) {
xtime.tv_sec--;
wall_to_monotonic.tv_sec++;
/*
* The timer interpolator will make time change
* gradually instead of an immediate jump by one second
*/
time_interpolator_update(-NSEC_PER_SEC);
time_state = TIME_OOP;
clock_was_set();
printk(KERN_NOTICE "Clock: inserting leap second "
"23:59:60 UTC\n");
}
break;
case TIME_DEL:
if ((xtime.tv_sec + 1) % 86400 == 0) {
xtime.tv_sec++;
wall_to_monotonic.tv_sec--;
/*
* Use of time interpolator for a gradual change of
* time
*/
time_interpolator_update(NSEC_PER_SEC);
time_state = TIME_WAIT;
clock_was_set();
printk(KERN_NOTICE "Clock: deleting leap second "
"23:59:59 UTC\n");
}
break;
case TIME_OOP:
time_state = TIME_WAIT;
break;
case TIME_WAIT:
if (!(time_status & (STA_INS | STA_DEL)))
time_state = TIME_OK;
}
/*
* Compute the phase adjustment for the next second. The offset is
* reduced by a fixed factor times the time constant.
*/
tick_length = tick_length_base;
time_adj = shift_right(time_offset, SHIFT_PLL + time_constant);
time_offset -= time_adj;
tick_length += (s64)time_adj << (TICK_LENGTH_SHIFT - SHIFT_UPDATE);
if (unlikely(time_adjust)) {
if (time_adjust > MAX_TICKADJ) {
time_adjust -= MAX_TICKADJ;
tick_length += MAX_TICKADJ_SCALED;
} else if (time_adjust < -MAX_TICKADJ) {
time_adjust += MAX_TICKADJ;
tick_length -= MAX_TICKADJ_SCALED;
} else {
tick_length += (s64)(time_adjust * NSEC_PER_USEC /
NTP_INTERVAL_FREQ) << TICK_LENGTH_SHIFT;
time_adjust = 0;
}
}
}
/*
* Return how long ticks are at the moment, that is, how much time
* update_wall_time_one_tick will add to xtime next time we call it
* (assuming no calls to do_adjtimex in the meantime).
* The return value is in fixed-point nanoseconds shifted by the
* specified number of bits to the right of the binary point.
* This function has no side-effects.
*/
u64 current_tick_length(void)
{
return tick_length;
}
void __attribute__ ((weak)) notify_arch_cmos_timer(void)
{
return;
}
/* adjtimex mainly allows reading (and writing, if superuser) of
* kernel time-keeping variables. used by xntpd.
*/
int do_adjtimex(struct timex *txc)
{
long mtemp, save_adjust, rem;
s64 freq_adj, temp64;
int result;
/* In order to modify anything, you gotta be super-user! */
if (txc->modes && !capable(CAP_SYS_TIME))
return -EPERM;
/* Now we validate the data before disabling interrupts */
if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT)
/* singleshot must not be used with any other mode bits */
if (txc->modes != ADJ_OFFSET_SINGLESHOT)
return -EINVAL;
if (txc->modes != ADJ_OFFSET_SINGLESHOT && (txc->modes & ADJ_OFFSET))
/* adjustment Offset limited to +- .512 seconds */
if (txc->offset <= - MAXPHASE || txc->offset >= MAXPHASE )
return -EINVAL;
/* if the quartz is off by more than 10% something is VERY wrong ! */
if (txc->modes & ADJ_TICK)
if (txc->tick < 900000/USER_HZ ||
txc->tick > 1100000/USER_HZ)
return -EINVAL;
write_seqlock_irq(&xtime_lock);
result = time_state; /* mostly `TIME_OK' */
/* Save for later - semantics of adjtime is to return old value */
save_adjust = time_adjust;
#if 0 /* STA_CLOCKERR is never set yet */
time_status &= ~STA_CLOCKERR; /* reset STA_CLOCKERR */
#endif
/* If there are input parameters, then process them */
if (txc->modes)
{
if (txc->modes & ADJ_STATUS) /* only set allowed bits */
time_status = (txc->status & ~STA_RONLY) |
(time_status & STA_RONLY);
if (txc->modes & ADJ_FREQUENCY) { /* p. 22 */
if (txc->freq > MAXFREQ || txc->freq < -MAXFREQ) {
result = -EINVAL;
goto leave;
}
time_freq = ((s64)txc->freq * NSEC_PER_USEC)
>> (SHIFT_USEC - SHIFT_NSEC);
}
if (txc->modes & ADJ_MAXERROR) {
if (txc->maxerror < 0 || txc->maxerror >= NTP_PHASE_LIMIT) {
result = -EINVAL;
goto leave;
}
time_maxerror = txc->maxerror;
}
if (txc->modes & ADJ_ESTERROR) {
if (txc->esterror < 0 || txc->esterror >= NTP_PHASE_LIMIT) {
result = -EINVAL;
goto leave;
}
time_esterror = txc->esterror;
}
if (txc->modes & ADJ_TIMECONST) { /* p. 24 */
if (txc->constant < 0) { /* NTP v4 uses values > 6 */
result = -EINVAL;
goto leave;
}
time_constant = min(txc->constant + 4, (long)MAXTC);
}
if (txc->modes & ADJ_OFFSET) { /* values checked earlier */
if (txc->modes == ADJ_OFFSET_SINGLESHOT) {
/* adjtime() is independent from ntp_adjtime() */
time_adjust = txc->offset;
}
else if (time_status & STA_PLL) {
time_offset = txc->offset * NSEC_PER_USEC;
/*
* Scale the phase adjustment and
* clamp to the operating range.
*/
time_offset = min(time_offset, (s64)MAXPHASE * NSEC_PER_USEC);
time_offset = max(time_offset, (s64)-MAXPHASE * NSEC_PER_USEC);
/*
* Select whether the frequency is to be controlled
* and in which mode (PLL or FLL). Clamp to the operating
* range. Ugly multiply/divide should be replaced someday.
*/
if (time_status & STA_FREQHOLD || time_reftime == 0)
time_reftime = xtime.tv_sec;
mtemp = xtime.tv_sec - time_reftime;
time_reftime = xtime.tv_sec;
freq_adj = time_offset * mtemp;
freq_adj = shift_right(freq_adj, time_constant * 2 +
(SHIFT_PLL + 2) * 2 - SHIFT_NSEC);
if (mtemp >= MINSEC && (time_status & STA_FLL || mtemp > MAXSEC)) {
temp64 = time_offset << (SHIFT_NSEC - SHIFT_FLL);
if (time_offset < 0) {
temp64 = -temp64;
do_div(temp64, mtemp);
freq_adj -= temp64;
} else {
do_div(temp64, mtemp);
freq_adj += temp64;
}
}
freq_adj += time_freq;
freq_adj = min(freq_adj, (s64)MAXFREQ_NSEC);
time_freq = max(freq_adj, (s64)-MAXFREQ_NSEC);
time_offset = div_long_long_rem_signed(time_offset,
NTP_INTERVAL_FREQ,
&rem);
time_offset <<= SHIFT_UPDATE;
} /* STA_PLL */
} /* txc->modes & ADJ_OFFSET */
if (txc->modes & ADJ_TICK)
tick_usec = txc->tick;
if (txc->modes & (ADJ_TICK|ADJ_FREQUENCY|ADJ_OFFSET))
ntp_update_frequency();
} /* txc->modes */
leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0)
result = TIME_ERROR;
if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT)
txc->offset = save_adjust;
else
txc->offset = ((long)shift_right(time_offset, SHIFT_UPDATE)) *
NTP_INTERVAL_FREQ / 1000;
txc->freq = (time_freq / NSEC_PER_USEC) <<
(SHIFT_USEC - SHIFT_NSEC);
txc->maxerror = time_maxerror;
txc->esterror = time_esterror;
txc->status = time_status;
txc->constant = time_constant;
txc->precision = 1;
txc->tolerance = MAXFREQ;
txc->tick = tick_usec;
/* PPS is not implemented, so these are zero */
txc->ppsfreq = 0;
txc->jitter = 0;
txc->shift = 0;
txc->stabil = 0;
txc->jitcnt = 0;
txc->calcnt = 0;
txc->errcnt = 0;
txc->stbcnt = 0;
write_sequnlock_irq(&xtime_lock);
do_gettimeofday(&txc->time);
notify_arch_cmos_timer();
return(result);
}

View File

@@ -0,0 +1,540 @@
/*
* linux/kernel/time/tick-broadcast.c
*
* This file contains functions which emulate a local clock-event
* device via a broadcast event source.
*
* Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
*
* This code is licenced under the GPL version 2. For details see
* kernel-base/COPYING.
*/
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/irq.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
#include <linux/tick.h>
#include "tick-internal.h"
/*
* Broadcast support for broken x86 hardware, where the local apic
* timer stops in C3 state.
*/
struct tick_device tick_broadcast_device;
static cpumask_t tick_broadcast_mask;
static DEFINE_SPINLOCK(tick_broadcast_lock);
/*
* Debugging: see timer_list.c
*/
struct tick_device *tick_get_broadcast_device(void)
{
return &tick_broadcast_device;
}
cpumask_t *tick_get_broadcast_mask(void)
{
return &tick_broadcast_mask;
}
/*
* Start the device in periodic mode
*/
static void tick_broadcast_start_periodic(struct clock_event_device *bc)
{
if (bc && bc->mode == CLOCK_EVT_MODE_SHUTDOWN)
tick_setup_periodic(bc, 1);
}
/*
* Check, if the device can be utilized as broadcast device:
*/
int tick_check_broadcast_device(struct clock_event_device *dev)
{
if (tick_broadcast_device.evtdev ||
(dev->features & CLOCK_EVT_FEAT_C3STOP))
return 0;
clockevents_exchange_device(NULL, dev);
tick_broadcast_device.evtdev = dev;
if (!cpus_empty(tick_broadcast_mask))
tick_broadcast_start_periodic(dev);
return 1;
}
/*
* Check, if the device is the broadcast device
*/
int tick_is_broadcast_device(struct clock_event_device *dev)
{
return (dev && tick_broadcast_device.evtdev == dev);
}
/*
* Check, if the device is disfunctional and a place holder, which
* needs to be handled by the broadcast device.
*/
int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
{
unsigned long flags;
int ret = 0;
spin_lock_irqsave(&tick_broadcast_lock, flags);
/*
* Devices might be registered with both periodic and oneshot
* mode disabled. This signals, that the device needs to be
* operated from the broadcast device and is a placeholder for
* the cpu local device.
*/
if (!tick_device_is_functional(dev)) {
dev->event_handler = tick_handle_periodic;
cpu_set(cpu, tick_broadcast_mask);
tick_broadcast_start_periodic(tick_broadcast_device.evtdev);
ret = 1;
}
spin_unlock_irqrestore(&tick_broadcast_lock, flags);
return ret;
}
/*
* Broadcast the event to the cpus, which are set in the mask
*/
int tick_do_broadcast(cpumask_t mask)
{
int ret = 0, cpu = smp_processor_id();
struct tick_device *td;
/*
* Check, if the current cpu is in the mask
*/
if (cpu_isset(cpu, mask)) {
cpu_clear(cpu, mask);
td = &per_cpu(tick_cpu_device, cpu);
td->evtdev->event_handler(td->evtdev);
ret = 1;
}
if (!cpus_empty(mask)) {
/*
* It might be necessary to actually check whether the devices
* have different broadcast functions. For now, just use the
* one of the first device. This works as long as we have this
* misfeature only on x86 (lapic)
*/
cpu = first_cpu(mask);
td = &per_cpu(tick_cpu_device, cpu);
td->evtdev->broadcast(mask);
ret = 1;
}
return ret;
}
/*
* Periodic broadcast:
* - invoke the broadcast handlers
*/
static void tick_do_periodic_broadcast(void)
{
cpumask_t mask;
spin_lock(&tick_broadcast_lock);
cpus_and(mask, cpu_online_map, tick_broadcast_mask);
tick_do_broadcast(mask);
spin_unlock(&tick_broadcast_lock);
}
/*
* Event handler for periodic broadcast ticks
*/
static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
{
dev->next_event.tv64 = KTIME_MAX;
tick_do_periodic_broadcast();
/*
* The device is in periodic mode. No reprogramming necessary:
*/
if (dev->mode == CLOCK_EVT_MODE_PERIODIC)
return;
/*
* Setup the next period for devices, which do not have
* periodic mode:
*/
for (;;) {
ktime_t next = ktime_add(dev->next_event, tick_period);
if (!clockevents_program_event(dev, next, ktime_get()))
return;
tick_do_periodic_broadcast();
}
}
/*
* Powerstate information: The system enters/leaves a state, where
* affected devices might stop
*/
static void tick_do_broadcast_on_off(void *why)
{
struct clock_event_device *bc, *dev;
struct tick_device *td;
unsigned long flags, *reason = why;
int cpu;
spin_lock_irqsave(&tick_broadcast_lock, flags);
cpu = smp_processor_id();
td = &per_cpu(tick_cpu_device, cpu);
dev = td->evtdev;
bc = tick_broadcast_device.evtdev;
/*
* Is the device in broadcast mode forever or is it not
* affected by the powerstate ?
*/
if (!dev || !tick_device_is_functional(dev) ||
!(dev->features & CLOCK_EVT_FEAT_C3STOP))
goto out;
if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_ON) {
if (!cpu_isset(cpu, tick_broadcast_mask)) {
cpu_set(cpu, tick_broadcast_mask);
if (td->mode == TICKDEV_MODE_PERIODIC)
clockevents_set_mode(dev,
CLOCK_EVT_MODE_SHUTDOWN);
}
} else {
if (cpu_isset(cpu, tick_broadcast_mask)) {
cpu_clear(cpu, tick_broadcast_mask);
if (td->mode == TICKDEV_MODE_PERIODIC)
tick_setup_periodic(dev, 0);
}
}
if (cpus_empty(tick_broadcast_mask))
clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN);
else {
if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
tick_broadcast_start_periodic(bc);
else
tick_broadcast_setup_oneshot(bc);
}
out:
spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
/*
* Powerstate information: The system enters/leaves a state, where
* affected devices might stop.
*/
void tick_broadcast_on_off(unsigned long reason, int *oncpu)
{
int cpu = get_cpu();
if (!cpu_isset(*oncpu, cpu_online_map)) {
printk(KERN_ERR "tick-braodcast: ignoring broadcast for "
"offline CPU #%d\n", *oncpu);
} else {
if (cpu == *oncpu)
tick_do_broadcast_on_off(&reason);
else
smp_call_function_single(*oncpu,
tick_do_broadcast_on_off,
&reason, 1, 1);
}
put_cpu();
}
/*
* Set the periodic handler depending on broadcast on/off
*/
void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
{
if (!broadcast)
dev->event_handler = tick_handle_periodic;
else
dev->event_handler = tick_handle_periodic_broadcast;
}
/*
* Remove a CPU from broadcasting
*/
void tick_shutdown_broadcast(unsigned int *cpup)
{
struct clock_event_device *bc;
unsigned long flags;
unsigned int cpu = *cpup;
spin_lock_irqsave(&tick_broadcast_lock, flags);
bc = tick_broadcast_device.evtdev;
cpu_clear(cpu, tick_broadcast_mask);
if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
if (bc && cpus_empty(tick_broadcast_mask))
clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN);
}
spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
void tick_suspend_broadcast(void)
{
struct clock_event_device *bc;
unsigned long flags;
spin_lock_irqsave(&tick_broadcast_lock, flags);
bc = tick_broadcast_device.evtdev;
if (bc && tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN);
spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
int tick_resume_broadcast(void)
{
struct clock_event_device *bc;
unsigned long flags;
int broadcast = 0;
spin_lock_irqsave(&tick_broadcast_lock, flags);
bc = tick_broadcast_device.evtdev;
if (bc) {
switch (tick_broadcast_device.mode) {
case TICKDEV_MODE_PERIODIC:
if(!cpus_empty(tick_broadcast_mask))
tick_broadcast_start_periodic(bc);
broadcast = cpu_isset(smp_processor_id(),
tick_broadcast_mask);
break;
case TICKDEV_MODE_ONESHOT:
broadcast = tick_resume_broadcast_oneshot(bc);
break;
}
}
spin_unlock_irqrestore(&tick_broadcast_lock, flags);
return broadcast;
}
#ifdef CONFIG_TICK_ONESHOT
static cpumask_t tick_broadcast_oneshot_mask;
/*
* Debugging: see timer_list.c
*/
cpumask_t *tick_get_broadcast_oneshot_mask(void)
{
return &tick_broadcast_oneshot_mask;
}
static int tick_broadcast_set_event(ktime_t expires, int force)
{
struct clock_event_device *bc = tick_broadcast_device.evtdev;
ktime_t now = ktime_get();
int res;
for(;;) {
res = clockevents_program_event(bc, expires, now);
if (!res || !force)
return res;
now = ktime_get();
expires = ktime_add(now, ktime_set(0, bc->min_delta_ns));
}
}
int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
{
clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
if(!cpus_empty(tick_broadcast_oneshot_mask))
tick_broadcast_set_event(ktime_get(), 1);
return cpu_isset(smp_processor_id(), tick_broadcast_oneshot_mask);
}
/*
* Reprogram the broadcast device:
*
* Called with tick_broadcast_lock held and interrupts disabled.
*/
static int tick_broadcast_reprogram(void)
{
ktime_t expires = { .tv64 = KTIME_MAX };
struct tick_device *td;
int cpu;
/*
* Find the event which expires next:
*/
for (cpu = first_cpu(tick_broadcast_oneshot_mask); cpu != NR_CPUS;
cpu = next_cpu(cpu, tick_broadcast_oneshot_mask)) {
td = &per_cpu(tick_cpu_device, cpu);
if (td->evtdev->next_event.tv64 < expires.tv64)
expires = td->evtdev->next_event;
}
if (expires.tv64 == KTIME_MAX)
return 0;
return tick_broadcast_set_event(expires, 0);
}
/*
* Handle oneshot mode broadcasting
*/
static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
{
struct tick_device *td;
cpumask_t mask;
ktime_t now;
int cpu;
spin_lock(&tick_broadcast_lock);
again:
dev->next_event.tv64 = KTIME_MAX;
mask = CPU_MASK_NONE;
now = ktime_get();
/* Find all expired events */
for (cpu = first_cpu(tick_broadcast_oneshot_mask); cpu != NR_CPUS;
cpu = next_cpu(cpu, tick_broadcast_oneshot_mask)) {
td = &per_cpu(tick_cpu_device, cpu);
if (td->evtdev->next_event.tv64 <= now.tv64)
cpu_set(cpu, mask);
}
/*
* Wakeup the cpus which have an expired event. The broadcast
* device is reprogrammed in the return from idle code.
*/
if (!tick_do_broadcast(mask)) {
/*
* The global event did not expire any CPU local
* events. This happens in dyntick mode, as the
* maximum PIT delta is quite small.
*/
if (tick_broadcast_reprogram())
goto again;
}
spin_unlock(&tick_broadcast_lock);
}
/*
* Powerstate information: The system enters/leaves a state, where
* affected devices might stop
*/
void tick_broadcast_oneshot_control(unsigned long reason)
{
struct clock_event_device *bc, *dev;
struct tick_device *td;
unsigned long flags;
int cpu;
spin_lock_irqsave(&tick_broadcast_lock, flags);
/*
* Periodic mode does not care about the enter/exit of power
* states
*/
if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
goto out;
bc = tick_broadcast_device.evtdev;
cpu = smp_processor_id();
td = &per_cpu(tick_cpu_device, cpu);
dev = td->evtdev;
if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
goto out;
if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
if (!cpu_isset(cpu, tick_broadcast_oneshot_mask)) {
cpu_set(cpu, tick_broadcast_oneshot_mask);
clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
if (dev->next_event.tv64 < bc->next_event.tv64)
tick_broadcast_set_event(dev->next_event, 1);
}
} else {
if (cpu_isset(cpu, tick_broadcast_oneshot_mask)) {
cpu_clear(cpu, tick_broadcast_oneshot_mask);
clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
if (dev->next_event.tv64 != KTIME_MAX)
tick_program_event(dev->next_event, 1);
}
}
out:
spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
/**
* tick_broadcast_setup_highres - setup the broadcast device for highres
*/
void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
{
if (bc->mode != CLOCK_EVT_MODE_ONESHOT) {
bc->event_handler = tick_handle_oneshot_broadcast;
clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
bc->next_event.tv64 = KTIME_MAX;
}
}
/*
* Select oneshot operating mode for the broadcast device
*/
void tick_broadcast_switch_to_oneshot(void)
{
struct clock_event_device *bc;
unsigned long flags;
spin_lock_irqsave(&tick_broadcast_lock, flags);
tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
bc = tick_broadcast_device.evtdev;
if (bc)
tick_broadcast_setup_oneshot(bc);
spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
/*
* Remove a dead CPU from broadcasting
*/
void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
{
struct clock_event_device *bc;
unsigned long flags;
unsigned int cpu = *cpup;
spin_lock_irqsave(&tick_broadcast_lock, flags);
bc = tick_broadcast_device.evtdev;
cpu_clear(cpu, tick_broadcast_oneshot_mask);
if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT) {
if (bc && cpus_empty(tick_broadcast_oneshot_mask))
clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN);
}
spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
#endif

386
kernel/time/tick-common.c Normal file
View File

@@ -0,0 +1,386 @@
/*
* linux/kernel/time/tick-common.c
*
* This file contains the base functions to manage periodic tick
* related events.
*
* Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
*
* This code is licenced under the GPL version 2. For details see
* kernel-base/COPYING.
*/
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/irq.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
#include <linux/tick.h>
#include "tick-internal.h"
/*
* Tick devices
*/
DEFINE_PER_CPU(struct tick_device, tick_cpu_device);
/*
* Tick next event: keeps track of the tick time
*/
ktime_t tick_next_period;
ktime_t tick_period;
int tick_do_timer_cpu __read_mostly = -1;
DEFINE_SPINLOCK(tick_device_lock);
/*
* Debugging: see timer_list.c
*/
struct tick_device *tick_get_device(int cpu)
{
return &per_cpu(tick_cpu_device, cpu);
}
/**
* tick_is_oneshot_available - check for a oneshot capable event device
*/
int tick_is_oneshot_available(void)
{
struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
return dev && (dev->features & CLOCK_EVT_FEAT_ONESHOT);
}
/*
* Periodic tick
*/
static void tick_periodic(int cpu)
{
if (tick_do_timer_cpu == cpu) {
write_seqlock(&xtime_lock);
/* Keep track of the next tick event */
tick_next_period = ktime_add(tick_next_period, tick_period);
do_timer(1);
write_sequnlock(&xtime_lock);
}
update_process_times(user_mode(get_irq_regs()));
profile_tick(CPU_PROFILING);
}
/*
* Event handler for periodic ticks
*/
void tick_handle_periodic(struct clock_event_device *dev)
{
int cpu = smp_processor_id();
ktime_t next;
tick_periodic(cpu);
if (dev->mode != CLOCK_EVT_MODE_ONESHOT)
return;
/*
* Setup the next period for devices, which do not have
* periodic mode:
*/
next = ktime_add(dev->next_event, tick_period);
for (;;) {
if (!clockevents_program_event(dev, next, ktime_get()))
return;
tick_periodic(cpu);
next = ktime_add(next, tick_period);
}
}
/*
* Setup the device for a periodic tick
*/
void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
{
tick_set_periodic_handler(dev, broadcast);
/* Broadcast setup ? */
if (!tick_device_is_functional(dev))
return;
if (dev->features & CLOCK_EVT_FEAT_PERIODIC) {
clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC);
} else {
unsigned long seq;
ktime_t next;
do {
seq = read_seqbegin(&xtime_lock);
next = tick_next_period;
} while (read_seqretry(&xtime_lock, seq));
clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
for (;;) {
if (!clockevents_program_event(dev, next, ktime_get()))
return;
next = ktime_add(next, tick_period);
}
}
}
/*
* Setup the tick device
*/
static void tick_setup_device(struct tick_device *td,
struct clock_event_device *newdev, int cpu,
cpumask_t cpumask)
{
ktime_t next_event;
void (*handler)(struct clock_event_device *) = NULL;
/*
* First device setup ?
*/
if (!td->evtdev) {
/*
* If no cpu took the do_timer update, assign it to
* this cpu:
*/
if (tick_do_timer_cpu == -1) {
tick_do_timer_cpu = cpu;
tick_next_period = ktime_get();
tick_period = ktime_set(0, NSEC_PER_SEC / HZ);
}
/*
* Startup in periodic mode first.
*/
td->mode = TICKDEV_MODE_PERIODIC;
} else {
handler = td->evtdev->event_handler;
next_event = td->evtdev->next_event;
}
td->evtdev = newdev;
/*
* When the device is not per cpu, pin the interrupt to the
* current cpu:
*/
if (!cpus_equal(newdev->cpumask, cpumask))
irq_set_affinity(newdev->irq, cpumask);
/*
* When global broadcasting is active, check if the current
* device is registered as a placeholder for broadcast mode.
* This allows us to handle this x86 misfeature in a generic
* way.
*/
if (tick_device_uses_broadcast(newdev, cpu))
return;
if (td->mode == TICKDEV_MODE_PERIODIC)
tick_setup_periodic(newdev, 0);
else
tick_setup_oneshot(newdev, handler, next_event);
}
/*
* Check, if the new registered device should be used.
*/
static int tick_check_new_device(struct clock_event_device *newdev)
{
struct clock_event_device *curdev;
struct tick_device *td;
int cpu, ret = NOTIFY_OK;
unsigned long flags;
cpumask_t cpumask;
spin_lock_irqsave(&tick_device_lock, flags);
cpu = smp_processor_id();
if (!cpu_isset(cpu, newdev->cpumask))
goto out;
td = &per_cpu(tick_cpu_device, cpu);
curdev = td->evtdev;
cpumask = cpumask_of_cpu(cpu);
/* cpu local device ? */
if (!cpus_equal(newdev->cpumask, cpumask)) {
/*
* If the cpu affinity of the device interrupt can not
* be set, ignore it.
*/
if (!irq_can_set_affinity(newdev->irq))
goto out_bc;
/*
* If we have a cpu local device already, do not replace it
* by a non cpu local device
*/
if (curdev && cpus_equal(curdev->cpumask, cpumask))
goto out_bc;
}
/*
* If we have an active device, then check the rating and the oneshot
* feature.
*/
if (curdev) {
/*
* Prefer one shot capable devices !
*/
if ((curdev->features & CLOCK_EVT_FEAT_ONESHOT) &&
!(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
goto out_bc;
/*
* Check the rating
*/
if (curdev->rating >= newdev->rating)
goto out_bc;
}
/*
* Replace the eventually existing device by the new
* device. If the current device is the broadcast device, do
* not give it back to the clockevents layer !
*/
if (tick_is_broadcast_device(curdev)) {
clockevents_set_mode(curdev, CLOCK_EVT_MODE_SHUTDOWN);
curdev = NULL;
}
clockevents_exchange_device(curdev, newdev);
tick_setup_device(td, newdev, cpu, cpumask);
if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
tick_oneshot_notify();
spin_unlock_irqrestore(&tick_device_lock, flags);
return NOTIFY_STOP;
out_bc:
/*
* Can the new device be used as a broadcast device ?
*/
if (tick_check_broadcast_device(newdev))
ret = NOTIFY_STOP;
out:
spin_unlock_irqrestore(&tick_device_lock, flags);
return ret;
}
/*
* Shutdown an event device on a given cpu:
*
* This is called on a life CPU, when a CPU is dead. So we cannot
* access the hardware device itself.
* We just set the mode and remove it from the lists.
*/
static void tick_shutdown(unsigned int *cpup)
{
struct tick_device *td = &per_cpu(tick_cpu_device, *cpup);
struct clock_event_device *dev = td->evtdev;
unsigned long flags;
spin_lock_irqsave(&tick_device_lock, flags);
td->mode = TICKDEV_MODE_PERIODIC;
if (dev) {
/*
* Prevent that the clock events layer tries to call
* the set mode function!
*/
dev->mode = CLOCK_EVT_MODE_UNUSED;
clockevents_exchange_device(dev, NULL);
td->evtdev = NULL;
}
/* Transfer the do_timer job away from this cpu */
if (*cpup == tick_do_timer_cpu) {
int cpu = first_cpu(cpu_online_map);
tick_do_timer_cpu = (cpu != NR_CPUS) ? cpu : -1;
}
spin_unlock_irqrestore(&tick_device_lock, flags);
}
static void tick_suspend(void)
{
struct tick_device *td = &__get_cpu_var(tick_cpu_device);
unsigned long flags;
spin_lock_irqsave(&tick_device_lock, flags);
clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_SHUTDOWN);
spin_unlock_irqrestore(&tick_device_lock, flags);
}
static void tick_resume(void)
{
struct tick_device *td = &__get_cpu_var(tick_cpu_device);
unsigned long flags;
spin_lock_irqsave(&tick_device_lock, flags);
if (td->mode == TICKDEV_MODE_PERIODIC)
tick_setup_periodic(td->evtdev, 0);
else
tick_resume_oneshot();
spin_unlock_irqrestore(&tick_device_lock, flags);
}
/*
* Notification about clock event devices
*/
static int tick_notify(struct notifier_block *nb, unsigned long reason,
void *dev)
{
switch (reason) {
case CLOCK_EVT_NOTIFY_ADD:
return tick_check_new_device(dev);
case CLOCK_EVT_NOTIFY_BROADCAST_ON:
case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
tick_broadcast_on_off(reason, dev);
break;
case CLOCK_EVT_NOTIFY_BROADCAST_ENTER:
case CLOCK_EVT_NOTIFY_BROADCAST_EXIT:
tick_broadcast_oneshot_control(reason);
break;
case CLOCK_EVT_NOTIFY_CPU_DEAD:
tick_shutdown_broadcast_oneshot(dev);
tick_shutdown_broadcast(dev);
tick_shutdown(dev);
break;
case CLOCK_EVT_NOTIFY_SUSPEND:
tick_suspend();
tick_suspend_broadcast();
break;
case CLOCK_EVT_NOTIFY_RESUME:
if (!tick_resume_broadcast())
tick_resume();
break;
default:
break;
}
return NOTIFY_OK;
}
static struct notifier_block tick_notifier = {
.notifier_call = tick_notify,
};
/**
* tick_init - initialize the tick control
*
* Register the notifier with the clockevents framework
*/
void __init tick_init(void)
{
clockevents_register_notifier(&tick_notifier);
}

124
kernel/time/tick-internal.h Normal file
View File

@@ -0,0 +1,124 @@
/*
* tick internal variable and functions used by low/high res code
*/
DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
extern spinlock_t tick_device_lock;
extern ktime_t tick_next_period;
extern ktime_t tick_period;
extern int tick_do_timer_cpu __read_mostly;
extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast);
extern void tick_handle_periodic(struct clock_event_device *dev);
/*
* NO_HZ / high resolution timer shared code
*/
#ifdef CONFIG_TICK_ONESHOT
extern void tick_setup_oneshot(struct clock_event_device *newdev,
void (*handler)(struct clock_event_device *),
ktime_t nextevt);
extern int tick_program_event(ktime_t expires, int force);
extern void tick_oneshot_notify(void);
extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *));
extern void tick_resume_oneshot(void);
# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
extern void tick_broadcast_oneshot_control(unsigned long reason);
extern void tick_broadcast_switch_to_oneshot(void);
extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup);
extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc);
# else /* BROADCAST */
static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
{
BUG();
}
static inline void tick_broadcast_oneshot_control(unsigned long reason) { }
static inline void tick_broadcast_switch_to_oneshot(void) { }
static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
# endif /* !BROADCAST */
#else /* !ONESHOT */
static inline
void tick_setup_oneshot(struct clock_event_device *newdev,
void (*handler)(struct clock_event_device *),
ktime_t nextevt)
{
BUG();
}
static inline void tick_resume_oneshot(void)
{
BUG();
}
static inline int tick_program_event(ktime_t expires, int force)
{
return 0;
}
static inline void tick_oneshot_notify(void) { }
static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
{
BUG();
}
static inline void tick_broadcast_oneshot_control(unsigned long reason) { }
static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
{
return 0;
}
#endif /* !TICK_ONESHOT */
/*
* Broadcasting support
*/
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
extern int tick_do_broadcast(cpumask_t mask);
extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu);
extern int tick_check_broadcast_device(struct clock_event_device *dev);
extern int tick_is_broadcast_device(struct clock_event_device *dev);
extern void tick_broadcast_on_off(unsigned long reason, int *oncpu);
extern void tick_shutdown_broadcast(unsigned int *cpup);
extern void tick_suspend_broadcast(void);
extern int tick_resume_broadcast(void);
extern void
tick_set_periodic_handler(struct clock_event_device *dev, int broadcast);
#else /* !BROADCAST */
static inline int tick_check_broadcast_device(struct clock_event_device *dev)
{
return 0;
}
static inline int tick_is_broadcast_device(struct clock_event_device *dev)
{
return 0;
}
static inline int tick_device_uses_broadcast(struct clock_event_device *dev,
int cpu)
{
return 0;
}
static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { }
static inline void tick_broadcast_on_off(unsigned long reason, int *oncpu) { }
static inline void tick_shutdown_broadcast(unsigned int *cpup) { }
static inline void tick_suspend_broadcast(void) { }
static inline int tick_resume_broadcast(void) { return 0; }
/*
* Set the periodic handler in non broadcast mode
*/
static inline void tick_set_periodic_handler(struct clock_event_device *dev,
int broadcast)
{
dev->event_handler = tick_handle_periodic;
}
#endif /* !BROADCAST */
/*
* Check, if the device is functional or a dummy for broadcast
*/
static inline int tick_device_is_functional(struct clock_event_device *dev)
{
return !(dev->features & CLOCK_EVT_FEAT_DUMMY);
}

View File

@@ -0,0 +1,96 @@
/*
* linux/kernel/time/tick-oneshot.c
*
* This file contains functions which manage high resolution tick
* related events.
*
* Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
*
* This code is licenced under the GPL version 2. For details see
* kernel-base/COPYING.
*/
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/irq.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
#include <linux/tick.h>
#include "tick-internal.h"
/**
* tick_program_event
*/
int tick_program_event(ktime_t expires, int force)
{
struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
ktime_t now = ktime_get();
while (1) {
int ret = clockevents_program_event(dev, expires, now);
if (!ret || !force)
return ret;
now = ktime_get();
expires = ktime_add(now, ktime_set(0, dev->min_delta_ns));
}
}
/**
* tick_resume_onshot - resume oneshot mode
*/
void tick_resume_oneshot(void)
{
struct tick_device *td = &__get_cpu_var(tick_cpu_device);
struct clock_event_device *dev = td->evtdev;
clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
tick_program_event(ktime_get(), 1);
}
/**
* tick_setup_oneshot - setup the event device for oneshot mode (hres or nohz)
*/
void tick_setup_oneshot(struct clock_event_device *newdev,
void (*handler)(struct clock_event_device *),
ktime_t next_event)
{
newdev->event_handler = handler;
clockevents_set_mode(newdev, CLOCK_EVT_MODE_ONESHOT);
clockevents_program_event(newdev, next_event, ktime_get());
}
/**
* tick_switch_to_oneshot - switch to oneshot mode
*/
int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *))
{
struct tick_device *td = &__get_cpu_var(tick_cpu_device);
struct clock_event_device *dev = td->evtdev;
if (!dev || !(dev->features & CLOCK_EVT_FEAT_ONESHOT) ||
!tick_device_is_functional(dev))
return -EINVAL;
td->mode = TICKDEV_MODE_ONESHOT;
dev->event_handler = handler;
clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
tick_broadcast_switch_to_oneshot();
return 0;
}
#ifdef CONFIG_HIGH_RES_TIMERS
/**
* tick_init_highres - switch to high resolution mode
*
* Called with interrupts disabled.
*/
int tick_init_highres(void)
{
return tick_switch_to_oneshot(hrtimer_interrupt);
}
#endif

611
kernel/time/tick-sched.c Normal file
View File

@@ -0,0 +1,611 @@
/*
* linux/kernel/time/tick-sched.c
*
* Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner
*
* No idle tick implementation for low and high resolution timers
*
* Started by: Thomas Gleixner and Ingo Molnar
*
* For licencing details see kernel-base/COPYING
*/
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
#include <linux/tick.h>
#include <asm/irq_regs.h>
#include "tick-internal.h"
/*
* Per cpu nohz control structure
*/
static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
/*
* The time, when the last jiffy update happened. Protected by xtime_lock.
*/
static ktime_t last_jiffies_update;
struct tick_sched *tick_get_tick_sched(int cpu)
{
return &per_cpu(tick_cpu_sched, cpu);
}
/*
* Must be called with interrupts disabled !
*/
static void tick_do_update_jiffies64(ktime_t now)
{
unsigned long ticks = 0;
ktime_t delta;
/* Reevalute with xtime_lock held */
write_seqlock(&xtime_lock);
delta = ktime_sub(now, last_jiffies_update);
if (delta.tv64 >= tick_period.tv64) {
delta = ktime_sub(delta, tick_period);
last_jiffies_update = ktime_add(last_jiffies_update,
tick_period);
/* Slow path for long timeouts */
if (unlikely(delta.tv64 >= tick_period.tv64)) {
s64 incr = ktime_to_ns(tick_period);
ticks = ktime_divns(delta, incr);
last_jiffies_update = ktime_add_ns(last_jiffies_update,
incr * ticks);
}
do_timer(++ticks);
}
write_sequnlock(&xtime_lock);
}
/*
* Initialize and return retrieve the jiffies update.
*/
static ktime_t tick_init_jiffy_update(void)
{
ktime_t period;
write_seqlock(&xtime_lock);
/* Did we start the jiffies update yet ? */
if (last_jiffies_update.tv64 == 0)
last_jiffies_update = tick_next_period;
period = last_jiffies_update;
write_sequnlock(&xtime_lock);
return period;
}
/*
* NOHZ - aka dynamic tick functionality
*/
#ifdef CONFIG_NO_HZ
/*
* NO HZ enabled ?
*/
static int tick_nohz_enabled __read_mostly = 1;
/*
* Enable / Disable tickless mode
*/
static int __init setup_tick_nohz(char *str)
{
if (!strcmp(str, "off"))
tick_nohz_enabled = 0;
else if (!strcmp(str, "on"))
tick_nohz_enabled = 1;
else
return 0;
return 1;
}
__setup("nohz=", setup_tick_nohz);
/**
* tick_nohz_update_jiffies - update jiffies when idle was interrupted
*
* Called from interrupt entry when the CPU was idle
*
* In case the sched_tick was stopped on this CPU, we have to check if jiffies
* must be updated. Otherwise an interrupt handler could use a stale jiffy
* value. We do this unconditionally on any cpu, as we don't know whether the
* cpu, which has the update task assigned is in a long sleep.
*/
void tick_nohz_update_jiffies(void)
{
int cpu = smp_processor_id();
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
unsigned long flags;
ktime_t now;
if (!ts->tick_stopped)
return;
cpu_clear(cpu, nohz_cpu_mask);
now = ktime_get();
local_irq_save(flags);
tick_do_update_jiffies64(now);
local_irq_restore(flags);
}
/**
* tick_nohz_stop_sched_tick - stop the idle tick from the idle task
*
* When the next event is more than a tick into the future, stop the idle tick
* Called either from the idle loop or from irq_exit() when an idle period was
* just interrupted by an interrupt which did not cause a reschedule.
*/
void tick_nohz_stop_sched_tick(void)
{
unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags;
struct tick_sched *ts;
ktime_t last_update, expires, now, delta;
int cpu;
local_irq_save(flags);
cpu = smp_processor_id();
ts = &per_cpu(tick_cpu_sched, cpu);
if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
goto end;
if (need_resched())
goto end;
cpu = smp_processor_id();
if (unlikely(local_softirq_pending())) {
static int ratelimit;
if (ratelimit < 10) {
printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
local_softirq_pending());
ratelimit++;
}
}
now = ktime_get();
/*
* When called from irq_exit we need to account the idle sleep time
* correctly.
*/
if (ts->tick_stopped) {
delta = ktime_sub(now, ts->idle_entrytime);
ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
}
ts->idle_entrytime = now;
ts->idle_calls++;
/* Read jiffies and the time when jiffies were updated last */
do {
seq = read_seqbegin(&xtime_lock);
last_update = last_jiffies_update;
last_jiffies = jiffies;
} while (read_seqretry(&xtime_lock, seq));
/* Get the next timer wheel timer */
next_jiffies = get_next_timer_interrupt(last_jiffies);
delta_jiffies = next_jiffies - last_jiffies;
if (rcu_needs_cpu(cpu))
delta_jiffies = 1;
/*
* Do not stop the tick, if we are only one off
* or if the cpu is required for rcu
*/
if (!ts->tick_stopped && delta_jiffies == 1)
goto out;
/* Schedule the tick, if we are at least one jiffie off */
if ((long)delta_jiffies >= 1) {
if (delta_jiffies > 1)
cpu_set(cpu, nohz_cpu_mask);
/*
* nohz_stop_sched_tick can be called several times before
* the nohz_restart_sched_tick is called. This happens when
* interrupts arrive which do not cause a reschedule. In the
* first call we save the current tick time, so we can restart
* the scheduler tick in nohz_restart_sched_tick.
*/
if (!ts->tick_stopped) {
ts->idle_tick = ts->sched_timer.expires;
ts->tick_stopped = 1;
ts->idle_jiffies = last_jiffies;
}
/*
* If this cpu is the one which updates jiffies, then
* give up the assignment and let it be taken by the
* cpu which runs the tick timer next, which might be
* this cpu as well. If we don't drop this here the
* jiffies might be stale and do_timer() never
* invoked.
*/
if (cpu == tick_do_timer_cpu)
tick_do_timer_cpu = -1;
/*
* calculate the expiry time for the next timer wheel
* timer
*/
expires = ktime_add_ns(last_update, tick_period.tv64 *
delta_jiffies);
ts->idle_expires = expires;
ts->idle_sleeps++;
if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
hrtimer_start(&ts->sched_timer, expires,
HRTIMER_MODE_ABS);
/* Check, if the timer was already in the past */
if (hrtimer_active(&ts->sched_timer))
goto out;
} else if(!tick_program_event(expires, 0))
goto out;
/*
* We are past the event already. So we crossed a
* jiffie boundary. Update jiffies and raise the
* softirq.
*/
tick_do_update_jiffies64(ktime_get());
cpu_clear(cpu, nohz_cpu_mask);
}
raise_softirq_irqoff(TIMER_SOFTIRQ);
out:
ts->next_jiffies = next_jiffies;
ts->last_jiffies = last_jiffies;
end:
local_irq_restore(flags);
}
/**
* nohz_restart_sched_tick - restart the idle tick from the idle task
*
* Restart the idle tick when the CPU is woken up from idle
*/
void tick_nohz_restart_sched_tick(void)
{
int cpu = smp_processor_id();
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
unsigned long ticks;
ktime_t now, delta;
if (!ts->tick_stopped)
return;
/* Update jiffies first */
now = ktime_get();
local_irq_disable();
tick_do_update_jiffies64(now);
cpu_clear(cpu, nohz_cpu_mask);
/* Account the idle time */
delta = ktime_sub(now, ts->idle_entrytime);
ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
/*
* We stopped the tick in idle. Update process times would miss the
* time we slept as update_process_times does only a 1 tick
* accounting. Enforce that this is accounted to idle !
*/
ticks = jiffies - ts->idle_jiffies;
/*
* We might be one off. Do not randomly account a huge number of ticks!
*/
if (ticks && ticks < LONG_MAX) {
add_preempt_count(HARDIRQ_OFFSET);
account_system_time(current, HARDIRQ_OFFSET,
jiffies_to_cputime(ticks));
sub_preempt_count(HARDIRQ_OFFSET);
}
/*
* Cancel the scheduled timer and restore the tick
*/
ts->tick_stopped = 0;
hrtimer_cancel(&ts->sched_timer);
ts->sched_timer.expires = ts->idle_tick;
while (1) {
/* Forward the time to expire in the future */
hrtimer_forward(&ts->sched_timer, now, tick_period);
if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
hrtimer_start(&ts->sched_timer,
ts->sched_timer.expires,
HRTIMER_MODE_ABS);
/* Check, if the timer was already in the past */
if (hrtimer_active(&ts->sched_timer))
break;
} else {
if (!tick_program_event(ts->sched_timer.expires, 0))
break;
}
/* Update jiffies and reread time */
tick_do_update_jiffies64(now);
now = ktime_get();
}
local_irq_enable();
}
static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now)
{
hrtimer_forward(&ts->sched_timer, now, tick_period);
return tick_program_event(ts->sched_timer.expires, 0);
}
/*
* The nohz low res interrupt handler
*/
static void tick_nohz_handler(struct clock_event_device *dev)
{
struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
struct pt_regs *regs = get_irq_regs();
int cpu = smp_processor_id();
ktime_t now = ktime_get();
dev->next_event.tv64 = KTIME_MAX;
/*
* Check if the do_timer duty was dropped. We don't care about
* concurrency: This happens only when the cpu in charge went
* into a long sleep. If two cpus happen to assign themself to
* this duty, then the jiffies update is still serialized by
* xtime_lock.
*/
if (unlikely(tick_do_timer_cpu == -1))
tick_do_timer_cpu = cpu;
/* Check, if the jiffies need an update */
if (tick_do_timer_cpu == cpu)
tick_do_update_jiffies64(now);
/*
* When we are idle and the tick is stopped, we have to touch
* the watchdog as we might not schedule for a really long
* time. This happens on complete idle SMP systems while
* waiting on the login prompt. We also increment the "start
* of idle" jiffy stamp so the idle accounting adjustment we
* do when we go busy again does not account too much ticks.
*/
if (ts->tick_stopped) {
touch_softlockup_watchdog();
ts->idle_jiffies++;
}
update_process_times(user_mode(regs));
profile_tick(CPU_PROFILING);
/* Do not restart, when we are in the idle loop */
if (ts->tick_stopped)
return;
while (tick_nohz_reprogram(ts, now)) {
now = ktime_get();
tick_do_update_jiffies64(now);
}
}
/**
* tick_nohz_switch_to_nohz - switch to nohz mode
*/
static void tick_nohz_switch_to_nohz(void)
{
struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
ktime_t next;
if (!tick_nohz_enabled)
return;
local_irq_disable();
if (tick_switch_to_oneshot(tick_nohz_handler)) {
local_irq_enable();
return;
}
ts->nohz_mode = NOHZ_MODE_LOWRES;
/*
* Recycle the hrtimer in ts, so we can share the
* hrtimer_forward with the highres code.
*/
hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
/* Get the next period */
next = tick_init_jiffy_update();
for (;;) {
ts->sched_timer.expires = next;
if (!tick_program_event(next, 0))
break;
next = ktime_add(next, tick_period);
}
local_irq_enable();
printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n",
smp_processor_id());
}
#else
static inline void tick_nohz_switch_to_nohz(void) { }
#endif /* NO_HZ */
/*
* High resolution timer specific code
*/
#ifdef CONFIG_HIGH_RES_TIMERS
/*
* We rearm the timer until we get disabled by the idle code
* Called with interrupts disabled and timer->base->cpu_base->lock held.
*/
static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
{
struct tick_sched *ts =
container_of(timer, struct tick_sched, sched_timer);
struct hrtimer_cpu_base *base = timer->base->cpu_base;
struct pt_regs *regs = get_irq_regs();
ktime_t now = ktime_get();
int cpu = smp_processor_id();
#ifdef CONFIG_NO_HZ
/*
* Check if the do_timer duty was dropped. We don't care about
* concurrency: This happens only when the cpu in charge went
* into a long sleep. If two cpus happen to assign themself to
* this duty, then the jiffies update is still serialized by
* xtime_lock.
*/
if (unlikely(tick_do_timer_cpu == -1))
tick_do_timer_cpu = cpu;
#endif
/* Check, if the jiffies need an update */
if (tick_do_timer_cpu == cpu)
tick_do_update_jiffies64(now);
/*
* Do not call, when we are not in irq context and have
* no valid regs pointer
*/
if (regs) {
/*
* When we are idle and the tick is stopped, we have to touch
* the watchdog as we might not schedule for a really long
* time. This happens on complete idle SMP systems while
* waiting on the login prompt. We also increment the "start of
* idle" jiffy stamp so the idle accounting adjustment we do
* when we go busy again does not account too much ticks.
*/
if (ts->tick_stopped) {
touch_softlockup_watchdog();
ts->idle_jiffies++;
}
/*
* update_process_times() might take tasklist_lock, hence
* drop the base lock. sched-tick hrtimers are per-CPU and
* never accessible by userspace APIs, so this is safe to do.
*/
spin_unlock(&base->lock);
update_process_times(user_mode(regs));
profile_tick(CPU_PROFILING);
spin_lock(&base->lock);
}
/* Do not restart, when we are in the idle loop */
if (ts->tick_stopped)
return HRTIMER_NORESTART;
hrtimer_forward(timer, now, tick_period);
return HRTIMER_RESTART;
}
/**
* tick_setup_sched_timer - setup the tick emulation timer
*/
void tick_setup_sched_timer(void)
{
struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
ktime_t now = ktime_get();
/*
* Emulate tick processing via per-CPU hrtimers:
*/
hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
ts->sched_timer.function = tick_sched_timer;
ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
/* Get the next period */
ts->sched_timer.expires = tick_init_jiffy_update();
for (;;) {
hrtimer_forward(&ts->sched_timer, now, tick_period);
hrtimer_start(&ts->sched_timer, ts->sched_timer.expires,
HRTIMER_MODE_ABS);
/* Check, if the timer was already in the past */
if (hrtimer_active(&ts->sched_timer))
break;
now = ktime_get();
}
#ifdef CONFIG_NO_HZ
if (tick_nohz_enabled)
ts->nohz_mode = NOHZ_MODE_HIGHRES;
#endif
}
void tick_cancel_sched_timer(int cpu)
{
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
if (ts->sched_timer.base)
hrtimer_cancel(&ts->sched_timer);
ts->tick_stopped = 0;
ts->nohz_mode = NOHZ_MODE_INACTIVE;
}
#endif /* HIGH_RES_TIMERS */
/**
* Async notification about clocksource changes
*/
void tick_clock_notify(void)
{
int cpu;
for_each_possible_cpu(cpu)
set_bit(0, &per_cpu(tick_cpu_sched, cpu).check_clocks);
}
/*
* Async notification about clock event changes
*/
void tick_oneshot_notify(void)
{
struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
set_bit(0, &ts->check_clocks);
}
/**
* Check, if a change happened, which makes oneshot possible.
*
* Called cyclic from the hrtimer softirq (driven by the timer
* softirq) allow_nohz signals, that we can switch into low-res nohz
* mode, because high resolution timers are disabled (either compile
* or runtime).
*/
int tick_check_oneshot_change(int allow_nohz)
{
struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
if (!test_and_clear_bit(0, &ts->check_clocks))
return 0;
if (ts->nohz_mode != NOHZ_MODE_INACTIVE)
return 0;
if (!timekeeping_is_continuous() || !tick_is_oneshot_available())
return 0;
if (!allow_nohz)
return 1;
tick_nohz_switch_to_nohz();
return 0;
}

287
kernel/time/timer_list.c Normal file
View File

@@ -0,0 +1,287 @@
/*
* kernel/time/timer_list.c
*
* List pending timers
*
* Copyright(C) 2006, Red Hat, Inc., Ingo Molnar
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/proc_fs.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/kallsyms.h>
#include <linux/tick.h>
#include <asm/uaccess.h>
typedef void (*print_fn_t)(struct seq_file *m, unsigned int *classes);
DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases);
/*
* This allows printing both to /proc/timer_list and
* to the console (on SysRq-Q):
*/
#define SEQ_printf(m, x...) \
do { \
if (m) \
seq_printf(m, x); \
else \
printk(x); \
} while (0)
static void print_name_offset(struct seq_file *m, void *sym)
{
unsigned long addr = (unsigned long)sym;
char namebuf[KSYM_NAME_LEN+1];
unsigned long size, offset;
const char *sym_name;
char *modname;
sym_name = kallsyms_lookup(addr, &size, &offset, &modname, namebuf);
if (sym_name)
SEQ_printf(m, "%s", sym_name);
else
SEQ_printf(m, "<%p>", sym);
}
static void
print_timer(struct seq_file *m, struct hrtimer *timer, int idx, u64 now)
{
#ifdef CONFIG_TIMER_STATS
char tmp[TASK_COMM_LEN + 1];
#endif
SEQ_printf(m, " #%d: ", idx);
print_name_offset(m, timer);
SEQ_printf(m, ", ");
print_name_offset(m, timer->function);
SEQ_printf(m, ", S:%02lx", timer->state);
#ifdef CONFIG_TIMER_STATS
SEQ_printf(m, ", ");
print_name_offset(m, timer->start_site);
memcpy(tmp, timer->start_comm, TASK_COMM_LEN);
tmp[TASK_COMM_LEN] = 0;
SEQ_printf(m, ", %s/%d", tmp, timer->start_pid);
#endif
SEQ_printf(m, "\n");
SEQ_printf(m, " # expires at %Ld nsecs [in %Ld nsecs]\n",
(unsigned long long)ktime_to_ns(timer->expires),
(unsigned long long)(ktime_to_ns(timer->expires) - now));
}
static void
print_active_timers(struct seq_file *m, struct hrtimer_clock_base *base,
u64 now)
{
struct hrtimer *timer, tmp;
unsigned long next = 0, i;
struct rb_node *curr;
unsigned long flags;
next_one:
i = 0;
spin_lock_irqsave(&base->cpu_base->lock, flags);
curr = base->first;
/*
* Crude but we have to do this O(N*N) thing, because
* we have to unlock the base when printing:
*/
while (curr && i < next) {
curr = rb_next(curr);
i++;
}
if (curr) {
timer = rb_entry(curr, struct hrtimer, node);
tmp = *timer;
spin_unlock_irqrestore(&base->cpu_base->lock, flags);
print_timer(m, &tmp, i, now);
next++;
goto next_one;
}
spin_unlock_irqrestore(&base->cpu_base->lock, flags);
}
static void
print_base(struct seq_file *m, struct hrtimer_clock_base *base, u64 now)
{
SEQ_printf(m, " .index: %d\n",
base->index);
SEQ_printf(m, " .resolution: %Ld nsecs\n",
(unsigned long long)ktime_to_ns(base->resolution));
SEQ_printf(m, " .get_time: ");
print_name_offset(m, base->get_time);
SEQ_printf(m, "\n");
#ifdef CONFIG_HIGH_RES_TIMERS
SEQ_printf(m, " .offset: %Ld nsecs\n",
ktime_to_ns(base->offset));
#endif
SEQ_printf(m, "active timers:\n");
print_active_timers(m, base, now);
}
static void print_cpu(struct seq_file *m, int cpu, u64 now)
{
struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
int i;
SEQ_printf(m, "\ncpu: %d\n", cpu);
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
SEQ_printf(m, " clock %d:\n", i);
print_base(m, cpu_base->clock_base + i, now);
}
#define P(x) \
SEQ_printf(m, " .%-15s: %Ld\n", #x, (u64)(cpu_base->x))
#define P_ns(x) \
SEQ_printf(m, " .%-15s: %Ld nsecs\n", #x, \
(u64)(ktime_to_ns(cpu_base->x)))
#ifdef CONFIG_HIGH_RES_TIMERS
P_ns(expires_next);
P(hres_active);
P(nr_events);
#endif
#undef P
#undef P_ns
#ifdef CONFIG_TICK_ONESHOT
# define P(x) \
SEQ_printf(m, " .%-15s: %Ld\n", #x, (u64)(ts->x))
# define P_ns(x) \
SEQ_printf(m, " .%-15s: %Ld nsecs\n", #x, \
(u64)(ktime_to_ns(ts->x)))
{
struct tick_sched *ts = tick_get_tick_sched(cpu);
P(nohz_mode);
P_ns(idle_tick);
P(tick_stopped);
P(idle_jiffies);
P(idle_calls);
P(idle_sleeps);
P_ns(idle_entrytime);
P_ns(idle_sleeptime);
P(last_jiffies);
P(next_jiffies);
P_ns(idle_expires);
SEQ_printf(m, "jiffies: %Ld\n", (u64)jiffies);
}
#endif
#undef P
#undef P_ns
}
#ifdef CONFIG_GENERIC_CLOCKEVENTS
static void
print_tickdevice(struct seq_file *m, struct tick_device *td)
{
struct clock_event_device *dev = td->evtdev;
SEQ_printf(m, "\nTick Device: mode: %d\n", td->mode);
SEQ_printf(m, "Clock Event Device: ");
if (!dev) {
SEQ_printf(m, "<NULL>\n");
return;
}
SEQ_printf(m, "%s\n", dev->name);
SEQ_printf(m, " max_delta_ns: %lu\n", dev->max_delta_ns);
SEQ_printf(m, " min_delta_ns: %lu\n", dev->min_delta_ns);
SEQ_printf(m, " mult: %lu\n", dev->mult);
SEQ_printf(m, " shift: %d\n", dev->shift);
SEQ_printf(m, " mode: %d\n", dev->mode);
SEQ_printf(m, " next_event: %Ld nsecs\n",
(unsigned long long) ktime_to_ns(dev->next_event));
SEQ_printf(m, " set_next_event: ");
print_name_offset(m, dev->set_next_event);
SEQ_printf(m, "\n");
SEQ_printf(m, " set_mode: ");
print_name_offset(m, dev->set_mode);
SEQ_printf(m, "\n");
SEQ_printf(m, " event_handler: ");
print_name_offset(m, dev->event_handler);
SEQ_printf(m, "\n");
}
static void timer_list_show_tickdevices(struct seq_file *m)
{
int cpu;
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
print_tickdevice(m, tick_get_broadcast_device());
SEQ_printf(m, "tick_broadcast_mask: %08lx\n",
tick_get_broadcast_mask()->bits[0]);
#ifdef CONFIG_TICK_ONESHOT
SEQ_printf(m, "tick_broadcast_oneshot_mask: %08lx\n",
tick_get_broadcast_oneshot_mask()->bits[0]);
#endif
SEQ_printf(m, "\n");
#endif
for_each_online_cpu(cpu)
print_tickdevice(m, tick_get_device(cpu));
SEQ_printf(m, "\n");
}
#else
static void timer_list_show_tickdevices(struct seq_file *m) { }
#endif
static int timer_list_show(struct seq_file *m, void *v)
{
u64 now = ktime_to_ns(ktime_get());
int cpu;
SEQ_printf(m, "Timer List Version: v0.3\n");
SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
for_each_online_cpu(cpu)
print_cpu(m, cpu, now);
SEQ_printf(m, "\n");
timer_list_show_tickdevices(m);
return 0;
}
void sysrq_timer_list_show(void)
{
timer_list_show(NULL, NULL);
}
static int timer_list_open(struct inode *inode, struct file *filp)
{
return single_open(filp, timer_list_show, NULL);
}
static struct file_operations timer_list_fops = {
.open = timer_list_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
};
static int __init init_timer_list_procfs(void)
{
struct proc_dir_entry *pe;
pe = create_proc_entry("timer_list", 0644, NULL);
if (!pe)
return -ENOMEM;
pe->proc_fops = &timer_list_fops;
return 0;
}
__initcall(init_timer_list_procfs);

421
kernel/time/timer_stats.c Normal file
View File

@@ -0,0 +1,421 @@
/*
* kernel/time/timer_stats.c
*
* Collect timer usage statistics.
*
* Copyright(C) 2006, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
*
* timer_stats is based on timer_top, a similar functionality which was part of
* Con Kolivas dyntick patch set. It was developed by Daniel Petrini at the
* Instituto Nokia de Tecnologia - INdT - Manaus. timer_top's design was based
* on dynamic allocation of the statistics entries and linear search based
* lookup combined with a global lock, rather than the static array, hash
* and per-CPU locking which is used by timer_stats. It was written for the
* pre hrtimer kernel code and therefore did not take hrtimers into account.
* Nevertheless it provided the base for the timer_stats implementation and
* was a helpful source of inspiration. Kudos to Daniel and the Nokia folks
* for this effort.
*
* timer_top.c is
* Copyright (C) 2005 Instituto Nokia de Tecnologia - INdT - Manaus
* Written by Daniel Petrini <d.pensator@gmail.com>
* timer_top.c was released under the GNU General Public License version 2
*
* We export the addresses and counting of timer functions being called,
* the pid and cmdline from the owner process if applicable.
*
* Start/stop data collection:
* # echo 1[0] >/proc/timer_stats
*
* Display the information collected so far:
* # cat /proc/timer_stats
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/proc_fs.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/kallsyms.h>
#include <asm/uaccess.h>
/*
* This is our basic unit of interest: a timer expiry event identified
* by the timer, its start/expire functions and the PID of the task that
* started the timer. We count the number of times an event happens:
*/
struct entry {
/*
* Hash list:
*/
struct entry *next;
/*
* Hash keys:
*/
void *timer;
void *start_func;
void *expire_func;
pid_t pid;
/*
* Number of timeout events:
*/
unsigned long count;
/*
* We save the command-line string to preserve
* this information past task exit:
*/
char comm[TASK_COMM_LEN + 1];
} ____cacheline_aligned_in_smp;
/*
* Spinlock protecting the tables - not taken during lookup:
*/
static DEFINE_SPINLOCK(table_lock);
/*
* Per-CPU lookup locks for fast hash lookup:
*/
static DEFINE_PER_CPU(spinlock_t, lookup_lock);
/*
* Mutex to serialize state changes with show-stats activities:
*/
static DEFINE_MUTEX(show_mutex);
/*
* Collection status, active/inactive:
*/
static int __read_mostly active;
/*
* Beginning/end timestamps of measurement:
*/
static ktime_t time_start, time_stop;
/*
* tstat entry structs only get allocated while collection is
* active and never freed during that time - this simplifies
* things quite a bit.
*
* They get freed when a new collection period is started.
*/
#define MAX_ENTRIES_BITS 10
#define MAX_ENTRIES (1UL << MAX_ENTRIES_BITS)
static unsigned long nr_entries;
static struct entry entries[MAX_ENTRIES];
static atomic_t overflow_count;
/*
* The entries are in a hash-table, for fast lookup:
*/
#define TSTAT_HASH_BITS (MAX_ENTRIES_BITS - 1)
#define TSTAT_HASH_SIZE (1UL << TSTAT_HASH_BITS)
#define TSTAT_HASH_MASK (TSTAT_HASH_SIZE - 1)
#define __tstat_hashfn(entry) \
(((unsigned long)(entry)->timer ^ \
(unsigned long)(entry)->start_func ^ \
(unsigned long)(entry)->expire_func ^ \
(unsigned long)(entry)->pid ) & TSTAT_HASH_MASK)
#define tstat_hashentry(entry) (tstat_hash_table + __tstat_hashfn(entry))
static struct entry *tstat_hash_table[TSTAT_HASH_SIZE] __read_mostly;
static void reset_entries(void)
{
nr_entries = 0;
memset(entries, 0, sizeof(entries));
memset(tstat_hash_table, 0, sizeof(tstat_hash_table));
atomic_set(&overflow_count, 0);
}
static struct entry *alloc_entry(void)
{
if (nr_entries >= MAX_ENTRIES)
return NULL;
return entries + nr_entries++;
}
static int match_entries(struct entry *entry1, struct entry *entry2)
{
return entry1->timer == entry2->timer &&
entry1->start_func == entry2->start_func &&
entry1->expire_func == entry2->expire_func &&
entry1->pid == entry2->pid;
}
/*
* Look up whether an entry matching this item is present
* in the hash already. Must be called with irqs off and the
* lookup lock held:
*/
static struct entry *tstat_lookup(struct entry *entry, char *comm)
{
struct entry **head, *curr, *prev;
head = tstat_hashentry(entry);
curr = *head;
/*
* The fastpath is when the entry is already hashed,
* we do this with the lookup lock held, but with the
* table lock not held:
*/
while (curr) {
if (match_entries(curr, entry))
return curr;
curr = curr->next;
}
/*
* Slowpath: allocate, set up and link a new hash entry:
*/
prev = NULL;
curr = *head;
spin_lock(&table_lock);
/*
* Make sure we have not raced with another CPU:
*/
while (curr) {
if (match_entries(curr, entry))
goto out_unlock;
prev = curr;
curr = curr->next;
}
curr = alloc_entry();
if (curr) {
*curr = *entry;
curr->count = 0;
curr->next = NULL;
memcpy(curr->comm, comm, TASK_COMM_LEN);
smp_mb(); /* Ensure that curr is initialized before insert */
if (prev)
prev->next = curr;
else
*head = curr;
}
out_unlock:
spin_unlock(&table_lock);
return curr;
}
/**
* timer_stats_update_stats - Update the statistics for a timer.
* @timer: pointer to either a timer_list or a hrtimer
* @pid: the pid of the task which set up the timer
* @startf: pointer to the function which did the timer setup
* @timerf: pointer to the timer callback function of the timer
* @comm: name of the process which set up the timer
*
* When the timer is already registered, then the event counter is
* incremented. Otherwise the timer is registered in a free slot.
*/
void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
void *timerf, char * comm)
{
/*
* It doesnt matter which lock we take:
*/
spinlock_t *lock;
struct entry *entry, input;
unsigned long flags;
if (likely(!active))
return;
lock = &per_cpu(lookup_lock, raw_smp_processor_id());
input.timer = timer;
input.start_func = startf;
input.expire_func = timerf;
input.pid = pid;
spin_lock_irqsave(lock, flags);
if (!active)
goto out_unlock;
entry = tstat_lookup(&input, comm);
if (likely(entry))
entry->count++;
else
atomic_inc(&overflow_count);
out_unlock:
spin_unlock_irqrestore(lock, flags);
}
static void print_name_offset(struct seq_file *m, unsigned long addr)
{
char namebuf[KSYM_NAME_LEN+1];
unsigned long size, offset;
const char *sym_name;
char *modname;
sym_name = kallsyms_lookup(addr, &size, &offset, &modname, namebuf);
if (sym_name)
seq_printf(m, "%s", sym_name);
else
seq_printf(m, "<%p>", (void *)addr);
}
static int tstats_show(struct seq_file *m, void *v)
{
struct timespec period;
struct entry *entry;
unsigned long ms;
long events = 0;
ktime_t time;
int i;
mutex_lock(&show_mutex);
/*
* If still active then calculate up to now:
*/
if (active)
time_stop = ktime_get();
time = ktime_sub(time_stop, time_start);
period = ktime_to_timespec(time);
ms = period.tv_nsec / 1000000;
seq_puts(m, "Timer Stats Version: v0.1\n");
seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms);
if (atomic_read(&overflow_count))
seq_printf(m, "Overflow: %d entries\n",
atomic_read(&overflow_count));
for (i = 0; i < nr_entries; i++) {
entry = entries + i;
seq_printf(m, "%4lu, %5d %-16s ",
entry->count, entry->pid, entry->comm);
print_name_offset(m, (unsigned long)entry->start_func);
seq_puts(m, " (");
print_name_offset(m, (unsigned long)entry->expire_func);
seq_puts(m, ")\n");
events += entry->count;
}
ms += period.tv_sec * 1000;
if (!ms)
ms = 1;
if (events && period.tv_sec)
seq_printf(m, "%ld total events, %ld.%ld events/sec\n", events,
events / period.tv_sec, events * 1000 / ms);
else
seq_printf(m, "%ld total events\n", events);
mutex_unlock(&show_mutex);
return 0;
}
/*
* After a state change, make sure all concurrent lookup/update
* activities have stopped:
*/
static void sync_access(void)
{
unsigned long flags;
int cpu;
for_each_online_cpu(cpu) {
spin_lock_irqsave(&per_cpu(lookup_lock, cpu), flags);
/* nothing */
spin_unlock_irqrestore(&per_cpu(lookup_lock, cpu), flags);
}
}
static ssize_t tstats_write(struct file *file, const char __user *buf,
size_t count, loff_t *offs)
{
char ctl[2];
if (count != 2 || *offs)
return -EINVAL;
if (copy_from_user(ctl, buf, count))
return -EFAULT;
mutex_lock(&show_mutex);
switch (ctl[0]) {
case '0':
if (active) {
active = 0;
time_stop = ktime_get();
sync_access();
}
break;
case '1':
if (!active) {
reset_entries();
time_start = ktime_get();
smp_mb();
active = 1;
}
break;
default:
count = -EINVAL;
}
mutex_unlock(&show_mutex);
return count;
}
static int tstats_open(struct inode *inode, struct file *filp)
{
return single_open(filp, tstats_show, NULL);
}
static struct file_operations tstats_fops = {
.open = tstats_open,
.read = seq_read,
.write = tstats_write,
.llseek = seq_lseek,
.release = seq_release,
};
void __init init_timer_stats(void)
{
int cpu;
for_each_possible_cpu(cpu)
spin_lock_init(&per_cpu(lookup_lock, cpu));
}
static int __init init_tstats_procfs(void)
{
struct proc_dir_entry *pe;
pe = create_proc_entry("timer_stats", 0644, NULL);
if (!pe)
return -ENOMEM;
pe->proc_fops = &tstats_fops;
return 0;
}
__initcall(init_tstats_procfs);