Creation of Cybook 2416 (actually Gen4) repository
This commit is contained in:
56
kernel/Kconfig.hz
Normal file
56
kernel/Kconfig.hz
Normal file
@@ -0,0 +1,56 @@
|
||||
#
# Timer Interrupt Frequency Configuration
#

choice
	prompt "Timer frequency"
	default HZ_250
	help
	 Allows the configuration of the timer frequency. It is customary
	 to have the timer interrupt run at 1000 Hz but 100 Hz may be more
	 beneficial for servers and NUMA systems that do not need to have
	 a fast response for user interaction and that may experience bus
	 contention and cacheline bounces as a result of timer interrupts.
	 Note that the timer interrupt occurs on each processor in an SMP
	 environment leading to NR_CPUS * HZ number of timer interrupts
	 per second.


	config HZ_100
		bool "100 HZ"
	help
	  100 Hz is a typical choice for servers, SMP and NUMA systems
	  with lots of processors that may show reduced performance if
	  too many timer interrupts are occurring.

	config HZ_250
		bool "250 HZ"
	help
	 250 Hz is a good compromise choice allowing server performance
	 while also showing good interactive responsiveness even
	 on SMP and NUMA systems. If you are going to be using NTSC video
	 or multimedia, select HZ_300 instead.

	config HZ_300
		bool "300 HZ"
	help
	 300 Hz is a good compromise choice allowing server performance
	 while also showing good interactive responsiveness even
	 on SMP and NUMA systems and exactly dividing by both PAL and
	 NTSC frame rates for video and multimedia work.

	config HZ_1000
		bool "1000 HZ"
	help
	 1000 Hz is the preferred choice for desktop systems and other
	 systems requiring fast interactive responses to events.

endchoice

config HZ
	int
	default 100 if HZ_100
	default 250 if HZ_250
	default 300 if HZ_300
	default 1000 if HZ_1000
|
||||
|
||||
65
kernel/Kconfig.preempt
Normal file
65
kernel/Kconfig.preempt
Normal file
@@ -0,0 +1,65 @@
|
||||
|
||||
choice
	prompt "Preemption Model"
	default PREEMPT_NONE

config PREEMPT_NONE
	bool "No Forced Preemption (Server)"
	help
	  This is the traditional Linux preemption model, geared towards
	  throughput. It will still provide good latencies most of the
	  time, but there are no guarantees and occasional longer delays
	  are possible.

	  Select this option if you are building a kernel for a server or
	  scientific/computation system, or if you want to maximize the
	  raw processing power of the kernel, irrespective of scheduling
	  latencies.

config PREEMPT_VOLUNTARY
	bool "Voluntary Kernel Preemption (Desktop)"
	help
	  This option reduces the latency of the kernel by adding more
	  "explicit preemption points" to the kernel code. These new
	  preemption points have been selected to reduce the maximum
	  latency of rescheduling, providing faster application reactions,
	  at the cost of slightly lower throughput.

	  This allows reaction to interactive events by allowing a
	  low priority process to voluntarily preempt itself even if it
	  is in kernel mode executing a system call. This allows
	  applications to run more 'smoothly' even when the system is
	  under load.

	  Select this if you are building a kernel for a desktop system.

config PREEMPT
	bool "Preemptible Kernel (Low-Latency Desktop)"
	help
	  This option reduces the latency of the kernel by making
	  all kernel code (that is not executing in a critical section)
	  preemptible. This allows reaction to interactive events by
	  permitting a low priority process to be preempted involuntarily
	  even if it is in kernel mode executing a system call and would
	  otherwise not be about to reach a natural preemption point.
	  This allows applications to run more 'smoothly' even when the
	  system is under load, at the cost of slightly lower throughput
	  and a slight runtime overhead to kernel code.

	  Select this if you are building a kernel for a desktop or
	  embedded system with latency requirements in the milliseconds
	  range.

endchoice

config PREEMPT_BKL
	bool "Preempt The Big Kernel Lock"
	depends on SMP || PREEMPT
	default y
	help
	  This option reduces the latency of the kernel by making the
	  big kernel lock preemptible.

	  Say Y here if you are building a kernel for a desktop system.
	  Say N if you are unsure.
|
||||
|
||||
76
kernel/Makefile
Normal file
76
kernel/Makefile
Normal file
@@ -0,0 +1,76 @@
|
||||
#
# Makefile for the linux kernel.
#

obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
	    exit.o itimer.o time.o softirq.o resource.o \
	    sysctl.o capability.o ptrace.o timer.o user.o \
	    signal.o sys.o kmod.o workqueue.o pid.o \
	    rcupdate.o extable.o params.o posix-timers.o \
	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
	    hrtimer.o rwsem.o latency.o nsproxy.o srcu.o

obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += time/
obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
obj-$(CONFIG_LOCKDEP) += lockdep.o
ifeq ($(CONFIG_PROC_FS),y)
obj-$(CONFIG_LOCKDEP) += lockdep_proc.o
endif
obj-$(CONFIG_FUTEX) += futex.o
ifeq ($(CONFIG_COMPAT),y)
obj-$(CONFIG_FUTEX) += futex_compat.o
endif
obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
obj-$(CONFIG_SMP) += cpu.o spinlock.o
obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
obj-$(CONFIG_UID16) += uid16.o
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_KALLSYMS) += kallsyms.o
obj-$(CONFIG_PM) += power/
obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
obj-$(CONFIG_KEXEC) += kexec.o
obj-$(CONFIG_COMPAT) += compat.o
obj-$(CONFIG_CPUSETS) += cpuset.o
obj-$(CONFIG_IKCONFIG) += configs.o
obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
obj-$(CONFIG_AUDIT) += audit.o auditfilter.o
obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_SYSFS) += ksysfs.o
obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
obj-$(CONFIG_SECCOMP) += seccomp.o
obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
obj-$(CONFIG_RELAY) += relay.o
obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
obj-$(CONFIG_UTS_NS) += utsname.o
obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o

ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
# needed for x86 only.  Why this used to be enabled for all architectures is beyond
# me.  I suspect most platforms don't need this, but until we know that for sure
# I turn this off for IA-64 only.  Andreas Schwab says it's also needed on m68k
# to get a correct value for the wait-channel (WCHAN in ps). --davidm
CFLAGS_sched.o := $(PROFILING) -fno-omit-frame-pointer
endif

$(obj)/configs.o: $(obj)/config_data.h

# config_data.h contains the same information as ikconfig.h but gzipped.
# Info from config_data can be extracted from /proc/config*
targets += config_data.gz
$(obj)/config_data.gz: .config FORCE
	$(call if_changed,gzip)

quiet_cmd_ikconfiggz = IKCFG   $@
      cmd_ikconfiggz = (echo "static const char kernel_config_data[] = MAGIC_START"; cat $< | scripts/bin2c; echo "MAGIC_END;") > $@
targets += config_data.h
$(obj)/config_data.h: $(obj)/config_data.gz FORCE
	$(call if_changed,ikconfiggz)
|
||||
599
kernel/acct.c
Normal file
599
kernel/acct.c
Normal file
@@ -0,0 +1,599 @@
|
||||
/*
|
||||
* linux/kernel/acct.c
|
||||
*
|
||||
* BSD Process Accounting for Linux
|
||||
*
|
||||
* Author: Marco van Wieringen <mvw@planets.elm.net>
|
||||
*
|
||||
* Some code based on ideas and code from:
|
||||
* Thomas K. Dyas <tdyas@eden.rutgers.edu>
|
||||
*
|
||||
* This file implements BSD-style process accounting. Whenever any
|
||||
* process exits, an accounting record of type "struct acct" is
|
||||
* written to the file specified with the acct() system call. It is
|
||||
* up to user-level programs to do useful things with the accounting
|
||||
* log. The kernel just provides the raw accounting information.
|
||||
*
|
||||
* (C) Copyright 1995 - 1997 Marco van Wieringen - ELM Consultancy B.V.
|
||||
*
|
||||
* Plugged two leaks. 1) It didn't return acct_file into the free_filps if
|
||||
* the file happened to be read-only. 2) If the accounting was suspended
|
||||
* due to the lack of space it happily allowed to reopen it and completely
|
||||
* lost the old acct_file. 3/10/98, Al Viro.
|
||||
*
|
||||
* Now we silently close acct_file on attempt to reopen. Cleaned sys_acct().
|
||||
* XTerms and EMACS are manifestations of pure evil. 21/10/98, AV.
|
||||
*
|
||||
* Fixed a nasty interaction with with sys_umount(). If the accointing
|
||||
* was suspeneded we failed to stop it on umount(). Messy.
|
||||
* Another one: remount to readonly didn't stop accounting.
|
||||
* Question: what should we do if we have CAP_SYS_ADMIN but not
|
||||
* CAP_SYS_PACCT? Current code does the following: umount returns -EBUSY
|
||||
* unless we are messing with the root. In that case we are getting a
|
||||
* real mess with do_remount_sb(). 9/11/98, AV.
|
||||
*
|
||||
* Fixed a bunch of races (and pair of leaks). Probably not the best way,
|
||||
* but this one obviously doesn't introduce deadlocks. Later. BTW, found
|
||||
* one race (and leak) in BSD implementation.
|
||||
* OK, that's better. ANOTHER race and leak in BSD variant. There always
|
||||
* is one more bug... 10/11/98, AV.
|
||||
*
|
||||
* Oh, fsck... Oopsable SMP race in do_process_acct() - we must hold
|
||||
* ->mmap_sem to walk the vma list of current->mm. Nasty, since it leaks
|
||||
* a struct file opened for write. Fixed. 2/6/2000, AV.
|
||||
*/
|
||||
|
||||
#include <linux/mm.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/acct.h>
|
||||
#include <linux/capability.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/tty.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/vfs.h>
|
||||
#include <linux/jiffies.h>
|
||||
#include <linux/times.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/mount.h>
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/div64.h>
|
||||
#include <linux/blkdev.h> /* sector_div */
|
||||
|
||||
/*
 * These constants control the amount of freespace that suspend and
 * resume the process accounting system, and the time delay between
 * each check.
 * Turned into sysctl-controllable parameters. AV, 12/11/98
 */

/* {RESUME, SUSPEND, ACCT_TIMEOUT} defaults; exposed via sysctl */
int acct_parm[3] = {4, 2, 30};
#define RESUME		(acct_parm[0])	/* >foo% free space - resume */
#define SUSPEND		(acct_parm[1])	/* <foo% free space - suspend */
#define ACCT_TIMEOUT	(acct_parm[2])	/* foo second timeout between checks */
|
||||
|
||||
/*
|
||||
* External references and all of the globals.
|
||||
*/
|
||||
static void do_acct_process(struct file *);
|
||||
|
||||
/*
|
||||
* This structure is used so that all the data protected by lock
|
||||
* can be placed in the same cache line as the lock. This primes
|
||||
* the cache line to have the data after getting the lock.
|
||||
*/
|
||||
struct acct_glbs {
|
||||
spinlock_t lock;
|
||||
volatile int active;
|
||||
volatile int needcheck;
|
||||
struct file *file;
|
||||
struct timer_list timer;
|
||||
};
|
||||
|
||||
static struct acct_glbs acct_globals __cacheline_aligned =
|
||||
{__SPIN_LOCK_UNLOCKED(acct_globals.lock)};
|
||||
|
||||
/*
|
||||
* Called whenever the timer says to check the free space.
|
||||
*/
|
||||
static void acct_timeout(unsigned long unused)
|
||||
{
|
||||
acct_globals.needcheck = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check the amount of free space and suspend/resume accordingly.
|
||||
*/
|
||||
static int check_free_space(struct file *file)
|
||||
{
|
||||
struct kstatfs sbuf;
|
||||
int res;
|
||||
int act;
|
||||
sector_t resume;
|
||||
sector_t suspend;
|
||||
|
||||
spin_lock(&acct_globals.lock);
|
||||
res = acct_globals.active;
|
||||
if (!file || !acct_globals.needcheck)
|
||||
goto out;
|
||||
spin_unlock(&acct_globals.lock);
|
||||
|
||||
/* May block */
|
||||
if (vfs_statfs(file->f_path.dentry, &sbuf))
|
||||
return res;
|
||||
suspend = sbuf.f_blocks * SUSPEND;
|
||||
resume = sbuf.f_blocks * RESUME;
|
||||
|
||||
sector_div(suspend, 100);
|
||||
sector_div(resume, 100);
|
||||
|
||||
if (sbuf.f_bavail <= suspend)
|
||||
act = -1;
|
||||
else if (sbuf.f_bavail >= resume)
|
||||
act = 1;
|
||||
else
|
||||
act = 0;
|
||||
|
||||
/*
|
||||
* If some joker switched acct_globals.file under us we'ld better be
|
||||
* silent and _not_ touch anything.
|
||||
*/
|
||||
spin_lock(&acct_globals.lock);
|
||||
if (file != acct_globals.file) {
|
||||
if (act)
|
||||
res = act>0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (acct_globals.active) {
|
||||
if (act < 0) {
|
||||
acct_globals.active = 0;
|
||||
printk(KERN_INFO "Process accounting paused\n");
|
||||
}
|
||||
} else {
|
||||
if (act > 0) {
|
||||
acct_globals.active = 1;
|
||||
printk(KERN_INFO "Process accounting resumed\n");
|
||||
}
|
||||
}
|
||||
|
||||
del_timer(&acct_globals.timer);
|
||||
acct_globals.needcheck = 0;
|
||||
acct_globals.timer.expires = jiffies + ACCT_TIMEOUT*HZ;
|
||||
add_timer(&acct_globals.timer);
|
||||
res = acct_globals.active;
|
||||
out:
|
||||
spin_unlock(&acct_globals.lock);
|
||||
return res;
|
||||
}
|
||||
|
||||
/*
|
||||
* Close the old accounting file (if currently open) and then replace
|
||||
* it with file (if non-NULL).
|
||||
*
|
||||
* NOTE: acct_globals.lock MUST be held on entry and exit.
|
||||
*/
|
||||
static void acct_file_reopen(struct file *file)
|
||||
{
|
||||
struct file *old_acct = NULL;
|
||||
|
||||
if (acct_globals.file) {
|
||||
old_acct = acct_globals.file;
|
||||
del_timer(&acct_globals.timer);
|
||||
acct_globals.active = 0;
|
||||
acct_globals.needcheck = 0;
|
||||
acct_globals.file = NULL;
|
||||
}
|
||||
if (file) {
|
||||
acct_globals.file = file;
|
||||
acct_globals.needcheck = 0;
|
||||
acct_globals.active = 1;
|
||||
/* It's been deleted if it was used before so this is safe */
|
||||
init_timer(&acct_globals.timer);
|
||||
acct_globals.timer.function = acct_timeout;
|
||||
acct_globals.timer.expires = jiffies + ACCT_TIMEOUT*HZ;
|
||||
add_timer(&acct_globals.timer);
|
||||
}
|
||||
if (old_acct) {
|
||||
mnt_unpin(old_acct->f_path.mnt);
|
||||
spin_unlock(&acct_globals.lock);
|
||||
do_acct_process(old_acct);
|
||||
filp_close(old_acct, NULL);
|
||||
spin_lock(&acct_globals.lock);
|
||||
}
|
||||
}
|
||||
|
||||
static int acct_on(char *name)
|
||||
{
|
||||
struct file *file;
|
||||
int error;
|
||||
|
||||
/* Difference from BSD - they don't do O_APPEND */
|
||||
file = filp_open(name, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
|
||||
if (IS_ERR(file))
|
||||
return PTR_ERR(file);
|
||||
|
||||
if (!S_ISREG(file->f_path.dentry->d_inode->i_mode)) {
|
||||
filp_close(file, NULL);
|
||||
return -EACCES;
|
||||
}
|
||||
|
||||
if (!file->f_op->write) {
|
||||
filp_close(file, NULL);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
error = security_acct(file);
|
||||
if (error) {
|
||||
filp_close(file, NULL);
|
||||
return error;
|
||||
}
|
||||
|
||||
spin_lock(&acct_globals.lock);
|
||||
mnt_pin(file->f_path.mnt);
|
||||
acct_file_reopen(file);
|
||||
spin_unlock(&acct_globals.lock);
|
||||
|
||||
mntput(file->f_path.mnt); /* it's pinned, now give up active reference */
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* sys_acct - enable/disable process accounting
|
||||
* @name: file name for accounting records or NULL to shutdown accounting
|
||||
*
|
||||
* Returns 0 for success or negative errno values for failure.
|
||||
*
|
||||
* sys_acct() is the only system call needed to implement process
|
||||
* accounting. It takes the name of the file where accounting records
|
||||
* should be written. If the filename is NULL, accounting will be
|
||||
* shutdown.
|
||||
*/
|
||||
asmlinkage long sys_acct(const char __user *name)
|
||||
{
|
||||
int error;
|
||||
|
||||
if (!capable(CAP_SYS_PACCT))
|
||||
return -EPERM;
|
||||
|
||||
if (name) {
|
||||
char *tmp = getname(name);
|
||||
if (IS_ERR(tmp))
|
||||
return (PTR_ERR(tmp));
|
||||
error = acct_on(tmp);
|
||||
putname(tmp);
|
||||
} else {
|
||||
error = security_acct(NULL);
|
||||
if (!error) {
|
||||
spin_lock(&acct_globals.lock);
|
||||
acct_file_reopen(NULL);
|
||||
spin_unlock(&acct_globals.lock);
|
||||
}
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
/**
|
||||
* acct_auto_close - turn off a filesystem's accounting if it is on
|
||||
* @m: vfsmount being shut down
|
||||
*
|
||||
* If the accounting is turned on for a file in the subtree pointed to
|
||||
* to by m, turn accounting off. Done when m is about to die.
|
||||
*/
|
||||
void acct_auto_close_mnt(struct vfsmount *m)
|
||||
{
|
||||
spin_lock(&acct_globals.lock);
|
||||
if (acct_globals.file && acct_globals.file->f_path.mnt == m)
|
||||
acct_file_reopen(NULL);
|
||||
spin_unlock(&acct_globals.lock);
|
||||
}
|
||||
|
||||
/**
|
||||
* acct_auto_close - turn off a filesystem's accounting if it is on
|
||||
* @sb: super block for the filesystem
|
||||
*
|
||||
* If the accounting is turned on for a file in the filesystem pointed
|
||||
* to by sb, turn accounting off.
|
||||
*/
|
||||
void acct_auto_close(struct super_block *sb)
|
||||
{
|
||||
spin_lock(&acct_globals.lock);
|
||||
if (acct_globals.file &&
|
||||
acct_globals.file->f_path.mnt->mnt_sb == sb) {
|
||||
acct_file_reopen(NULL);
|
||||
}
|
||||
spin_unlock(&acct_globals.lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* encode an unsigned long into a comp_t
|
||||
*
|
||||
* This routine has been adopted from the encode_comp_t() function in
|
||||
* the kern_acct.c file of the FreeBSD operating system. The encoding
|
||||
* is a 13-bit fraction with a 3-bit (base 8) exponent.
|
||||
*/
|
||||
|
||||
#define MANTSIZE 13 /* 13 bit mantissa. */
|
||||
#define EXPSIZE 3 /* Base 8 (3 bit) exponent. */
|
||||
#define MAXFRACT ((1 << MANTSIZE) - 1) /* Maximum fractional value. */
|
||||
|
||||
static comp_t encode_comp_t(unsigned long value)
|
||||
{
|
||||
int exp, rnd;
|
||||
|
||||
exp = rnd = 0;
|
||||
while (value > MAXFRACT) {
|
||||
rnd = value & (1 << (EXPSIZE - 1)); /* Round up? */
|
||||
value >>= EXPSIZE; /* Base 8 exponent == 3 bit shift. */
|
||||
exp++;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we need to round up, do it (and handle overflow correctly).
|
||||
*/
|
||||
if (rnd && (++value > MAXFRACT)) {
|
||||
value >>= EXPSIZE;
|
||||
exp++;
|
||||
}
|
||||
|
||||
/*
|
||||
* Clean it up and polish it off.
|
||||
*/
|
||||
exp <<= MANTSIZE; /* Shift the exponent into place */
|
||||
exp += value; /* and add on the mantissa. */
|
||||
return exp;
|
||||
}
|
||||
|
||||
#if ACCT_VERSION==1 || ACCT_VERSION==2
/*
 * encode an u64 into a comp2_t (24 bits)
 *
 * Format: 5 bit base 2 exponent, 20 bits mantissa.
 * The leading bit of the mantissa is not stored, but implied for
 * non-zero exponents.
 * Largest encodable value is 50 bits.
 */

#define MANTSIZE2       20                      /* 20 bit mantissa. */
#define EXPSIZE2        5                       /* 5 bit base 2 exponent. */
#define MAXFRACT2       ((1ul << MANTSIZE2) - 1) /* Maximum fractional value. */
#define MAXEXP2         ((1 <<EXPSIZE2) - 1)    /* Maximum exponent. */

static comp2_t encode_comp2_t(u64 value)
{
	int exp, rnd;

	exp = (value > (MAXFRACT2>>1));
	rnd = 0;
	while (value > MAXFRACT2) {
		rnd = value & 1;
		value >>= 1;
		exp++;
	}

	/*
	 * If we need to round up, do it (and handle overflow correctly).
	 */
	if (rnd && (++value > MAXFRACT2)) {
		value >>= 1;
		exp++;
	}

	if (exp > MAXEXP2) {
		/* Overflow. Return largest representable number instead. */
		return (1ul << (MANTSIZE2+EXPSIZE2-1)) - 1;
	} else {
		return (value & (MAXFRACT2>>1)) | (exp << (MANTSIZE2-1));
	}
}
#endif
|
||||
|
||||
#if ACCT_VERSION==3
/*
 * encode an u64 into a 32 bit IEEE float
 * (bit-twiddled by hand; no floating point math is used)
 */
static u32 encode_float(u64 value)
{
	unsigned exp = 190;	/* biased exponent for a 63-bit-normalized value */
	unsigned u;

	if (value==0) return 0;
	/* normalize: shift until the top (sign) bit is set */
	while ((s64)value > 0){
		value <<= 1;
		exp--;
	}
	u = (u32)(value >> 40) & 0x7fffffu;	/* keep 23 mantissa bits */
	return u | (exp << 23);
}
#endif
|
||||
|
||||
/*
|
||||
* Write an accounting entry for an exiting process
|
||||
*
|
||||
* The acct_process() call is the workhorse of the process
|
||||
* accounting system. The struct acct is built here and then written
|
||||
* into the accounting file. This function should only be called from
|
||||
* do_exit().
|
||||
*/
|
||||
|
||||
/*
|
||||
* do_acct_process does all actual work. Caller holds the reference to file.
|
||||
*/
|
||||
static void do_acct_process(struct file *file)
|
||||
{
|
||||
struct pacct_struct *pacct = ¤t->signal->pacct;
|
||||
acct_t ac;
|
||||
mm_segment_t fs;
|
||||
unsigned long flim;
|
||||
u64 elapsed;
|
||||
u64 run_time;
|
||||
struct timespec uptime;
|
||||
struct tty_struct *tty;
|
||||
|
||||
/*
|
||||
* First check to see if there is enough free_space to continue
|
||||
* the process accounting system.
|
||||
*/
|
||||
if (!check_free_space(file))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Fill the accounting struct with the needed info as recorded
|
||||
* by the different kernel functions.
|
||||
*/
|
||||
memset((caddr_t)&ac, 0, sizeof(acct_t));
|
||||
|
||||
ac.ac_version = ACCT_VERSION | ACCT_BYTEORDER;
|
||||
strlcpy(ac.ac_comm, current->comm, sizeof(ac.ac_comm));
|
||||
|
||||
/* calculate run_time in nsec*/
|
||||
do_posix_clock_monotonic_gettime(&uptime);
|
||||
run_time = (u64)uptime.tv_sec*NSEC_PER_SEC + uptime.tv_nsec;
|
||||
run_time -= (u64)current->group_leader->start_time.tv_sec * NSEC_PER_SEC
|
||||
+ current->group_leader->start_time.tv_nsec;
|
||||
/* convert nsec -> AHZ */
|
||||
elapsed = nsec_to_AHZ(run_time);
|
||||
#if ACCT_VERSION==3
|
||||
ac.ac_etime = encode_float(elapsed);
|
||||
#else
|
||||
ac.ac_etime = encode_comp_t(elapsed < (unsigned long) -1l ?
|
||||
(unsigned long) elapsed : (unsigned long) -1l);
|
||||
#endif
|
||||
#if ACCT_VERSION==1 || ACCT_VERSION==2
|
||||
{
|
||||
/* new enlarged etime field */
|
||||
comp2_t etime = encode_comp2_t(elapsed);
|
||||
ac.ac_etime_hi = etime >> 16;
|
||||
ac.ac_etime_lo = (u16) etime;
|
||||
}
|
||||
#endif
|
||||
do_div(elapsed, AHZ);
|
||||
ac.ac_btime = xtime.tv_sec - elapsed;
|
||||
/* we really need to bite the bullet and change layout */
|
||||
ac.ac_uid = current->uid;
|
||||
ac.ac_gid = current->gid;
|
||||
#if ACCT_VERSION==2
|
||||
ac.ac_ahz = AHZ;
|
||||
#endif
|
||||
#if ACCT_VERSION==1 || ACCT_VERSION==2
|
||||
/* backward-compatible 16 bit fields */
|
||||
ac.ac_uid16 = current->uid;
|
||||
ac.ac_gid16 = current->gid;
|
||||
#endif
|
||||
#if ACCT_VERSION==3
|
||||
ac.ac_pid = current->tgid;
|
||||
ac.ac_ppid = current->parent->tgid;
|
||||
#endif
|
||||
|
||||
spin_lock_irq(¤t->sighand->siglock);
|
||||
tty = current->signal->tty;
|
||||
ac.ac_tty = tty ? old_encode_dev(tty_devnum(tty)) : 0;
|
||||
ac.ac_utime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_utime)));
|
||||
ac.ac_stime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_stime)));
|
||||
ac.ac_flag = pacct->ac_flag;
|
||||
ac.ac_mem = encode_comp_t(pacct->ac_mem);
|
||||
ac.ac_minflt = encode_comp_t(pacct->ac_minflt);
|
||||
ac.ac_majflt = encode_comp_t(pacct->ac_majflt);
|
||||
ac.ac_exitcode = pacct->ac_exitcode;
|
||||
spin_unlock_irq(¤t->sighand->siglock);
|
||||
ac.ac_io = encode_comp_t(0 /* current->io_usage */); /* %% */
|
||||
ac.ac_rw = encode_comp_t(ac.ac_io / 1024);
|
||||
ac.ac_swaps = encode_comp_t(0);
|
||||
|
||||
/*
|
||||
* Kernel segment override to datasegment and write it
|
||||
* to the accounting file.
|
||||
*/
|
||||
fs = get_fs();
|
||||
set_fs(KERNEL_DS);
|
||||
/*
|
||||
* Accounting records are not subject to resource limits.
|
||||
*/
|
||||
flim = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
|
||||
current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
|
||||
file->f_op->write(file, (char *)&ac,
|
||||
sizeof(acct_t), &file->f_pos);
|
||||
current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim;
|
||||
set_fs(fs);
|
||||
}
|
||||
|
||||
/**
|
||||
* acct_init_pacct - initialize a new pacct_struct
|
||||
* @pacct: per-process accounting info struct to initialize
|
||||
*/
|
||||
void acct_init_pacct(struct pacct_struct *pacct)
|
||||
{
|
||||
memset(pacct, 0, sizeof(struct pacct_struct));
|
||||
pacct->ac_utime = pacct->ac_stime = cputime_zero;
|
||||
}
|
||||
|
||||
/**
|
||||
* acct_collect - collect accounting information into pacct_struct
|
||||
* @exitcode: task exit code
|
||||
* @group_dead: not 0, if this thread is the last one in the process.
|
||||
*/
|
||||
void acct_collect(long exitcode, int group_dead)
|
||||
{
|
||||
struct pacct_struct *pacct = ¤t->signal->pacct;
|
||||
unsigned long vsize = 0;
|
||||
|
||||
if (group_dead && current->mm) {
|
||||
struct vm_area_struct *vma;
|
||||
down_read(¤t->mm->mmap_sem);
|
||||
vma = current->mm->mmap;
|
||||
while (vma) {
|
||||
vsize += vma->vm_end - vma->vm_start;
|
||||
vma = vma->vm_next;
|
||||
}
|
||||
up_read(¤t->mm->mmap_sem);
|
||||
}
|
||||
|
||||
spin_lock_irq(¤t->sighand->siglock);
|
||||
if (group_dead)
|
||||
pacct->ac_mem = vsize / 1024;
|
||||
if (thread_group_leader(current)) {
|
||||
pacct->ac_exitcode = exitcode;
|
||||
if (current->flags & PF_FORKNOEXEC)
|
||||
pacct->ac_flag |= AFORK;
|
||||
}
|
||||
if (current->flags & PF_SUPERPRIV)
|
||||
pacct->ac_flag |= ASU;
|
||||
if (current->flags & PF_DUMPCORE)
|
||||
pacct->ac_flag |= ACORE;
|
||||
if (current->flags & PF_SIGNALED)
|
||||
pacct->ac_flag |= AXSIG;
|
||||
pacct->ac_utime = cputime_add(pacct->ac_utime, current->utime);
|
||||
pacct->ac_stime = cputime_add(pacct->ac_stime, current->stime);
|
||||
pacct->ac_minflt += current->min_flt;
|
||||
pacct->ac_majflt += current->maj_flt;
|
||||
spin_unlock_irq(¤t->sighand->siglock);
|
||||
}
|
||||
|
||||
/**
|
||||
* acct_process - now just a wrapper around do_acct_process
|
||||
* @exitcode: task exit code
|
||||
*
|
||||
* handles process accounting for an exiting task
|
||||
*/
|
||||
void acct_process(void)
|
||||
{
|
||||
struct file *file = NULL;
|
||||
|
||||
/*
|
||||
* accelerate the common fastpath:
|
||||
*/
|
||||
if (!acct_globals.file)
|
||||
return;
|
||||
|
||||
spin_lock(&acct_globals.lock);
|
||||
file = acct_globals.file;
|
||||
if (unlikely(!file)) {
|
||||
spin_unlock(&acct_globals.lock);
|
||||
return;
|
||||
}
|
||||
get_file(file);
|
||||
spin_unlock(&acct_globals.lock);
|
||||
|
||||
do_acct_process(file);
|
||||
fput(file);
|
||||
}
|
||||
1313
kernel/audit.c
Normal file
1313
kernel/audit.c
Normal file
File diff suppressed because it is too large
Load Diff
147
kernel/audit.h
Normal file
147
kernel/audit.h
Normal file
@@ -0,0 +1,147 @@
|
||||
/* audit -- definition of audit_context structure and supporting types
|
||||
*
|
||||
* Copyright 2003-2004 Red Hat, Inc.
|
||||
* Copyright 2005 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright 2005 IBM Corporation
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
|
||||
#include <linux/fs.h>
#include <linux/audit.h>
#include <linux/skbuff.h>

/* 0 = no checking
   1 = put_count checking
   2 = verbose put_count checking
*/
#define AUDIT_DEBUG 0

/* At task start time, the audit_state is set in the audit_context using
   a per-task filter.  At syscall entry, the audit_state is augmented by
   the syscall filter. */
enum audit_state {
	AUDIT_DISABLED,		/* Do not create per-task audit_context.
				 * No syscall-specific audit records can
				 * be generated. */
	AUDIT_SETUP_CONTEXT,	/* Create the per-task audit_context,
				 * but don't necessarily fill it in at
				 * syscall entry time (i.e., filter
				 * instead). */
	AUDIT_BUILD_CONTEXT,	/* Create the per-task audit_context,
				 * and always fill it in at syscall
				 * entry time.  This makes a full
				 * syscall record available if some
				 * other part of the kernel decides it
				 * should be recorded. */
	AUDIT_RECORD_CONTEXT	/* Create the per-task audit_context,
				 * always fill it in at syscall entry
				 * time, and always write out the audit
				 * record at syscall exit time.  */
};

/* Rule lists */
struct audit_parent;

struct audit_watch {
	atomic_t		count;	/* reference count */
	char			*path;	/* insertion path */
	dev_t			dev;	/* associated superblock device */
	unsigned long		ino;	/* associated inode number */
	struct audit_parent	*parent; /* associated parent */
	struct list_head	wlist;	/* entry in parent->watches list */
	struct list_head	rules;	/* associated rules */
};

struct audit_field {
	u32				type;
	u32				val;
	u32				op;
	char				*se_str;
	struct selinux_audit_rule	*se_rule;
};

struct audit_krule {
	int			vers_ops;
	u32			flags;
	u32			listnr;
	u32			action;
	u32			mask[AUDIT_BITMASK_SIZE];
	u32			buflen;	/* for data alloc on list rules */
	u32			field_count;
	char			*filterkey; /* ties events to rules */
	struct audit_field	*fields;
	struct audit_field	*inode_f; /* quick access to an inode field */
	struct audit_watch	*watch;	/* associated watch */
	struct list_head	rlist;	/* entry in audit_watch.rules list */
};

struct audit_entry {
	struct list_head	list;
	struct rcu_head		rcu;
	struct audit_krule	rule;
};

extern int audit_pid;

#define AUDIT_INODE_BUCKETS	32
extern struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS];

static inline int audit_hash_ino(u32 ino)
{
	return (ino & (AUDIT_INODE_BUCKETS-1));
}

extern int audit_match_class(int class, unsigned syscall);
extern int audit_comparator(const u32 left, const u32 op, const u32 right);
extern int audit_compare_dname_path(const char *dname, const char *path,
				    int *dirlen);
extern struct sk_buff *	    audit_make_reply(int pid, int seq, int type,
					     int done, int multi,
					     void *payload, int size);
extern void		    audit_send_reply(int pid, int seq, int type,
					     int done, int multi,
					     void *payload, int size);
extern void		    audit_log_lost(const char *message);
extern void		    audit_panic(const char *message);

struct audit_netlink_list {
	int pid;
	struct sk_buff_head q;
};

int audit_send_list(void *);

struct inotify_watch;
extern void audit_free_parent(struct inotify_watch *);
extern void audit_handle_ievent(struct inotify_watch *, u32, u32, u32,
				const char *, struct inode *);
extern int selinux_audit_rule_update(void);

#ifdef CONFIG_AUDITSYSCALL
extern void __audit_signal_info(int sig, struct task_struct *t);
static inline void audit_signal_info(int sig, struct task_struct *t)
{
	if (unlikely(audit_pid && t->tgid == audit_pid))
		__audit_signal_info(sig, t);
}
extern enum audit_state audit_filter_inodes(struct task_struct *,
					    struct audit_context *);
extern void audit_set_auditable(struct audit_context *);
#else
#define audit_signal_info(s,t)
#define audit_filter_inodes(t,c) AUDIT_DISABLED
#define audit_set_auditable(c)
#endif
|
||||
1745
kernel/auditfilter.c
Normal file
1745
kernel/auditfilter.c
Normal file
File diff suppressed because it is too large
Load Diff
1937
kernel/auditsc.c
Normal file
1937
kernel/auditsc.c
Normal file
File diff suppressed because it is too large
Load Diff
253
kernel/capability.c
Normal file
253
kernel/capability.c
Normal file
@@ -0,0 +1,253 @@
|
||||
/*
|
||||
* linux/kernel/capability.c
|
||||
*
|
||||
* Copyright (C) 1997 Andrew Main <zefram@fysh.org>
|
||||
*
|
||||
* Integrated into 2.1.97+, Andrew G. Morgan <morgan@transmeta.com>
|
||||
* 30 May 2002: Cleanup, Robert M. Love <rml@tech9.net>
|
||||
*/
|
||||
|
||||
#include <linux/capability.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <asm/uaccess.h>
|
||||
|
||||
unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */
|
||||
kernel_cap_t cap_bset = CAP_INIT_EFF_SET;
|
||||
|
||||
EXPORT_SYMBOL(securebits);
|
||||
EXPORT_SYMBOL(cap_bset);
|
||||
|
||||
/*
|
||||
* This lock protects task->cap_* for all tasks including current.
|
||||
* Locking rule: acquire this prior to tasklist_lock.
|
||||
*/
|
||||
static DEFINE_SPINLOCK(task_capability_lock);
|
||||
|
||||
/*
|
||||
* For sys_getproccap() and sys_setproccap(), any of the three
|
||||
* capability set pointers may be NULL -- indicating that that set is
|
||||
* uninteresting and/or not to be changed.
|
||||
*/
|
||||
|
||||
/**
|
||||
* sys_capget - get the capabilities of a given process.
|
||||
* @header: pointer to struct that contains capability version and
|
||||
* target pid data
|
||||
* @dataptr: pointer to struct that contains the effective, permitted,
|
||||
* and inheritable capabilities that are returned
|
||||
*
|
||||
* Returns 0 on success and < 0 on error.
|
||||
*/
|
||||
asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
|
||||
{
|
||||
int ret = 0;
|
||||
pid_t pid;
|
||||
__u32 version;
|
||||
struct task_struct *target;
|
||||
struct __user_cap_data_struct data;
|
||||
|
||||
if (get_user(version, &header->version))
|
||||
return -EFAULT;
|
||||
|
||||
if (version != _LINUX_CAPABILITY_VERSION) {
|
||||
if (put_user(_LINUX_CAPABILITY_VERSION, &header->version))
|
||||
return -EFAULT;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (get_user(pid, &header->pid))
|
||||
return -EFAULT;
|
||||
|
||||
if (pid < 0)
|
||||
return -EINVAL;
|
||||
|
||||
spin_lock(&task_capability_lock);
|
||||
read_lock(&tasklist_lock);
|
||||
|
||||
if (pid && pid != current->pid) {
|
||||
target = find_task_by_pid(pid);
|
||||
if (!target) {
|
||||
ret = -ESRCH;
|
||||
goto out;
|
||||
}
|
||||
} else
|
||||
target = current;
|
||||
|
||||
ret = security_capget(target, &data.effective, &data.inheritable, &data.permitted);
|
||||
|
||||
out:
|
||||
read_unlock(&tasklist_lock);
|
||||
spin_unlock(&task_capability_lock);
|
||||
|
||||
if (!ret && copy_to_user(dataptr, &data, sizeof data))
|
||||
return -EFAULT;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* cap_set_pg - set capabilities for all processes in a given process
|
||||
* group. We call this holding task_capability_lock and tasklist_lock.
|
||||
*/
|
||||
static inline int cap_set_pg(int pgrp_nr, kernel_cap_t *effective,
|
||||
kernel_cap_t *inheritable,
|
||||
kernel_cap_t *permitted)
|
||||
{
|
||||
struct task_struct *g, *target;
|
||||
int ret = -EPERM;
|
||||
int found = 0;
|
||||
struct pid *pgrp;
|
||||
|
||||
pgrp = find_pid(pgrp_nr);
|
||||
do_each_pid_task(pgrp, PIDTYPE_PGID, g) {
|
||||
target = g;
|
||||
while_each_thread(g, target) {
|
||||
if (!security_capset_check(target, effective,
|
||||
inheritable,
|
||||
permitted)) {
|
||||
security_capset_set(target, effective,
|
||||
inheritable,
|
||||
permitted);
|
||||
ret = 0;
|
||||
}
|
||||
found = 1;
|
||||
}
|
||||
} while_each_pid_task(pgrp, PIDTYPE_PGID, g);
|
||||
|
||||
if (!found)
|
||||
ret = 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* cap_set_all - set capabilities for all processes other than init
|
||||
* and self. We call this holding task_capability_lock and tasklist_lock.
|
||||
*/
|
||||
static inline int cap_set_all(kernel_cap_t *effective,
|
||||
kernel_cap_t *inheritable,
|
||||
kernel_cap_t *permitted)
|
||||
{
|
||||
struct task_struct *g, *target;
|
||||
int ret = -EPERM;
|
||||
int found = 0;
|
||||
|
||||
do_each_thread(g, target) {
|
||||
if (target == current || is_init(target))
|
||||
continue;
|
||||
found = 1;
|
||||
if (security_capset_check(target, effective, inheritable,
|
||||
permitted))
|
||||
continue;
|
||||
ret = 0;
|
||||
security_capset_set(target, effective, inheritable, permitted);
|
||||
} while_each_thread(g, target);
|
||||
|
||||
if (!found)
|
||||
ret = 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* sys_capset - set capabilities for a process or a group of processes
|
||||
* @header: pointer to struct that contains capability version and
|
||||
* target pid data
|
||||
* @data: pointer to struct that contains the effective, permitted,
|
||||
* and inheritable capabilities
|
||||
*
|
||||
* Set capabilities for a given process, all processes, or all
|
||||
* processes in a given process group.
|
||||
*
|
||||
* The restrictions on setting capabilities are specified as:
|
||||
*
|
||||
* [pid is for the 'target' task. 'current' is the calling task.]
|
||||
*
|
||||
* I: any raised capabilities must be a subset of the (old current) permitted
|
||||
* P: any raised capabilities must be a subset of the (old current) permitted
|
||||
* E: must be set to a subset of (new target) permitted
|
||||
*
|
||||
* Returns 0 on success and < 0 on error.
|
||||
*/
|
||||
asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
|
||||
{
|
||||
kernel_cap_t inheritable, permitted, effective;
|
||||
__u32 version;
|
||||
struct task_struct *target;
|
||||
int ret;
|
||||
pid_t pid;
|
||||
|
||||
if (get_user(version, &header->version))
|
||||
return -EFAULT;
|
||||
|
||||
if (version != _LINUX_CAPABILITY_VERSION) {
|
||||
if (put_user(_LINUX_CAPABILITY_VERSION, &header->version))
|
||||
return -EFAULT;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (get_user(pid, &header->pid))
|
||||
return -EFAULT;
|
||||
|
||||
if (pid && pid != current->pid && !capable(CAP_SETPCAP))
|
||||
return -EPERM;
|
||||
|
||||
if (copy_from_user(&effective, &data->effective, sizeof(effective)) ||
|
||||
copy_from_user(&inheritable, &data->inheritable, sizeof(inheritable)) ||
|
||||
copy_from_user(&permitted, &data->permitted, sizeof(permitted)))
|
||||
return -EFAULT;
|
||||
|
||||
spin_lock(&task_capability_lock);
|
||||
read_lock(&tasklist_lock);
|
||||
|
||||
if (pid > 0 && pid != current->pid) {
|
||||
target = find_task_by_pid(pid);
|
||||
if (!target) {
|
||||
ret = -ESRCH;
|
||||
goto out;
|
||||
}
|
||||
} else
|
||||
target = current;
|
||||
|
||||
ret = 0;
|
||||
|
||||
/* having verified that the proposed changes are legal,
|
||||
we now put them into effect. */
|
||||
if (pid < 0) {
|
||||
if (pid == -1) /* all procs other than current and init */
|
||||
ret = cap_set_all(&effective, &inheritable, &permitted);
|
||||
|
||||
else /* all procs in process group */
|
||||
ret = cap_set_pg(-pid, &effective, &inheritable,
|
||||
&permitted);
|
||||
} else {
|
||||
ret = security_capset_check(target, &effective, &inheritable,
|
||||
&permitted);
|
||||
if (!ret)
|
||||
security_capset_set(target, &effective, &inheritable,
|
||||
&permitted);
|
||||
}
|
||||
|
||||
out:
|
||||
read_unlock(&tasklist_lock);
|
||||
spin_unlock(&task_capability_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int __capable(struct task_struct *t, int cap)
|
||||
{
|
||||
if (security_capable(t, cap) == 0) {
|
||||
t->flags |= PF_SUPERPRIV;
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(__capable);
|
||||
|
||||
int capable(int cap)
|
||||
{
|
||||
return __capable(current, cap);
|
||||
}
|
||||
EXPORT_SYMBOL(capable);
|
||||
1084
kernel/compat.c
Normal file
1084
kernel/compat.c
Normal file
File diff suppressed because it is too large
Load Diff
117
kernel/configs.c
Normal file
117
kernel/configs.c
Normal file
@@ -0,0 +1,117 @@
|
||||
/*
|
||||
* kernel/configs.c
|
||||
* Echo the kernel .config file used to build the kernel
|
||||
*
|
||||
* Copyright (C) 2002 Khalid Aziz <khalid_aziz@hp.com>
|
||||
* Copyright (C) 2002 Randy Dunlap <rdunlap@xenotime.net>
|
||||
* Copyright (C) 2002 Al Stone <ahs3@fc.hp.com>
|
||||
* Copyright (C) 2002 Hewlett-Packard Company
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or (at
|
||||
* your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
|
||||
* NON INFRINGEMENT. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/init.h>
|
||||
#include <asm/uaccess.h>
|
||||
|
||||
/**************************************************/
|
||||
/* the actual current config file */
|
||||
|
||||
/*
|
||||
* Define kernel_config_data and kernel_config_data_size, which contains the
|
||||
* wrapped and compressed configuration file. The file is first compressed
|
||||
* with gzip and then bounded by two eight byte magic numbers to allow
|
||||
* extraction from a binary kernel image:
|
||||
*
|
||||
* IKCFG_ST
|
||||
* <image>
|
||||
* IKCFG_ED
|
||||
*/
|
||||
#define MAGIC_START "IKCFG_ST"
|
||||
#define MAGIC_END "IKCFG_ED"
|
||||
#include "config_data.h"
|
||||
|
||||
|
||||
#define MAGIC_SIZE (sizeof(MAGIC_START) - 1)
|
||||
#define kernel_config_data_size \
|
||||
(sizeof(kernel_config_data) - 1 - MAGIC_SIZE * 2)
|
||||
|
||||
#ifdef CONFIG_IKCONFIG_PROC
|
||||
|
||||
/**************************************************/
|
||||
/* globals and useful constants */
|
||||
|
||||
static ssize_t
|
||||
ikconfig_read_current(struct file *file, char __user *buf,
|
||||
size_t len, loff_t * offset)
|
||||
{
|
||||
loff_t pos = *offset;
|
||||
ssize_t count;
|
||||
|
||||
if (pos >= kernel_config_data_size)
|
||||
return 0;
|
||||
|
||||
count = min(len, (size_t)(kernel_config_data_size - pos));
|
||||
if (copy_to_user(buf, kernel_config_data + MAGIC_SIZE + pos, count))
|
||||
return -EFAULT;
|
||||
|
||||
*offset += count;
|
||||
return count;
|
||||
}
|
||||
|
||||
static const struct file_operations ikconfig_file_ops = {
|
||||
.owner = THIS_MODULE,
|
||||
.read = ikconfig_read_current,
|
||||
};
|
||||
|
||||
/***************************************************/
|
||||
/* ikconfig_init: start up everything we need to */
|
||||
|
||||
static int __init ikconfig_init(void)
|
||||
{
|
||||
struct proc_dir_entry *entry;
|
||||
|
||||
/* create the current config file */
|
||||
entry = create_proc_entry("config.gz", S_IFREG | S_IRUGO,
|
||||
&proc_root);
|
||||
if (!entry)
|
||||
return -ENOMEM;
|
||||
|
||||
entry->proc_fops = &ikconfig_file_ops;
|
||||
entry->size = kernel_config_data_size;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/***************************************************/
|
||||
/* ikconfig_cleanup: clean up our mess */
|
||||
|
||||
static void __exit ikconfig_cleanup(void)
|
||||
{
|
||||
remove_proc_entry("config.gz", &proc_root);
|
||||
}
|
||||
|
||||
module_init(ikconfig_init);
|
||||
module_exit(ikconfig_cleanup);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Randy Dunlap");
|
||||
MODULE_DESCRIPTION("Echo the kernel .config file used to build the kernel");
|
||||
|
||||
#endif /* CONFIG_IKCONFIG_PROC */
|
||||
327
kernel/cpu.c
Normal file
327
kernel/cpu.c
Normal file
@@ -0,0 +1,327 @@
|
||||
/* CPU control.
|
||||
* (C) 2001, 2002, 2003, 2004 Rusty Russell
|
||||
*
|
||||
* This code is licenced under the GPL.
|
||||
*/
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/unistd.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/stop_machine.h>
|
||||
#include <linux/mutex.h>
|
||||
|
||||
/* This protects CPUs going up and down... */
|
||||
static DEFINE_MUTEX(cpu_add_remove_lock);
|
||||
static DEFINE_MUTEX(cpu_bitmask_lock);
|
||||
|
||||
static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain);
|
||||
|
||||
/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
|
||||
* Should always be manipulated under cpu_add_remove_lock
|
||||
*/
|
||||
static int cpu_hotplug_disabled;
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
|
||||
/* Crappy recursive lock-takers in cpufreq! Complain loudly about idiots */
|
||||
static struct task_struct *recursive;
|
||||
static int recursive_depth;
|
||||
|
||||
void lock_cpu_hotplug(void)
|
||||
{
|
||||
struct task_struct *tsk = current;
|
||||
|
||||
if (tsk == recursive) {
|
||||
static int warnings = 10;
|
||||
if (warnings) {
|
||||
printk(KERN_ERR "Lukewarm IQ detected in hotplug locking\n");
|
||||
WARN_ON(1);
|
||||
warnings--;
|
||||
}
|
||||
recursive_depth++;
|
||||
return;
|
||||
}
|
||||
mutex_lock(&cpu_bitmask_lock);
|
||||
recursive = tsk;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(lock_cpu_hotplug);
|
||||
|
||||
void unlock_cpu_hotplug(void)
|
||||
{
|
||||
WARN_ON(recursive != current);
|
||||
if (recursive_depth) {
|
||||
recursive_depth--;
|
||||
return;
|
||||
}
|
||||
recursive = NULL;
|
||||
mutex_unlock(&cpu_bitmask_lock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(unlock_cpu_hotplug);
|
||||
|
||||
#endif /* CONFIG_HOTPLUG_CPU */
|
||||
|
||||
/* Need to know about CPUs going up/down? */
|
||||
int __cpuinit register_cpu_notifier(struct notifier_block *nb)
|
||||
{
|
||||
int ret;
|
||||
mutex_lock(&cpu_add_remove_lock);
|
||||
ret = raw_notifier_chain_register(&cpu_chain, nb);
|
||||
mutex_unlock(&cpu_add_remove_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
|
||||
EXPORT_SYMBOL(register_cpu_notifier);
|
||||
|
||||
void unregister_cpu_notifier(struct notifier_block *nb)
|
||||
{
|
||||
mutex_lock(&cpu_add_remove_lock);
|
||||
raw_notifier_chain_unregister(&cpu_chain, nb);
|
||||
mutex_unlock(&cpu_add_remove_lock);
|
||||
}
|
||||
EXPORT_SYMBOL(unregister_cpu_notifier);
|
||||
|
||||
static inline void check_for_tasks(int cpu)
|
||||
{
|
||||
struct task_struct *p;
|
||||
|
||||
write_lock_irq(&tasklist_lock);
|
||||
for_each_process(p) {
|
||||
if (task_cpu(p) == cpu &&
|
||||
(!cputime_eq(p->utime, cputime_zero) ||
|
||||
!cputime_eq(p->stime, cputime_zero)))
|
||||
printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d\
|
||||
(state = %ld, flags = %lx) \n",
|
||||
p->comm, p->pid, cpu, p->state, p->flags);
|
||||
}
|
||||
write_unlock_irq(&tasklist_lock);
|
||||
}
|
||||
|
||||
/* Take this CPU down. */
|
||||
static int take_cpu_down(void *unused)
|
||||
{
|
||||
int err;
|
||||
|
||||
/* Ensure this CPU doesn't handle any more interrupts. */
|
||||
err = __cpu_disable();
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
/* Force idle task to run as soon as we yield: it should
|
||||
immediately notice cpu is offline and die quickly. */
|
||||
sched_idle_next();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Requires cpu_add_remove_lock to be held */
|
||||
static int _cpu_down(unsigned int cpu)
|
||||
{
|
||||
int err;
|
||||
struct task_struct *p;
|
||||
cpumask_t old_allowed, tmp;
|
||||
|
||||
if (num_online_cpus() == 1)
|
||||
return -EBUSY;
|
||||
|
||||
if (!cpu_online(cpu))
|
||||
return -EINVAL;
|
||||
|
||||
err = raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE,
|
||||
(void *)(long)cpu);
|
||||
if (err == NOTIFY_BAD) {
|
||||
printk("%s: attempt to take down CPU %u failed\n",
|
||||
__FUNCTION__, cpu);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Ensure that we are not runnable on dying cpu */
|
||||
old_allowed = current->cpus_allowed;
|
||||
tmp = CPU_MASK_ALL;
|
||||
cpu_clear(cpu, tmp);
|
||||
set_cpus_allowed(current, tmp);
|
||||
|
||||
mutex_lock(&cpu_bitmask_lock);
|
||||
p = __stop_machine_run(take_cpu_down, NULL, cpu);
|
||||
mutex_unlock(&cpu_bitmask_lock);
|
||||
|
||||
if (IS_ERR(p) || cpu_online(cpu)) {
|
||||
/* CPU didn't die: tell everyone. Can't complain. */
|
||||
if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED,
|
||||
(void *)(long)cpu) == NOTIFY_BAD)
|
||||
BUG();
|
||||
|
||||
if (IS_ERR(p)) {
|
||||
err = PTR_ERR(p);
|
||||
goto out_allowed;
|
||||
}
|
||||
goto out_thread;
|
||||
}
|
||||
|
||||
/* Wait for it to sleep (leaving idle task). */
|
||||
while (!idle_cpu(cpu))
|
||||
yield();
|
||||
|
||||
/* This actually kills the CPU. */
|
||||
__cpu_die(cpu);
|
||||
|
||||
/* Move it here so it can run. */
|
||||
kthread_bind(p, get_cpu());
|
||||
put_cpu();
|
||||
|
||||
/* CPU is completely dead: tell everyone. Too late to complain. */
|
||||
if (raw_notifier_call_chain(&cpu_chain, CPU_DEAD,
|
||||
(void *)(long)cpu) == NOTIFY_BAD)
|
||||
BUG();
|
||||
|
||||
check_for_tasks(cpu);
|
||||
|
||||
out_thread:
|
||||
err = kthread_stop(p);
|
||||
out_allowed:
|
||||
set_cpus_allowed(current, old_allowed);
|
||||
return err;
|
||||
}
|
||||
|
||||
int cpu_down(unsigned int cpu)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
mutex_lock(&cpu_add_remove_lock);
|
||||
if (cpu_hotplug_disabled)
|
||||
err = -EBUSY;
|
||||
else
|
||||
err = _cpu_down(cpu);
|
||||
|
||||
mutex_unlock(&cpu_add_remove_lock);
|
||||
return err;
|
||||
}
|
||||
#endif /*CONFIG_HOTPLUG_CPU*/
|
||||
|
||||
/* Requires cpu_add_remove_lock to be held */
|
||||
static int __cpuinit _cpu_up(unsigned int cpu)
|
||||
{
|
||||
int ret;
|
||||
void *hcpu = (void *)(long)cpu;
|
||||
|
||||
if (cpu_online(cpu) || !cpu_present(cpu))
|
||||
return -EINVAL;
|
||||
|
||||
ret = raw_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu);
|
||||
if (ret == NOTIFY_BAD) {
|
||||
printk("%s: attempt to bring up CPU %u failed\n",
|
||||
__FUNCTION__, cpu);
|
||||
ret = -EINVAL;
|
||||
goto out_notify;
|
||||
}
|
||||
|
||||
/* Arch-specific enabling code. */
|
||||
mutex_lock(&cpu_bitmask_lock);
|
||||
ret = __cpu_up(cpu);
|
||||
mutex_unlock(&cpu_bitmask_lock);
|
||||
if (ret != 0)
|
||||
goto out_notify;
|
||||
BUG_ON(!cpu_online(cpu));
|
||||
|
||||
/* Now call notifier in preparation. */
|
||||
raw_notifier_call_chain(&cpu_chain, CPU_ONLINE, hcpu);
|
||||
|
||||
out_notify:
|
||||
if (ret != 0)
|
||||
raw_notifier_call_chain(&cpu_chain,
|
||||
CPU_UP_CANCELED, hcpu);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int __cpuinit cpu_up(unsigned int cpu)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
mutex_lock(&cpu_add_remove_lock);
|
||||
if (cpu_hotplug_disabled)
|
||||
err = -EBUSY;
|
||||
else
|
||||
err = _cpu_up(cpu);
|
||||
|
||||
mutex_unlock(&cpu_add_remove_lock);
|
||||
return err;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SUSPEND_SMP
|
||||
/* Needed to prevent the microcode driver from requesting firmware in its CPU
|
||||
* hotplug notifier during the suspend/resume.
|
||||
*/
|
||||
int suspend_cpu_hotplug;
|
||||
EXPORT_SYMBOL(suspend_cpu_hotplug);
|
||||
|
||||
static cpumask_t frozen_cpus;
|
||||
|
||||
int disable_nonboot_cpus(void)
|
||||
{
|
||||
int cpu, first_cpu, error = 0;
|
||||
|
||||
mutex_lock(&cpu_add_remove_lock);
|
||||
suspend_cpu_hotplug = 1;
|
||||
first_cpu = first_cpu(cpu_online_map);
|
||||
/* We take down all of the non-boot CPUs in one shot to avoid races
|
||||
* with the userspace trying to use the CPU hotplug at the same time
|
||||
*/
|
||||
cpus_clear(frozen_cpus);
|
||||
printk("Disabling non-boot CPUs ...\n");
|
||||
for_each_online_cpu(cpu) {
|
||||
if (cpu == first_cpu)
|
||||
continue;
|
||||
error = _cpu_down(cpu);
|
||||
if (!error) {
|
||||
cpu_set(cpu, frozen_cpus);
|
||||
printk("CPU%d is down\n", cpu);
|
||||
} else {
|
||||
printk(KERN_ERR "Error taking CPU%d down: %d\n",
|
||||
cpu, error);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!error) {
|
||||
BUG_ON(num_online_cpus() > 1);
|
||||
/* Make sure the CPUs won't be enabled by someone else */
|
||||
cpu_hotplug_disabled = 1;
|
||||
} else {
|
||||
printk(KERN_ERR "Non-boot CPUs are not disabled\n");
|
||||
}
|
||||
suspend_cpu_hotplug = 0;
|
||||
mutex_unlock(&cpu_add_remove_lock);
|
||||
return error;
|
||||
}
|
||||
|
||||
void enable_nonboot_cpus(void)
|
||||
{
|
||||
int cpu, error;
|
||||
|
||||
/* Allow everyone to use the CPU hotplug again */
|
||||
mutex_lock(&cpu_add_remove_lock);
|
||||
cpu_hotplug_disabled = 0;
|
||||
if (cpus_empty(frozen_cpus))
|
||||
goto out;
|
||||
|
||||
suspend_cpu_hotplug = 1;
|
||||
printk("Enabling non-boot CPUs ...\n");
|
||||
for_each_cpu_mask(cpu, frozen_cpus) {
|
||||
error = _cpu_up(cpu);
|
||||
if (!error) {
|
||||
printk("CPU%d is up\n", cpu);
|
||||
continue;
|
||||
}
|
||||
printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error);
|
||||
}
|
||||
cpus_clear(frozen_cpus);
|
||||
suspend_cpu_hotplug = 0;
|
||||
out:
|
||||
mutex_unlock(&cpu_add_remove_lock);
|
||||
}
|
||||
#endif
|
||||
2671
kernel/cpuset.c
Normal file
2671
kernel/cpuset.c
Normal file
File diff suppressed because it is too large
Load Diff
165
kernel/delayacct.c
Normal file
165
kernel/delayacct.c
Normal file
@@ -0,0 +1,165 @@
|
||||
/* delayacct.c - per-task delay accounting
|
||||
*
|
||||
* Copyright (C) Shailabh Nagar, IBM Corp. 2006
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it would be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
|
||||
* the GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
#include <linux/sched.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/time.h>
|
||||
#include <linux/sysctl.h>
|
||||
#include <linux/delayacct.h>
|
||||
|
||||
int delayacct_on __read_mostly = 1; /* Delay accounting turned on/off */
|
||||
struct kmem_cache *delayacct_cache;
|
||||
|
||||
static int __init delayacct_setup_disable(char *str)
|
||||
{
|
||||
delayacct_on = 0;
|
||||
return 1;
|
||||
}
|
||||
__setup("nodelayacct", delayacct_setup_disable);
|
||||
|
||||
void delayacct_init(void)
|
||||
{
|
||||
delayacct_cache = kmem_cache_create("delayacct_cache",
|
||||
sizeof(struct task_delay_info),
|
||||
0,
|
||||
SLAB_PANIC,
|
||||
NULL, NULL);
|
||||
delayacct_tsk_init(&init_task);
|
||||
}
|
||||
|
||||
void __delayacct_tsk_init(struct task_struct *tsk)
|
||||
{
|
||||
tsk->delays = kmem_cache_zalloc(delayacct_cache, GFP_KERNEL);
|
||||
if (tsk->delays)
|
||||
spin_lock_init(&tsk->delays->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Start accounting for a delay statistic using
|
||||
* its starting timestamp (@start)
|
||||
*/
|
||||
|
||||
static inline void delayacct_start(struct timespec *start)
|
||||
{
|
||||
do_posix_clock_monotonic_gettime(start);
|
||||
}
|
||||
|
||||
/*
|
||||
* Finish delay accounting for a statistic using
|
||||
* its timestamps (@start, @end), accumalator (@total) and @count
|
||||
*/
|
||||
|
||||
static void delayacct_end(struct timespec *start, struct timespec *end,
|
||||
u64 *total, u32 *count)
|
||||
{
|
||||
struct timespec ts;
|
||||
s64 ns;
|
||||
unsigned long flags;
|
||||
|
||||
do_posix_clock_monotonic_gettime(end);
|
||||
ts = timespec_sub(*end, *start);
|
||||
ns = timespec_to_ns(&ts);
|
||||
if (ns < 0)
|
||||
return;
|
||||
|
||||
spin_lock_irqsave(¤t->delays->lock, flags);
|
||||
*total += ns;
|
||||
(*count)++;
|
||||
spin_unlock_irqrestore(¤t->delays->lock, flags);
|
||||
}
|
||||
|
||||
void __delayacct_blkio_start(void)
|
||||
{
|
||||
delayacct_start(¤t->delays->blkio_start);
|
||||
}
|
||||
|
||||
void __delayacct_blkio_end(void)
|
||||
{
|
||||
if (current->delays->flags & DELAYACCT_PF_SWAPIN)
|
||||
/* Swapin block I/O */
|
||||
delayacct_end(¤t->delays->blkio_start,
|
||||
¤t->delays->blkio_end,
|
||||
¤t->delays->swapin_delay,
|
||||
¤t->delays->swapin_count);
|
||||
else /* Other block I/O */
|
||||
delayacct_end(¤t->delays->blkio_start,
|
||||
¤t->delays->blkio_end,
|
||||
¤t->delays->blkio_delay,
|
||||
¤t->delays->blkio_count);
|
||||
}
|
||||
|
||||
int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
|
||||
{
|
||||
s64 tmp;
|
||||
unsigned long t1;
|
||||
unsigned long long t2,t3;
|
||||
unsigned long flags;
|
||||
struct timespec ts;
|
||||
|
||||
/* Though tsk->delays accessed later, early exit avoids
|
||||
* unnecessary returning of other data
|
||||
*/
|
||||
if (!tsk->delays)
|
||||
goto done;
|
||||
|
||||
tmp = (s64)d->cpu_run_real_total;
|
||||
cputime_to_timespec(tsk->utime + tsk->stime, &ts);
|
||||
tmp += timespec_to_ns(&ts);
|
||||
d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp;
|
||||
|
||||
/*
|
||||
* No locking available for sched_info (and too expensive to add one)
|
||||
* Mitigate by taking snapshot of values
|
||||
*/
|
||||
t1 = tsk->sched_info.pcnt;
|
||||
t2 = tsk->sched_info.run_delay;
|
||||
t3 = tsk->sched_info.cpu_time;
|
||||
|
||||
d->cpu_count += t1;
|
||||
|
||||
tmp = (s64)d->cpu_delay_total + t2;
|
||||
d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp;
|
||||
|
||||
tmp = (s64)d->cpu_run_virtual_total + t3;
|
||||
d->cpu_run_virtual_total =
|
||||
(tmp < (s64)d->cpu_run_virtual_total) ? 0 : tmp;
|
||||
|
||||
/* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */
|
||||
|
||||
spin_lock_irqsave(&tsk->delays->lock, flags);
|
||||
tmp = d->blkio_delay_total + tsk->delays->blkio_delay;
|
||||
d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
|
||||
tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
|
||||
d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
|
||||
d->blkio_count += tsk->delays->blkio_count;
|
||||
d->swapin_count += tsk->delays->swapin_count;
|
||||
spin_unlock_irqrestore(&tsk->delays->lock, flags);
|
||||
|
||||
done:
|
||||
return 0;
|
||||
}
|
||||
|
||||
__u64 __delayacct_blkio_ticks(struct task_struct *tsk)
|
||||
{
|
||||
__u64 ret;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&tsk->delays->lock, flags);
|
||||
ret = nsec_to_clock_t(tsk->delays->blkio_delay +
|
||||
tsk->delays->swapin_delay);
|
||||
spin_unlock_irqrestore(&tsk->delays->lock, flags);
|
||||
return ret;
|
||||
}
|
||||
|
||||
166
kernel/dma.c
Normal file
166
kernel/dma.c
Normal file
@@ -0,0 +1,166 @@
|
||||
/* $Id: dma.c,v 1.1.1.1 2007/06/12 07:27:11 eyryu Exp $
|
||||
* linux/kernel/dma.c: A DMA channel allocator. Inspired by linux/kernel/irq.c.
|
||||
*
|
||||
* Written by Hennus Bergman, 1992.
|
||||
*
|
||||
* 1994/12/26: Changes by Alex Nash to fix a minor bug in /proc/dma.
|
||||
* In the previous version the reported device could end up being wrong,
|
||||
* if a device requested a DMA channel that was already in use.
|
||||
* [It also happened to remove the sizeof(char *) == sizeof(int)
|
||||
* assumption introduced because of those /proc/dma patches. -- Hennus]
|
||||
*/
|
||||
#include <linux/module.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/init.h>
|
||||
#include <asm/dma.h>
|
||||
#include <asm/system.h>
|
||||
|
||||
|
||||
|
||||
/* A note on resource allocation:
|
||||
*
|
||||
* All drivers needing DMA channels, should allocate and release them
|
||||
* through the public routines `request_dma()' and `free_dma()'.
|
||||
*
|
||||
* In order to avoid problems, all processes should allocate resources in
|
||||
* the same sequence and release them in the reverse order.
|
||||
*
|
||||
* So, when allocating DMAs and IRQs, first allocate the IRQ, then the DMA.
|
||||
* When releasing them, first release the DMA, then release the IRQ.
|
||||
* If you don't, you may cause allocation requests to fail unnecessarily.
|
||||
* This doesn't really matter now, but it will once we get real semaphores
|
||||
* in the kernel.
|
||||
*/
|
||||
|
||||
|
||||
DEFINE_SPINLOCK(dma_spin_lock);
|
||||
|
||||
/*
|
||||
* If our port doesn't define this it has no PC like DMA
|
||||
*/
|
||||
|
||||
#ifdef MAX_DMA_CHANNELS
|
||||
|
||||
|
||||
/* Channel n is busy iff dma_chan_busy[n].lock != 0.
|
||||
* DMA0 used to be reserved for DRAM refresh, but apparently not any more...
|
||||
* DMA4 is reserved for cascading.
|
||||
*/
|
||||
|
||||
struct dma_chan {
|
||||
int lock;
|
||||
const char *device_id;
|
||||
};
|
||||
|
||||
static struct dma_chan dma_chan_busy[MAX_DMA_CHANNELS] = {
|
||||
[4] = { 1, "cascade" },
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* request_dma - request and reserve a system DMA channel
|
||||
* @dmanr: DMA channel number
|
||||
* @device_id: reserving device ID string, used in /proc/dma
|
||||
*/
|
||||
int request_dma(unsigned int dmanr, const char * device_id)
|
||||
{
|
||||
if (dmanr >= MAX_DMA_CHANNELS)
|
||||
return -EINVAL;
|
||||
|
||||
if (xchg(&dma_chan_busy[dmanr].lock, 1) != 0)
|
||||
return -EBUSY;
|
||||
|
||||
dma_chan_busy[dmanr].device_id = device_id;
|
||||
|
||||
/* old flag was 0, now contains 1 to indicate busy */
|
||||
return 0;
|
||||
} /* request_dma */
|
||||
|
||||
/**
|
||||
* free_dma - free a reserved system DMA channel
|
||||
* @dmanr: DMA channel number
|
||||
*/
|
||||
void free_dma(unsigned int dmanr)
|
||||
{
|
||||
if (dmanr >= MAX_DMA_CHANNELS) {
|
||||
printk(KERN_WARNING "Trying to free DMA%d\n", dmanr);
|
||||
return;
|
||||
}
|
||||
|
||||
if (xchg(&dma_chan_busy[dmanr].lock, 0) == 0) {
|
||||
printk(KERN_WARNING "Trying to free free DMA%d\n", dmanr);
|
||||
return;
|
||||
}
|
||||
|
||||
} /* free_dma */
|
||||
|
||||
#else
|
||||
|
||||
int request_dma(unsigned int dmanr, const char *device_id)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* No PC-style DMA on this platform: nothing to release. */
void free_dma(unsigned int dmanr)
{
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_PROC_FS
|
||||
|
||||
#ifdef MAX_DMA_CHANNELS
|
||||
static int proc_dma_show(struct seq_file *m, void *v)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0 ; i < MAX_DMA_CHANNELS ; i++) {
|
||||
if (dma_chan_busy[i].lock) {
|
||||
seq_printf(m, "%2d: %s\n", i,
|
||||
dma_chan_busy[i].device_id);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
/* Fallback /proc/dma body when the port has no ISA-style DMA. */
static int proc_dma_show(struct seq_file *m, void *v)
{
	seq_puts(m, "No DMA\n");
	return 0;
}
|
||||
#endif /* MAX_DMA_CHANNELS */
|
||||
|
||||
static int proc_dma_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
return single_open(file, proc_dma_show, NULL);
|
||||
}
|
||||
|
||||
/* File operations backing /proc/dma (single_open based seq_file). */
static const struct file_operations proc_dma_operations = {
	.open = proc_dma_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
|
||||
|
||||
/* Register /proc/dma during boot; failure to create the entry is
 * silently tolerated (the proc file is purely informational). */
static int __init proc_dma_init(void)
{
	struct proc_dir_entry *entry;

	entry = create_proc_entry("dma", 0, NULL);
	if (entry)
		entry->proc_fops = &proc_dma_operations;

	return 0;
}
|
||||
|
||||
__initcall(proc_dma_init);
|
||||
#endif
|
||||
|
||||
EXPORT_SYMBOL(request_dma);
|
||||
EXPORT_SYMBOL(free_dma);
|
||||
EXPORT_SYMBOL(dma_spin_lock);
|
||||
209
kernel/exec_domain.c
Normal file
209
kernel/exec_domain.c
Normal file
@@ -0,0 +1,209 @@
|
||||
/*
|
||||
* Handling of different ABIs (personalities).
|
||||
*
|
||||
* We group personalities into execution domains which have their
|
||||
* own handlers for kernel entry points, signal mapping, etc...
|
||||
*
|
||||
* 2001-05-06 Complete rewrite, Christoph Hellwig (hch@infradead.org)
|
||||
*/
|
||||
|
||||
#include <linux/init.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/kmod.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/personality.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/sysctl.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
|
||||
static void default_handler(int, struct pt_regs *);
|
||||
|
||||
static struct exec_domain *exec_domains = &default_exec_domain;
|
||||
static DEFINE_RWLOCK(exec_domains_lock);
|
||||
|
||||
|
||||
/* Identity signal map: signal n maps to n; used both ways for the
 * native Linux domain below. */
static u_long ident_map[32] = {
	0, 1, 2, 3, 4, 5, 6, 7,
	8, 9, 10, 11, 12, 13, 14, 15,
	16, 17, 18, 19, 20, 21, 22, 23,
	24, 25, 26, 27, 28, 29, 30, 31
};

/* The native PER_LINUX execution domain; the permanent fallback that
 * lookup_exec_domain() returns when nothing else matches. */
struct exec_domain default_exec_domain = {
	.name		= "Linux",		/* name */
	.handler	= default_handler,	/* lcall7 causes a seg fault. */
	.pers_low	= 0,			/* PER_LINUX personality. */
	.pers_high	= 0,			/* PER_LINUX personality. */
	.signal_map	= ident_map,		/* Identity map signals. */
	.signal_invmap	= ident_map,		/* - both ways. */
};
|
||||
|
||||
|
||||
static void
|
||||
default_handler(int segment, struct pt_regs *regp)
|
||||
{
|
||||
set_personality(0);
|
||||
|
||||
if (current_thread_info()->exec_domain->handler != default_handler)
|
||||
current_thread_info()->exec_domain->handler(segment, regp);
|
||||
else
|
||||
send_sig(SIGSEGV, current, 1);
|
||||
}
|
||||
|
||||
static struct exec_domain *
|
||||
lookup_exec_domain(u_long personality)
|
||||
{
|
||||
struct exec_domain * ep;
|
||||
u_long pers = personality(personality);
|
||||
|
||||
read_lock(&exec_domains_lock);
|
||||
for (ep = exec_domains; ep; ep = ep->next) {
|
||||
if (pers >= ep->pers_low && pers <= ep->pers_high)
|
||||
if (try_module_get(ep->module))
|
||||
goto out;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_KMOD
|
||||
read_unlock(&exec_domains_lock);
|
||||
request_module("personality-%ld", pers);
|
||||
read_lock(&exec_domains_lock);
|
||||
|
||||
for (ep = exec_domains; ep; ep = ep->next) {
|
||||
if (pers >= ep->pers_low && pers <= ep->pers_high)
|
||||
if (try_module_get(ep->module))
|
||||
goto out;
|
||||
}
|
||||
#endif
|
||||
|
||||
ep = &default_exec_domain;
|
||||
out:
|
||||
read_unlock(&exec_domains_lock);
|
||||
return (ep);
|
||||
}
|
||||
|
||||
int
|
||||
register_exec_domain(struct exec_domain *ep)
|
||||
{
|
||||
struct exec_domain *tmp;
|
||||
int err = -EBUSY;
|
||||
|
||||
if (ep == NULL)
|
||||
return -EINVAL;
|
||||
|
||||
if (ep->next != NULL)
|
||||
return -EBUSY;
|
||||
|
||||
write_lock(&exec_domains_lock);
|
||||
for (tmp = exec_domains; tmp; tmp = tmp->next) {
|
||||
if (tmp == ep)
|
||||
goto out;
|
||||
}
|
||||
|
||||
ep->next = exec_domains;
|
||||
exec_domains = ep;
|
||||
err = 0;
|
||||
|
||||
out:
|
||||
write_unlock(&exec_domains_lock);
|
||||
return (err);
|
||||
}
|
||||
|
||||
int
|
||||
unregister_exec_domain(struct exec_domain *ep)
|
||||
{
|
||||
struct exec_domain **epp;
|
||||
|
||||
epp = &exec_domains;
|
||||
write_lock(&exec_domains_lock);
|
||||
for (epp = &exec_domains; *epp; epp = &(*epp)->next) {
|
||||
if (ep == *epp)
|
||||
goto unregister;
|
||||
}
|
||||
write_unlock(&exec_domains_lock);
|
||||
return -EINVAL;
|
||||
|
||||
unregister:
|
||||
*epp = ep->next;
|
||||
ep->next = NULL;
|
||||
write_unlock(&exec_domains_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Switch the current task to the execution domain implied by
 * @personality.  If the new domain differs from the current one, the
 * task's fs_struct is unshared first so that set_fs_altroot() below
 * cannot affect other tasks sharing it.  Returns 0 on success or
 * -ENOMEM when the fs_struct copy fails.
 */
int
__set_personality(u_long personality)
{
	struct exec_domain *ep, *oep;

	ep = lookup_exec_domain(personality);
	if (ep == current_thread_info()->exec_domain) {
		/* Same domain: just record the new personality bits and
		 * drop the extra module reference lookup took. */
		current->personality = personality;
		module_put(ep->module);
		return 0;
	}

	if (atomic_read(&current->fs->count) != 1) {
		struct fs_struct *fsp, *ofsp;

		/* fs_struct is shared: take a private copy before we
		 * touch the alt-root. */
		fsp = copy_fs_struct(current->fs);
		if (fsp == NULL) {
			module_put(ep->module);
			return -ENOMEM;
		}

		task_lock(current);
		ofsp = current->fs;
		current->fs = fsp;
		task_unlock(current);

		put_fs_struct(ofsp);
	}

	/*
	 * At that point we are guaranteed to be the sole owner of
	 * current->fs.
	 */

	current->personality = personality;
	oep = current_thread_info()->exec_domain;
	current_thread_info()->exec_domain = ep;
	set_fs_altroot();

	/* Release the old domain's module; ep's reference is kept for
	 * as long as the task stays in the new domain. */
	module_put(oep->module);
	return 0;
}
|
||||
|
||||
int
|
||||
get_exec_domain_list(char *page)
|
||||
{
|
||||
struct exec_domain *ep;
|
||||
int len = 0;
|
||||
|
||||
read_lock(&exec_domains_lock);
|
||||
for (ep = exec_domains; ep && len < PAGE_SIZE - 80; ep = ep->next)
|
||||
len += sprintf(page + len, "%d-%d\t%-16s\t[%s]\n",
|
||||
ep->pers_low, ep->pers_high, ep->name,
|
||||
module_name(ep->module));
|
||||
read_unlock(&exec_domains_lock);
|
||||
return (len);
|
||||
}
|
||||
|
||||
asmlinkage long
|
||||
sys_personality(u_long personality)
|
||||
{
|
||||
u_long old = current->personality;
|
||||
|
||||
if (personality != 0xffffffff) {
|
||||
set_personality(personality);
|
||||
if (current->personality != personality)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return (long)old;
|
||||
}
|
||||
|
||||
|
||||
EXPORT_SYMBOL(register_exec_domain);
|
||||
EXPORT_SYMBOL(unregister_exec_domain);
|
||||
EXPORT_SYMBOL(__set_personality);
|
||||
1662
kernel/exit.c
Normal file
1662
kernel/exit.c
Normal file
File diff suppressed because it is too large
Load Diff
67
kernel/extable.c
Normal file
67
kernel/extable.c
Normal file
@@ -0,0 +1,67 @@
|
||||
/* Rewritten by Rusty Russell, on the backs of many others...
|
||||
Copyright (C) 2001 Rusty Russell, 2002 Rusty Russell IBM.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
#include <linux/module.h>
|
||||
#include <linux/init.h>
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/sections.h>
|
||||
|
||||
extern struct exception_table_entry __start___ex_table[];
|
||||
extern struct exception_table_entry __stop___ex_table[];
|
||||
|
||||
/* Sort the kernel's built-in exception table (the __ex_table section
 * delimited by __start/__stop markers) so it can be searched fast. */
void __init sort_main_extable(void)
{
	sort_extable(__start___ex_table, __stop___ex_table);
}
|
||||
|
||||
/* Given an address, look for it in the exception tables. */
|
||||
const struct exception_table_entry *search_exception_tables(unsigned long addr)
|
||||
{
|
||||
const struct exception_table_entry *e;
|
||||
|
||||
e = search_extable(__start___ex_table, __stop___ex_table-1, addr);
|
||||
if (!e)
|
||||
e = search_module_extables(addr);
|
||||
return e;
|
||||
}
|
||||
|
||||
int core_kernel_text(unsigned long addr)
|
||||
{
|
||||
if (addr >= (unsigned long)_stext &&
|
||||
addr <= (unsigned long)_etext)
|
||||
return 1;
|
||||
|
||||
if (addr >= (unsigned long)_sinittext &&
|
||||
addr <= (unsigned long)_einittext)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int __kernel_text_address(unsigned long addr)
|
||||
{
|
||||
if (core_kernel_text(addr))
|
||||
return 1;
|
||||
return __module_text_address(addr) != NULL;
|
||||
}
|
||||
|
||||
int kernel_text_address(unsigned long addr)
|
||||
{
|
||||
if (core_kernel_text(addr))
|
||||
return 1;
|
||||
return module_text_address(addr) != NULL;
|
||||
}
|
||||
1752
kernel/fork.c
Normal file
1752
kernel/fork.c
Normal file
File diff suppressed because it is too large
Load Diff
1879
kernel/futex.c
Normal file
1879
kernel/futex.c
Normal file
File diff suppressed because it is too large
Load Diff
164
kernel/futex_compat.c
Normal file
164
kernel/futex_compat.c
Normal file
@@ -0,0 +1,164 @@
|
||||
/*
|
||||
* linux/kernel/futex_compat.c
|
||||
*
|
||||
* Futex compatibililty routines.
|
||||
*
|
||||
* Copyright 2006, Red Hat, Inc., Ingo Molnar
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <linux/compat.h>
|
||||
#include <linux/futex.h>
|
||||
|
||||
#include <asm/uaccess.h>
|
||||
|
||||
|
||||
/*
|
||||
* Fetch a robust-list pointer. Bit 0 signals PI futexes:
|
||||
*/
|
||||
static inline int
|
||||
fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
|
||||
compat_uptr_t __user *head, int *pi)
|
||||
{
|
||||
if (get_user(*uentry, head))
|
||||
return -EFAULT;
|
||||
|
||||
*entry = compat_ptr((*uentry) & ~1);
|
||||
*pi = (unsigned int)(*uentry) & 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Walk curr->robust_list (very carefully, it's a userspace list!)
 * and mark any locks found there dead, and notify any waiters.
 *
 * We silently return on any sign of list-walking problem.
 */
void compat_exit_robust_list(struct task_struct *curr)
{
	struct compat_robust_list_head __user *head = curr->compat_robust_list;
	struct robust_list __user *entry, *pending;
	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
	compat_uptr_t uentry, upending;
	compat_long_t futex_offset;

	/*
	 * Fetch the list head (which was registered earlier, via
	 * sys_set_robust_list()):
	 */
	if (fetch_robust_entry(&uentry, &entry, &head->list.next, &pi))
		return;
	/*
	 * Fetch the relative futex offset:
	 */
	if (get_user(futex_offset, &head->futex_offset))
		return;
	/*
	 * Fetch any possibly pending lock-add first, and handle it
	 * if it exists:
	 */
	if (fetch_robust_entry(&upending, &pending,
			       &head->list_op_pending, &pip))
		return;
	if (upending)
		handle_futex_death((void __user *)pending + futex_offset, curr, pip);

	while (compat_ptr(uentry) != &head->list) {
		/*
		 * A pending lock might already be on the list, so
		 * dont process it twice:
		 */
		if (entry != pending)
			if (handle_futex_death((void __user *)entry + futex_offset,
						curr, pi))
				return;

		/*
		 * Fetch the next entry in the list:
		 */
		if (fetch_robust_entry(&uentry, &entry,
				       (compat_uptr_t __user *)&entry->next, &pi))
			return;
		/*
		 * Avoid excessively long or circular lists:
		 */
		if (!--limit)
			break;

		/* The list is user-controlled: yield between entries. */
		cond_resched();
	}
}
|
||||
|
||||
asmlinkage long
|
||||
compat_sys_set_robust_list(struct compat_robust_list_head __user *head,
|
||||
compat_size_t len)
|
||||
{
|
||||
if (unlikely(len != sizeof(*head)))
|
||||
return -EINVAL;
|
||||
|
||||
current->compat_robust_list = head;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Copy the compat robust-list head of @pid (or of the caller when
 * @pid is 0), together with its size, to userspace.  Inspecting
 * another task requires matching euid/uid or CAP_SYS_PTRACE.
 */
asmlinkage long
compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
			   compat_size_t __user *len_ptr)
{
	struct compat_robust_list_head __user *head;
	unsigned long ret;

	if (!pid)
		head = current->compat_robust_list;
	else {
		struct task_struct *p;

		ret = -ESRCH;
		read_lock(&tasklist_lock);
		p = find_task_by_pid(pid);
		if (!p)
			goto err_unlock;
		ret = -EPERM;
		/* Only the same user, or a ptrace-capable one, may peek. */
		if ((current->euid != p->euid) && (current->euid != p->uid) &&
		    !capable(CAP_SYS_PTRACE))
			goto err_unlock;
		head = p->compat_robust_list;
		read_unlock(&tasklist_lock);
	}

	if (put_user(sizeof(*head), len_ptr))
		return -EFAULT;
	return put_user(ptr_to_compat(head), head_ptr);

err_unlock:
	read_unlock(&tasklist_lock);

	return ret;
}
|
||||
|
||||
/*
 * 32-bit compat entry point for the futex syscall: converts the
 * compat timespec (for ops that take one) and forwards to do_futex().
 */
asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
		struct compat_timespec __user *utime, u32 __user *uaddr2,
		u32 val3)
{
	struct timespec t;
	unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
	int val2 = 0;

	if (utime && (op == FUTEX_WAIT || op == FUTEX_LOCK_PI)) {
		if (get_compat_timespec(&t, utime))
			return -EFAULT;
		if (!timespec_valid(&t))
			return -EINVAL;
		if (op == FUTEX_WAIT)
			/* Relative timeout in jiffies; +1 rounds up. */
			timeout = timespec_to_jiffies(&t) + 1;
		else {
			/* FUTEX_LOCK_PI passes the raw fields through. */
			timeout = t.tv_sec;
			val2 = t.tv_nsec;
		}
	}
	/* For requeue ops, @utime actually carries an integer argument. */
	if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE)
		val2 = (int) (unsigned long) utime;

	return do_futex(uaddr, op, val, timeout, uaddr2, val2, val3);
}
|
||||
1440
kernel/hrtimer.c
Normal file
1440
kernel/hrtimer.c
Normal file
File diff suppressed because it is too large
Load Diff
5
kernel/irq/Makefile
Normal file
5
kernel/irq/Makefile
Normal file
@@ -0,0 +1,5 @@
|
||||
|
||||
obj-y := handle.o manage.o spurious.o resend.o chip.o devres.o
|
||||
obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o
|
||||
obj-$(CONFIG_PROC_FS) += proc.o
|
||||
obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o
|
||||
202
kernel/irq/autoprobe.c
Normal file
202
kernel/irq/autoprobe.c
Normal file
@@ -0,0 +1,202 @@
|
||||
/*
|
||||
* linux/kernel/irq/autoprobe.c
|
||||
*
|
||||
* Copyright (C) 1992, 1998-2004 Linus Torvalds, Ingo Molnar
|
||||
*
|
||||
* This file contains the interrupt probing code and driver APIs.
|
||||
*/
|
||||
|
||||
#include <linux/irq.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/delay.h>
|
||||
|
||||
#include "internals.h"
|
||||
|
||||
/*
|
||||
* Autodetection depends on the fact that any interrupt that
|
||||
* comes in on to an unassigned handler will get stuck with
|
||||
* "IRQ_WAITING" cleared and the interrupt disabled.
|
||||
*/
|
||||
static DEFINE_MUTEX(probing_active);
|
||||
|
||||
/**
 * probe_irq_on - begin an interrupt autodetect
 *
 * Commence probing for an interrupt. The interrupts are scanned
 * and a mask of potential interrupt lines is returned.
 *
 * Takes the probing_active mutex; it is released by the matching
 * probe_irq_off() or probe_irq_mask() call.
 */
unsigned long probe_irq_on(void)
{
	struct irq_desc *desc;
	unsigned long mask;
	unsigned int i;

	mutex_lock(&probing_active);
	/*
	 * something may have generated an irq long ago and we want to
	 * flush such a longstanding irq before considering it as spurious.
	 */
	for (i = NR_IRQS-1; i > 0; i--) {
		desc = irq_desc + i;

		spin_lock_irq(&desc->lock);
		if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
			/*
			 * An old-style architecture might still have
			 * the handle_bad_irq handler there:
			 */
			compat_irq_chip_set_default_handler(desc);

			/*
			 * Some chips need to know about probing in
			 * progress:
			 */
			if (desc->chip->set_type)
				desc->chip->set_type(i, IRQ_TYPE_PROBE);
			desc->chip->startup(i);
		}
		spin_unlock_irq(&desc->lock);
	}

	/* Wait for longstanding interrupts to trigger. */
	msleep(20);

	/*
	 * enable any unassigned irqs
	 * (we must startup again here because if a longstanding irq
	 * happened in the previous stage, it may have masked itself)
	 */
	for (i = NR_IRQS-1; i > 0; i--) {
		desc = irq_desc + i;

		spin_lock_irq(&desc->lock);
		if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
			desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
			if (desc->chip->startup(i))
				desc->status |= IRQ_PENDING;
		}
		spin_unlock_irq(&desc->lock);
	}

	/*
	 * Wait for spurious interrupts to trigger
	 */
	msleep(100);

	/*
	 * Now filter out any obviously spurious interrupts
	 */
	mask = 0;
	for (i = 0; i < NR_IRQS; i++) {
		unsigned int status;

		desc = irq_desc + i;
		spin_lock_irq(&desc->lock);
		status = desc->status;

		if (status & IRQ_AUTODETECT) {
			/* It triggered already - consider it spurious. */
			if (!(status & IRQ_WAITING)) {
				desc->status = status & ~IRQ_AUTODETECT;
				desc->chip->shutdown(i);
			} else
				/* Still waiting: a probe candidate.  Only
				 * the first 32 lines fit in the mask. */
				if (i < 32)
					mask |= 1 << i;
		}
		spin_unlock_irq(&desc->lock);
	}

	return mask;
}
EXPORT_SYMBOL(probe_irq_on);
|
||||
|
||||
/**
|
||||
* probe_irq_mask - scan a bitmap of interrupt lines
|
||||
* @val: mask of interrupts to consider
|
||||
*
|
||||
* Scan the interrupt lines and return a bitmap of active
|
||||
* autodetect interrupts. The interrupt probe logic state
|
||||
* is then returned to its previous value.
|
||||
*
|
||||
* Note: we need to scan all the irq's even though we will
|
||||
* only return autodetect irq numbers - just so that we reset
|
||||
* them all to a known state.
|
||||
*/
|
||||
unsigned int probe_irq_mask(unsigned long val)
|
||||
{
|
||||
unsigned int mask;
|
||||
int i;
|
||||
|
||||
mask = 0;
|
||||
for (i = 0; i < NR_IRQS; i++) {
|
||||
struct irq_desc *desc = irq_desc + i;
|
||||
unsigned int status;
|
||||
|
||||
spin_lock_irq(&desc->lock);
|
||||
status = desc->status;
|
||||
|
||||
if (status & IRQ_AUTODETECT) {
|
||||
if (i < 16 && !(status & IRQ_WAITING))
|
||||
mask |= 1 << i;
|
||||
|
||||
desc->status = status & ~IRQ_AUTODETECT;
|
||||
desc->chip->shutdown(i);
|
||||
}
|
||||
spin_unlock_irq(&desc->lock);
|
||||
}
|
||||
mutex_unlock(&probing_active);
|
||||
|
||||
return mask & val;
|
||||
}
|
||||
EXPORT_SYMBOL(probe_irq_mask);
|
||||
|
||||
/**
|
||||
* probe_irq_off - end an interrupt autodetect
|
||||
* @val: mask of potential interrupts (unused)
|
||||
*
|
||||
* Scans the unused interrupt lines and returns the line which
|
||||
* appears to have triggered the interrupt. If no interrupt was
|
||||
* found then zero is returned. If more than one interrupt is
|
||||
* found then minus the first candidate is returned to indicate
|
||||
* their is doubt.
|
||||
*
|
||||
* The interrupt probe logic state is returned to its previous
|
||||
* value.
|
||||
*
|
||||
* BUGS: When used in a module (which arguably shouldn't happen)
|
||||
* nothing prevents two IRQ probe callers from overlapping. The
|
||||
* results of this are non-optimal.
|
||||
*/
|
||||
int probe_irq_off(unsigned long val)
|
||||
{
|
||||
int i, irq_found = 0, nr_irqs = 0;
|
||||
|
||||
for (i = 0; i < NR_IRQS; i++) {
|
||||
struct irq_desc *desc = irq_desc + i;
|
||||
unsigned int status;
|
||||
|
||||
spin_lock_irq(&desc->lock);
|
||||
status = desc->status;
|
||||
|
||||
if (status & IRQ_AUTODETECT) {
|
||||
if (!(status & IRQ_WAITING)) {
|
||||
if (!nr_irqs)
|
||||
irq_found = i;
|
||||
nr_irqs++;
|
||||
}
|
||||
desc->status = status & ~IRQ_AUTODETECT;
|
||||
desc->chip->shutdown(i);
|
||||
}
|
||||
spin_unlock_irq(&desc->lock);
|
||||
}
|
||||
mutex_unlock(&probing_active);
|
||||
|
||||
if (nr_irqs > 1)
|
||||
irq_found = -irq_found;
|
||||
|
||||
return irq_found;
|
||||
}
|
||||
EXPORT_SYMBOL(probe_irq_off);
|
||||
|
||||
599
kernel/irq/chip.c
Normal file
599
kernel/irq/chip.c
Normal file
@@ -0,0 +1,599 @@
|
||||
/*
|
||||
* linux/kernel/irq/chip.c
|
||||
*
|
||||
* Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar
|
||||
* Copyright (C) 2005-2006, Thomas Gleixner, Russell King
|
||||
*
|
||||
* This file contains the core interrupt handling code, for irq-chip
|
||||
* based architectures.
|
||||
*
|
||||
* Detailed information is available in Documentation/DocBook/genericirq
|
||||
*/
|
||||
|
||||
#include <linux/irq.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/kernel_stat.h>
|
||||
|
||||
#include "internals.h"
|
||||
|
||||
/**
 * dynamic_irq_init - initialize a dynamically allocated irq
 * @irq: irq number to initialize
 */
void dynamic_irq_init(unsigned int irq)
{
	struct irq_desc *desc;
	unsigned long flags;

	if (irq >= NR_IRQS) {
		printk(KERN_ERR "Trying to initialize invalid IRQ%d\n", irq);
		WARN_ON(1);
		return;
	}

	/* Ensure we don't have left over values from a previous use of this irq */
	desc = irq_desc + irq;
	spin_lock_irqsave(&desc->lock, flags);
	desc->status = IRQ_DISABLED;
	desc->chip = &no_irq_chip;	/* neutral chip until one is installed */
	desc->handle_irq = handle_bad_irq;
	desc->depth = 1;		/* one level of disable outstanding */
	desc->msi_desc = NULL;
	desc->handler_data = NULL;
	desc->chip_data = NULL;
	desc->action = NULL;
	desc->irq_count = 0;
	desc->irqs_unhandled = 0;
#ifdef CONFIG_SMP
	desc->affinity = CPU_MASK_ALL;
#endif
	spin_unlock_irqrestore(&desc->lock, flags);
}
|
||||
|
||||
/**
 * dynamic_irq_cleanup - cleanup a dynamically allocated irq
 * @irq: irq number to cleanup
 */
void dynamic_irq_cleanup(unsigned int irq)
{
	struct irq_desc *desc;
	unsigned long flags;

	if (irq >= NR_IRQS) {
		printk(KERN_ERR "Trying to cleanup invalid IRQ%d\n", irq);
		WARN_ON(1);
		return;
	}

	desc = irq_desc + irq;
	spin_lock_irqsave(&desc->lock, flags);
	/* Refuse to tear down an irq that still has a handler installed. */
	if (desc->action) {
		spin_unlock_irqrestore(&desc->lock, flags);
		printk(KERN_ERR "Destroying IRQ%d without calling free_irq\n",
			irq);
		WARN_ON(1);
		return;
	}
	/* Point everything back at the neutral defaults. */
	desc->msi_desc = NULL;
	desc->handler_data = NULL;
	desc->chip_data = NULL;
	desc->handle_irq = handle_bad_irq;
	desc->chip = &no_irq_chip;
	spin_unlock_irqrestore(&desc->lock, flags);
}
|
||||
|
||||
|
||||
/**
|
||||
* set_irq_chip - set the irq chip for an irq
|
||||
* @irq: irq number
|
||||
* @chip: pointer to irq chip description structure
|
||||
*/
|
||||
int set_irq_chip(unsigned int irq, struct irq_chip *chip)
|
||||
{
|
||||
struct irq_desc *desc;
|
||||
unsigned long flags;
|
||||
|
||||
if (irq >= NR_IRQS) {
|
||||
printk(KERN_ERR "Trying to install chip for IRQ%d\n", irq);
|
||||
WARN_ON(1);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (!chip)
|
||||
chip = &no_irq_chip;
|
||||
|
||||
desc = irq_desc + irq;
|
||||
spin_lock_irqsave(&desc->lock, flags);
|
||||
irq_chip_set_defaults(chip);
|
||||
desc->chip = chip;
|
||||
spin_unlock_irqrestore(&desc->lock, flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(set_irq_chip);
|
||||
|
||||
/**
 * set_irq_type - set the irq type for an irq
 * @irq: irq number
 * @type: interrupt type - see include/linux/interrupt.h
 *
 * Returns the chip's set_type() result, -ENODEV for a bad irq
 * number, or -ENXIO when the chip cannot program trigger types.
 */
int set_irq_type(unsigned int irq, unsigned int type)
{
	struct irq_desc *desc;
	unsigned long flags;
	int ret = -ENXIO;

	if (irq >= NR_IRQS) {
		printk(KERN_ERR "Trying to set irq type for IRQ%d\n", irq);
		return -ENODEV;
	}

	desc = irq_desc + irq;
	if (desc->chip->set_type) {
		spin_lock_irqsave(&desc->lock, flags);
		ret = desc->chip->set_type(irq, type);
		spin_unlock_irqrestore(&desc->lock, flags);
	}
	return ret;
}
EXPORT_SYMBOL(set_irq_type);
|
||||
|
||||
/**
 * set_irq_data - set irq type data for an irq
 * @irq: Interrupt number
 * @data: Pointer to interrupt specific data
 *
 * Set the hardware irq controller data for an irq.  The store is
 * done under the descriptor lock.  Returns 0 or -EINVAL for a bad
 * irq number.
 */
int set_irq_data(unsigned int irq, void *data)
{
	struct irq_desc *desc;
	unsigned long flags;

	if (irq >= NR_IRQS) {
		printk(KERN_ERR
		       "Trying to install controller data for IRQ%d\n", irq);
		return -EINVAL;
	}

	desc = irq_desc + irq;
	spin_lock_irqsave(&desc->lock, flags);
	desc->handler_data = data;
	spin_unlock_irqrestore(&desc->lock, flags);
	return 0;
}
EXPORT_SYMBOL(set_irq_data);
|
||||
|
||||
/**
|
||||
* set_irq_data - set irq type data for an irq
|
||||
* @irq: Interrupt number
|
||||
* @entry: Pointer to MSI descriptor data
|
||||
*
|
||||
* Set the hardware irq controller data for an irq
|
||||
*/
|
||||
int set_irq_msi(unsigned int irq, struct msi_desc *entry)
|
||||
{
|
||||
struct irq_desc *desc;
|
||||
unsigned long flags;
|
||||
|
||||
if (irq >= NR_IRQS) {
|
||||
printk(KERN_ERR
|
||||
"Trying to install msi data for IRQ%d\n", irq);
|
||||
return -EINVAL;
|
||||
}
|
||||
desc = irq_desc + irq;
|
||||
spin_lock_irqsave(&desc->lock, flags);
|
||||
desc->msi_desc = entry;
|
||||
spin_unlock_irqrestore(&desc->lock, flags);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* set_irq_chip_data - set irq chip data for an irq
|
||||
* @irq: Interrupt number
|
||||
* @data: Pointer to chip specific data
|
||||
*
|
||||
* Set the hardware irq chip data for an irq
|
||||
*/
|
||||
int set_irq_chip_data(unsigned int irq, void *data)
|
||||
{
|
||||
struct irq_desc *desc = irq_desc + irq;
|
||||
unsigned long flags;
|
||||
|
||||
if (irq >= NR_IRQS || !desc->chip) {
|
||||
printk(KERN_ERR "BUG: bad set_irq_chip_data(IRQ#%d)\n", irq);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
spin_lock_irqsave(&desc->lock, flags);
|
||||
desc->chip_data = data;
|
||||
spin_unlock_irqrestore(&desc->lock, flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(set_irq_chip_data);
|
||||
|
||||
/*
 * default enable function: unmask at the chip and clear our
 * software masked flag.
 */
static void default_enable(unsigned int irq)
{
	struct irq_desc *desc = irq_desc + irq;

	desc->chip->unmask(irq);
	desc->status &= ~IRQ_MASKED;
}
|
||||
|
||||
/*
 * default disable function — intentionally a no-op (lazy disable:
 * the flow handlers latch a stray interrupt as IRQ_PENDING instead).
 */
static void default_disable(unsigned int irq)
{
}
|
||||
|
||||
/*
 * default startup function: just enable; report "no pending irq".
 */
static unsigned int default_startup(unsigned int irq)
{
	irq_desc[irq].chip->enable(irq);

	return 0;
}
|
||||
|
||||
/*
|
||||
* Fixup enable/disable function pointers
|
||||
*/
|
||||
void irq_chip_set_defaults(struct irq_chip *chip)
|
||||
{
|
||||
if (!chip->enable)
|
||||
chip->enable = default_enable;
|
||||
if (!chip->disable)
|
||||
chip->disable = default_disable;
|
||||
if (!chip->startup)
|
||||
chip->startup = default_startup;
|
||||
if (!chip->shutdown)
|
||||
chip->shutdown = chip->disable;
|
||||
if (!chip->name)
|
||||
chip->name = chip->typename;
|
||||
if (!chip->end)
|
||||
chip->end = dummy_irq_chip.end;
|
||||
}
|
||||
|
||||
static inline void mask_ack_irq(struct irq_desc *desc, int irq)
|
||||
{
|
||||
if (desc->chip->mask_ack)
|
||||
desc->chip->mask_ack(irq);
|
||||
else {
|
||||
desc->chip->mask(irq);
|
||||
desc->chip->ack(irq);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * handle_simple_irq - Simple and software-decoded IRQs.
 * @irq: the interrupt number
 * @desc: the interrupt description structure for this irq
 *
 * Simple interrupts are either sent from a demultiplexing interrupt
 * handler or come from hardware, where no interrupt hardware control
 * is necessary.
 *
 * Note: The caller is expected to handle the ack, clear, mask and
 * unmask issues if necessary.
 */
void fastcall
handle_simple_irq(unsigned int irq, struct irq_desc *desc)
{
	struct irqaction *action;
	irqreturn_t action_ret;
	const unsigned int cpu = smp_processor_id();

	spin_lock(&desc->lock);

	/* Already being handled (e.g. on another CPU): drop it. */
	if (unlikely(desc->status & IRQ_INPROGRESS))
		goto out_unlock;
	kstat_cpu(cpu).irqs[irq]++;

	action = desc->action;
	if (unlikely(!action || (desc->status & IRQ_DISABLED))) {
		/* No handler, or disabled: mask if we can and latch
		 * the event as pending. */
		if (desc->chip->mask)
			desc->chip->mask(irq);
		desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
		desc->status |= IRQ_PENDING;
		goto out_unlock;
	}

	desc->status &= ~(IRQ_REPLAY | IRQ_WAITING | IRQ_PENDING);
	desc->status |= IRQ_INPROGRESS;
	/* Run the action handlers with the descriptor lock dropped. */
	spin_unlock(&desc->lock);

	action_ret = handle_IRQ_event(irq, action);
	if (!noirqdebug)
		note_interrupt(irq, desc, action_ret);

	spin_lock(&desc->lock);
	desc->status &= ~IRQ_INPROGRESS;
out_unlock:
	spin_unlock(&desc->lock);
}
|
||||
|
||||
/**
 * handle_level_irq - Level type irq handler
 * @irq: the interrupt number
 * @desc: the interrupt description structure for this irq
 *
 * Level type interrupts are active as long as the hardware line has
 * the active level. This may require to mask the interrupt and unmask
 * it after the associated handler has acknowledged the device, so the
 * interrupt line is back to inactive.
 */
void fastcall
handle_level_irq(unsigned int irq, struct irq_desc *desc)
{
	unsigned int cpu = smp_processor_id();
	struct irqaction *action;
	irqreturn_t action_ret;

	spin_lock(&desc->lock);
	/* Mask + ack first: a level irq would otherwise re-trigger. */
	mask_ack_irq(desc, irq);

	if (unlikely(desc->status & IRQ_INPROGRESS))
		goto out_unlock;
	desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
	kstat_cpu(cpu).irqs[irq]++;

	/*
	 * If its disabled or no action available
	 * keep it masked and get out of here
	 */
	action = desc->action;
	if (unlikely(!action || (desc->status & IRQ_DISABLED))) {
		desc->status |= IRQ_PENDING;
		goto out_unlock;
	}

	desc->status |= IRQ_INPROGRESS;
	desc->status &= ~IRQ_PENDING;
	/* Run the handlers with the descriptor lock dropped. */
	spin_unlock(&desc->lock);

	action_ret = handle_IRQ_event(irq, action);
	if (!noirqdebug)
		note_interrupt(irq, desc, action_ret);

	spin_lock(&desc->lock);
	desc->status &= ~IRQ_INPROGRESS;
	/* Unmask again, unless the irq was disabled meanwhile. */
	if (!(desc->status & IRQ_DISABLED) && desc->chip->unmask)
		desc->chip->unmask(irq);
out_unlock:
	spin_unlock(&desc->lock);
}
|
||||
|
||||
/**
 * handle_fasteoi_irq - irq handler for transparent controllers
 * @irq: the interrupt number
 * @desc: the interrupt description structure for this irq
 *
 * Only a single callback will be issued to the chip: an ->eoi()
 * call when the interrupt has been serviced. This enables support
 * for modern forms of interrupt handlers, which handle the flow
 * details in hardware, transparently.
 */
void fastcall
handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
{
	unsigned int cpu = smp_processor_id();
	struct irqaction *action;
	irqreturn_t action_ret;

	spin_lock(&desc->lock);

	if (unlikely(desc->status & IRQ_INPROGRESS))
		goto out;

	desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
	kstat_cpu(cpu).irqs[irq]++;

	/*
	 * If its disabled or no action available
	 * then mask it and get out of here:
	 */
	action = desc->action;
	if (unlikely(!action || (desc->status & IRQ_DISABLED))) {
		desc->status |= IRQ_PENDING;
		if (desc->chip->mask)
			desc->chip->mask(irq);
		goto out;
	}

	desc->status |= IRQ_INPROGRESS;
	desc->status &= ~IRQ_PENDING;
	/* Run the handlers with the descriptor lock dropped. */
	spin_unlock(&desc->lock);

	action_ret = handle_IRQ_event(irq, action);
	if (!noirqdebug)
		note_interrupt(irq, desc, action_ret);

	spin_lock(&desc->lock);
	desc->status &= ~IRQ_INPROGRESS;
out:
	/* ->eoi() is issued on every path, including the early exits. */
	desc->chip->eoi(irq);

	spin_unlock(&desc->lock);
}
|
||||
|
||||
/**
 *	handle_edge_irq - edge type IRQ handler
 *	@irq:	the interrupt number
 *	@desc:	the interrupt description structure for this irq
 *
 *	Interrupt occurs on the falling and/or rising edge of a hardware
 *	signal. The occurrence is latched into the irq controller hardware
 *	and must be acked in order to be reenabled. After the ack another
 *	interrupt can happen on the same source even before the first one
 *	is handled by the associated event handler. If this happens it
 *	might be necessary to disable (mask) the interrupt depending on the
 *	controller hardware. This requires to reenable the interrupt inside
 *	of the loop which handles the interrupts which have arrived while
 *	the handler was running. If all pending interrupts are handled, the
 *	loop is left.
 */
void fastcall
handle_edge_irq(unsigned int irq, struct irq_desc *desc)
{
	const unsigned int cpu = smp_processor_id();

	spin_lock(&desc->lock);

	desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);

	/*
	 * If we're currently running this IRQ, or its disabled,
	 * we shouldn't process the IRQ. Mark it pending, handle
	 * the necessary masking and go out
	 */
	if (unlikely((desc->status & (IRQ_INPROGRESS | IRQ_DISABLED)) ||
		    !desc->action)) {
		desc->status |= (IRQ_PENDING | IRQ_MASKED);
		mask_ack_irq(desc, irq);
		goto out_unlock;
	}

	kstat_cpu(cpu).irqs[irq]++;

	/* Start handling the irq */
	desc->chip->ack(irq);

	/* Mark the IRQ currently in progress.*/
	desc->status |= IRQ_INPROGRESS;

	do {
		struct irqaction *action = desc->action;
		irqreturn_t action_ret;

		/* Action vanished while the lock was dropped: mask and bail. */
		if (unlikely(!action)) {
			desc->chip->mask(irq);
			goto out_unlock;
		}

		/*
		 * When another irq arrived while we were handling
		 * one, we could have masked the irq.
		 * Re-enable it, if it was not disabled in meantime.
		 */
		if (unlikely((desc->status &
			       (IRQ_PENDING | IRQ_MASKED | IRQ_DISABLED)) ==
			      (IRQ_PENDING | IRQ_MASKED))) {
			desc->chip->unmask(irq);
			desc->status &= ~IRQ_MASKED;
		}

		desc->status &= ~IRQ_PENDING;
		/* Run the handler chain with the descriptor lock dropped. */
		spin_unlock(&desc->lock);
		action_ret = handle_IRQ_event(irq, action);
		if (!noirqdebug)
			note_interrupt(irq, desc, action_ret);
		spin_lock(&desc->lock);

		/* Loop while new edges were latched and we are not disabled. */
	} while ((desc->status & (IRQ_PENDING | IRQ_DISABLED)) == IRQ_PENDING);

	desc->status &= ~IRQ_INPROGRESS;
out_unlock:
	spin_unlock(&desc->lock);
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/**
 *	handle_percpu_irq - Per CPU local irq handler
 *	@irq:	the interrupt number
 *	@desc:	the interrupt description structure for this irq
 *
 *	Per CPU interrupts on SMP machines without locking requirements
 */
void fastcall
handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
{
	irqreturn_t action_ret;

	kstat_this_cpu.irqs[irq]++;

	/* ack/eoi are optional chip callbacks for this flow type. */
	if (desc->chip->ack)
		desc->chip->ack(irq);

	action_ret = handle_IRQ_event(irq, desc->action);
	if (!noirqdebug)
		note_interrupt(irq, desc, action_ret);

	if (desc->chip->eoi)
		desc->chip->eoi(irq);
}
|
||||
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
/*
 * __set_irq_handler - install a flow handler for an irq
 * @irq:	the interrupt number
 * @handle:	flow handler function (NULL uninstalls, -> handle_bad_irq)
 * @is_chained:	nonzero for handlers of chained (cascade) interrupts,
 *		which are excluded from request_irq()/probing and are
 *		started immediately
 * @name:	name shown in /proc for this flow handler
 */
void
__set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
		  const char *name)
{
	struct irq_desc *desc;
	unsigned long flags;

	if (irq >= NR_IRQS) {
		printk(KERN_ERR
		       "Trying to install type control for IRQ%d\n", irq);
		return;
	}

	desc = irq_desc + irq;

	if (!handle)
		handle = handle_bad_irq;
	else if (desc->chip == &no_irq_chip) {
		printk(KERN_WARNING "Trying to install %sinterrupt handler "
		       "for IRQ%d\n", is_chained ? "chained " : "", irq);
		/*
		 * Some ARM implementations install a handler for really dumb
		 * interrupt hardware without setting an irq_chip. This worked
		 * with the ARM no_irq_chip but the check in setup_irq would
		 * prevent us to setup the interrupt at all. Switch it to
		 * dummy_irq_chip for easy transition.
		 */
		desc->chip = &dummy_irq_chip;
	}

	spin_lock_irqsave(&desc->lock, flags);

	/* Uninstall? */
	if (handle == handle_bad_irq) {
		if (desc->chip != &no_irq_chip)
			mask_ack_irq(desc, irq);
		desc->status |= IRQ_DISABLED;
		desc->depth = 1;
	}
	desc->handle_irq = handle;
	desc->name = name;

	/* Chained handlers are started right away and never requested. */
	if (handle != handle_bad_irq && is_chained) {
		desc->status &= ~IRQ_DISABLED;
		desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE;
		desc->depth = 0;
		desc->chip->unmask(irq);
	}
	spin_unlock_irqrestore(&desc->lock, flags);
}
|
||||
|
||||
void
|
||||
set_irq_chip_and_handler(unsigned int irq, struct irq_chip *chip,
|
||||
irq_flow_handler_t handle)
|
||||
{
|
||||
set_irq_chip(irq, chip);
|
||||
__set_irq_handler(irq, handle, 0, NULL);
|
||||
}
|
||||
|
||||
/*
 * set_irq_chip_and_handler_name - set chip, flow handler and name for an irq
 * @irq:	the interrupt number
 * @chip:	irq chip structure
 * @handle:	flow handler function
 * @name:	flow handler name shown in /proc
 *
 * The chip must be installed first: __set_irq_handler() inspects
 * desc->chip to decide whether to substitute dummy_irq_chip.
 */
void
set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
			      irq_flow_handler_t handle, const char *name)
{
	set_irq_chip(irq, chip);
	__set_irq_handler(irq, handle, 0, name);
}
|
||||
88
kernel/irq/devres.c
Normal file
88
kernel/irq/devres.c
Normal file
@@ -0,0 +1,88 @@
|
||||
#include <linux/module.h>
|
||||
#include <linux/interrupt.h>
|
||||
|
||||
/*
 * Device resource management aware IRQ request/free implementation.
 */
struct irq_devres {
	unsigned int irq;	/* interrupt line passed to request_irq() */
	void *dev_id;		/* cookie passed to request_irq()/free_irq() */
};
|
||||
|
||||
static void devm_irq_release(struct device *dev, void *res)
|
||||
{
|
||||
struct irq_devres *this = res;
|
||||
|
||||
free_irq(this->irq, this->dev_id);
|
||||
}
|
||||
|
||||
static int devm_irq_match(struct device *dev, void *res, void *data)
|
||||
{
|
||||
struct irq_devres *this = res, *match = data;
|
||||
|
||||
return this->irq == match->irq && this->dev_id == match->dev_id;
|
||||
}
|
||||
|
||||
/**
 *	devm_request_irq - allocate an interrupt line for a managed device
 *	@dev: device to request interrupt for
 *	@irq: Interrupt line to allocate
 *	@handler: Function to be called when the IRQ occurs
 *	@irqflags: Interrupt type flags
 *	@devname: An ascii name for the claiming device
 *	@dev_id: A cookie passed back to the handler function
 *
 *	Except for the extra @dev argument, this function takes the
 *	same arguments and performs the same function as
 *	request_irq().  IRQs requested with this function will be
 *	automatically freed on driver detach.
 *
 *	If an IRQ allocated with this function needs to be freed
 *	separately, devm_free_irq() must be used.
 */
int devm_request_irq(struct device *dev, unsigned int irq,
		     irq_handler_t handler, unsigned long irqflags,
		     const char *devname, void *dev_id)
{
	struct irq_devres *dr;
	int rc;

	/* Allocate the devres record first so failure leaves no irq held. */
	dr = devres_alloc(devm_irq_release, sizeof(struct irq_devres),
			  GFP_KERNEL);
	if (!dr)
		return -ENOMEM;

	rc = request_irq(irq, handler, irqflags, devname, dev_id);
	if (rc) {
		devres_free(dr);
		return rc;
	}

	dr->irq = irq;
	dr->dev_id = dev_id;
	devres_add(dev, dr);

	return 0;
}
EXPORT_SYMBOL(devm_request_irq);
|
||||
EXPORT_SYMBOL(devm_request_irq);
|
||||
|
||||
/**
 *	devm_free_irq - free an interrupt
 *	@dev: device to free interrupt for
 *	@irq: Interrupt line to free
 *	@dev_id: Device identity to free
 *
 *	Except for the extra @dev argument, this function takes the
 *	same arguments and performs the same function as free_irq().
 *	This function instead of free_irq() should be used to manually
 *	free IRQs allocated with devm_request_irq().
 */
void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id)
{
	struct irq_devres match_data = { irq, dev_id };

	/* Free the irq, then drop the matching devres record. */
	free_irq(irq, dev_id);
	WARN_ON(devres_destroy(dev, devm_irq_release, devm_irq_match,
			       &match_data));
}
EXPORT_SYMBOL(devm_free_irq);
|
||||
EXPORT_SYMBOL(devm_free_irq);
|
||||
271
kernel/irq/handle.c
Normal file
271
kernel/irq/handle.c
Normal file
@@ -0,0 +1,271 @@
|
||||
/*
|
||||
* linux/kernel/irq/handle.c
|
||||
*
|
||||
* Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar
|
||||
* Copyright (C) 2005-2006, Thomas Gleixner, Russell King
|
||||
*
|
||||
* This file contains the core interrupt handling code.
|
||||
*
|
||||
* Detailed information is available in Documentation/DocBook/genericirq
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/irq.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/kernel_stat.h>
|
||||
|
||||
#include "internals.h"
|
||||
|
||||
/**
 * handle_bad_irq - handle spurious and unhandled irqs
 * @irq:       the interrupt number
 * @desc:      description of the interrupt
 *
 * Handles spurious and unhandled IRQ's. It also prints a debug message.
 */
void fastcall
handle_bad_irq(unsigned int irq, struct irq_desc *desc)
{
	print_irq_desc(irq, desc);
	kstat_this_cpu.irqs[irq]++;
	ack_bad_irq(irq);
}
|
||||
|
||||
/*
 * Linux has a controller-independent interrupt architecture.
 * Every controller has a 'controller-template', that is used
 * by the main code to do the right thing. Each driver-visible
 * interrupt source is transparently wired to the appropriate
 * controller. Thus drivers need not be aware of the
 * interrupt-controller.
 *
 * The code is designed to be easily extended with new/different
 * interrupt controllers, without having to do assembly magic or
 * having to touch the generic code.
 *
 * Controller mappings for all interrupt sources:
 */
/* Every descriptor starts disabled, chipless, and routed to handle_bad_irq. */
struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned = {
	[0 ... NR_IRQS-1] = {
		.status = IRQ_DISABLED,
		.chip = &no_irq_chip,
		.handle_irq = handle_bad_irq,
		.depth = 1,
		.lock = __SPIN_LOCK_UNLOCKED(irq_desc->lock),
#ifdef CONFIG_SMP
		.affinity = CPU_MASK_ALL
#endif
	}
};
|
||||
|
||||
/*
 * What should we do if we get a hw irq event on an illegal vector?
 * Each architecture has to answer this themselves.
 */
static void ack_bad(unsigned int irq)
{
	print_irq_desc(irq, irq_desc + irq);
	ack_bad_irq(irq);
}
|
||||
|
||||
/*
 * NOP functions
 */
/* Do-nothing chip callback. */
static void noop(unsigned int irq)
{
}

/* Do-nothing ->startup() callback; reports "no pending interrupt". */
static unsigned int noop_ret(unsigned int irq)
{
	return 0;
}
|
||||
|
||||
/*
 * Generic no controller implementation
 */
/* Placeholder chip: every callback is a no-op except ->ack, which
 * reports the event as bad, since no controller should fire here. */
struct irq_chip no_irq_chip = {
	.name		= "none",
	.startup	= noop_ret,
	.shutdown	= noop,
	.enable		= noop,
	.disable	= noop,
	.ack		= ack_bad,
	.end		= noop,
};
|
||||
|
||||
/*
 * Generic dummy implementation which can be used for
 * real dumb interrupt sources
 */
/* Unlike no_irq_chip, ->ack is a no-op too: events are expected here. */
struct irq_chip dummy_irq_chip = {
	.name		= "dummy",
	.startup	= noop_ret,
	.shutdown	= noop,
	.enable		= noop,
	.disable	= noop,
	.ack		= noop,
	.mask		= noop,
	.unmask		= noop,
	.end		= noop,
};
|
||||
|
||||
/*
 * Special, empty irq handler:
 */
/* Always reports the interrupt as not ours. */
irqreturn_t no_action(int cpl, void *dev_id)
{
	return IRQ_NONE;
}
|
||||
|
||||
/**
 * handle_IRQ_event - irq action chain handler
 * @irq:	the interrupt number
 * @action:	the interrupt action chain for this irq
 *
 * Handles the action chain of an irq event
 */
irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
{
	irqreturn_t ret, retval = IRQ_NONE;
	/* OR of the flags of every action that claimed the interrupt. */
	unsigned int status = 0;

	handle_dynamic_tick(action);

	/* Unless the handler asked for irqs-off, run it with irqs enabled. */
	if (!(action->flags & IRQF_DISABLED))
		local_irq_enable_in_hardirq();

	do {
		ret = action->handler(irq, action->dev_id);
		if (ret == IRQ_HANDLED)
			status |= action->flags;
		retval |= ret;
		action = action->next;
	} while (action);

	/* Feed the entropy pool only if a claiming handler requested it. */
	if (status & IRQF_SAMPLE_RANDOM)
		add_interrupt_randomness(irq);
	local_irq_disable();

	return retval;
}
|
||||
|
||||
#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
|
||||
/**
 * __do_IRQ - original all in one highlevel IRQ handler
 * @irq:	the interrupt number
 *
 * __do_IRQ handles all normal device IRQ's (the special
 * SMP cross-CPU interrupts have their own specific
 * handlers).
 *
 * This is the original x86 implementation which is used for every
 * interrupt type.
 */
fastcall unsigned int __do_IRQ(unsigned int irq)
{
	struct irq_desc *desc = irq_desc + irq;
	struct irqaction *action;
	unsigned int status;

	kstat_this_cpu.irqs[irq]++;
	if (CHECK_IRQ_PER_CPU(desc->status)) {
		irqreturn_t action_ret;

		/*
		 * No locking required for CPU-local interrupts:
		 */
		if (desc->chip->ack)
			desc->chip->ack(irq);
		action_ret = handle_IRQ_event(irq, desc->action);
		desc->chip->end(irq);
		return 1;
	}

	spin_lock(&desc->lock);
	if (desc->chip->ack)
		desc->chip->ack(irq);
	/*
	 * REPLAY is when Linux resends an IRQ that was dropped earlier
	 * WAITING is used by probe to mark irqs that are being tested
	 */
	status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
	status |= IRQ_PENDING; /* we _want_ to handle it */

	/*
	 * If the IRQ is disabled for whatever reason, we cannot
	 * use the action we have.
	 */
	action = NULL;
	if (likely(!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))) {
		action = desc->action;
		status &= ~IRQ_PENDING; /* we commit to handling */
		status |= IRQ_INPROGRESS; /* we are handling it */
	}
	desc->status = status;

	/*
	 * If there is no IRQ handler or it was disabled, exit early.
	 * Since we set PENDING, if another processor is handling
	 * a different instance of this same irq, the other processor
	 * will take care of it.
	 */
	if (unlikely(!action))
		goto out;

	/*
	 * Edge triggered interrupts need to remember
	 * pending events.
	 * This applies to any hw interrupts that allow a second
	 * instance of the same irq to arrive while we are in do_IRQ
	 * or in the handler. But the code here only handles the _second_
	 * instance of the irq, not the third or fourth. So it is mostly
	 * useful for irq hardware that does not mask cleanly in an
	 * SMP environment.
	 */
	for (;;) {
		irqreturn_t action_ret;

		/* Drop the lock while the handler chain runs. */
		spin_unlock(&desc->lock);

		action_ret = handle_IRQ_event(irq, action);
		if (!noirqdebug)
			note_interrupt(irq, desc, action_ret);

		spin_lock(&desc->lock);
		/* Re-run if another instance got marked pending meanwhile. */
		if (likely(!(desc->status & IRQ_PENDING)))
			break;
		desc->status &= ~IRQ_PENDING;
	}
	desc->status &= ~IRQ_INPROGRESS;

out:
	/*
	 * The ->end() handler has to deal with interrupts which got
	 * disabled while the handler was running.
	 */
	desc->chip->end(irq);
	spin_unlock(&desc->lock);

	return 1;
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_TRACE_IRQFLAGS
|
||||
|
||||
/*
|
||||
* lockdep: we want to handle all irq_desc locks as a single lock-class:
|
||||
*/
|
||||
static struct lock_class_key irq_desc_lock_class;
|
||||
|
||||
void early_init_irq_lock_class(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < NR_IRQS; i++)
|
||||
lockdep_set_class(&irq_desc[i].lock, &irq_desc_lock_class);
|
||||
}
|
||||
|
||||
#endif
|
||||
64
kernel/irq/internals.h
Normal file
64
kernel/irq/internals.h
Normal file
@@ -0,0 +1,64 @@
|
||||
/*
|
||||
* IRQ subsystem internal functions and variables:
|
||||
*/
|
||||
|
||||
extern int noirqdebug;
|
||||
|
||||
/* Set default functions for irq_chip structures: */
|
||||
extern void irq_chip_set_defaults(struct irq_chip *chip);
|
||||
|
||||
/* Set default handler: */
|
||||
extern void compat_irq_chip_set_default_handler(struct irq_desc *desc);
|
||||
|
||||
#ifdef CONFIG_PROC_FS
|
||||
extern void register_irq_proc(unsigned int irq);
|
||||
extern void register_handler_proc(unsigned int irq, struct irqaction *action);
|
||||
extern void unregister_handler_proc(unsigned int irq, struct irqaction *action);
|
||||
#else
|
||||
static inline void register_irq_proc(unsigned int irq) { }
|
||||
static inline void register_handler_proc(unsigned int irq,
|
||||
struct irqaction *action) { }
|
||||
static inline void unregister_handler_proc(unsigned int irq,
|
||||
struct irqaction *action) { }
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Debugging printout:
|
||||
*/
|
||||
|
||||
#include <linux/kallsyms.h>
|
||||
|
||||
/* Print a "<flag> set" line for each irq status bit that is on. */
#define P(f) if (desc->status & f) printk("%14s set\n", #f)

/* Dump one irq descriptor: counters, handler, chip, actions, status bits. */
static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc)
{
	printk("irq %d, desc: %p, depth: %d, count: %d, unhandled: %d\n",
		irq, desc, desc->depth, desc->irq_count, desc->irqs_unhandled);
	printk("->handle_irq():  %p, ", desc->handle_irq);
	print_symbol("%s\n", (unsigned long)desc->handle_irq);
	printk("->chip(): %p, ", desc->chip);
	print_symbol("%s\n", (unsigned long)desc->chip);
	printk("->action(): %p\n", desc->action);
	if (desc->action) {
		printk("->action->handler(): %p, ", desc->action->handler);
		print_symbol("%s\n", (unsigned long)desc->action->handler);
	}

	P(IRQ_INPROGRESS);
	P(IRQ_DISABLED);
	P(IRQ_PENDING);
	P(IRQ_REPLAY);
	P(IRQ_AUTODETECT);
	P(IRQ_WAITING);
	P(IRQ_LEVEL);
	P(IRQ_MASKED);
#ifdef CONFIG_IRQ_PER_CPU
	P(IRQ_PER_CPU);
#endif
	P(IRQ_NOPROBE);
	P(IRQ_NOREQUEST);
	P(IRQ_NOAUTOEN);
}

#undef P
|
||||
|
||||
563
kernel/irq/manage.c
Normal file
563
kernel/irq/manage.c
Normal file
@@ -0,0 +1,563 @@
|
||||
/*
|
||||
* linux/kernel/irq/manage.c
|
||||
*
|
||||
* Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar
|
||||
* Copyright (C) 2005-2006 Thomas Gleixner
|
||||
*
|
||||
* This file contains driver APIs to the irq subsystem.
|
||||
*/
|
||||
|
||||
#include <linux/irq.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/interrupt.h>
|
||||
|
||||
#include "internals.h"
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
|
||||
/**
|
||||
* synchronize_irq - wait for pending IRQ handlers (on other CPUs)
|
||||
* @irq: interrupt number to wait for
|
||||
*
|
||||
* This function waits for any pending IRQ handlers for this interrupt
|
||||
* to complete before returning. If you use this function while
|
||||
* holding a resource the IRQ handler may need you will deadlock.
|
||||
*
|
||||
* This function may be called - with care - from IRQ context.
|
||||
*/
|
||||
void synchronize_irq(unsigned int irq)
|
||||
{
|
||||
struct irq_desc *desc = irq_desc + irq;
|
||||
|
||||
if (irq >= NR_IRQS)
|
||||
return;
|
||||
|
||||
while (desc->status & IRQ_INPROGRESS)
|
||||
cpu_relax();
|
||||
}
|
||||
EXPORT_SYMBOL(synchronize_irq);
|
||||
|
||||
/**
|
||||
* irq_can_set_affinity - Check if the affinity of a given irq can be set
|
||||
* @irq: Interrupt to check
|
||||
*
|
||||
*/
|
||||
int irq_can_set_affinity(unsigned int irq)
|
||||
{
|
||||
struct irq_desc *desc = irq_desc + irq;
|
||||
|
||||
if (CHECK_IRQ_PER_CPU(desc->status) || !desc->chip ||
|
||||
!desc->chip->set_affinity)
|
||||
return 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
 *	irq_set_affinity - Set the irq affinity of a given irq
 *	@irq:		Interrupt to set affinity
 *	@cpumask:	cpumask
 *
 */
int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
{
	struct irq_desc *desc = irq_desc + irq;

	if (!desc->chip->set_affinity)
		return -EINVAL;

	set_balance_irq_affinity(irq, cpumask);

#ifdef CONFIG_GENERIC_PENDING_IRQ
	/* Defer the migration until the irq is next handled. */
	set_pending_irq(irq, cpumask);
#else
	/* Program the chip immediately. */
	desc->affinity = cpumask;
	desc->chip->set_affinity(irq, cpumask);
#endif
	return 0;
}
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* disable_irq_nosync - disable an irq without waiting
|
||||
* @irq: Interrupt to disable
|
||||
*
|
||||
* Disable the selected interrupt line. Disables and Enables are
|
||||
* nested.
|
||||
* Unlike disable_irq(), this function does not ensure existing
|
||||
* instances of the IRQ handler have completed before returning.
|
||||
*
|
||||
* This function may be called from IRQ context.
|
||||
*/
|
||||
void disable_irq_nosync(unsigned int irq)
|
||||
{
|
||||
struct irq_desc *desc = irq_desc + irq;
|
||||
unsigned long flags;
|
||||
|
||||
if (irq >= NR_IRQS)
|
||||
return;
|
||||
|
||||
spin_lock_irqsave(&desc->lock, flags);
|
||||
if (!desc->depth++) {
|
||||
desc->status |= IRQ_DISABLED;
|
||||
desc->chip->disable(irq);
|
||||
}
|
||||
spin_unlock_irqrestore(&desc->lock, flags);
|
||||
}
|
||||
EXPORT_SYMBOL(disable_irq_nosync);
|
||||
|
||||
/**
|
||||
* disable_irq - disable an irq and wait for completion
|
||||
* @irq: Interrupt to disable
|
||||
*
|
||||
* Disable the selected interrupt line. Enables and Disables are
|
||||
* nested.
|
||||
* This function waits for any pending IRQ handlers for this interrupt
|
||||
* to complete before returning. If you use this function while
|
||||
* holding a resource the IRQ handler may need you will deadlock.
|
||||
*
|
||||
* This function may be called - with care - from IRQ context.
|
||||
*/
|
||||
void disable_irq(unsigned int irq)
|
||||
{
|
||||
struct irq_desc *desc = irq_desc + irq;
|
||||
|
||||
if (irq >= NR_IRQS)
|
||||
return;
|
||||
|
||||
disable_irq_nosync(irq);
|
||||
if (desc->action)
|
||||
synchronize_irq(irq);
|
||||
}
|
||||
EXPORT_SYMBOL(disable_irq);
|
||||
|
||||
/**
|
||||
* enable_irq - enable handling of an irq
|
||||
* @irq: Interrupt to enable
|
||||
*
|
||||
* Undoes the effect of one call to disable_irq(). If this
|
||||
* matches the last disable, processing of interrupts on this
|
||||
* IRQ line is re-enabled.
|
||||
*
|
||||
* This function may be called from IRQ context.
|
||||
*/
|
||||
void enable_irq(unsigned int irq)
|
||||
{
|
||||
struct irq_desc *desc = irq_desc + irq;
|
||||
unsigned long flags;
|
||||
|
||||
if (irq >= NR_IRQS)
|
||||
return;
|
||||
|
||||
spin_lock_irqsave(&desc->lock, flags);
|
||||
switch (desc->depth) {
|
||||
case 0:
|
||||
printk(KERN_WARNING "Unbalanced enable for IRQ %d\n", irq);
|
||||
WARN_ON(1);
|
||||
break;
|
||||
case 1: {
|
||||
unsigned int status = desc->status & ~IRQ_DISABLED;
|
||||
|
||||
/* Prevent probing on this irq: */
|
||||
desc->status = status | IRQ_NOPROBE;
|
||||
check_irq_resend(desc, irq);
|
||||
/* fall-through */
|
||||
}
|
||||
default:
|
||||
desc->depth--;
|
||||
}
|
||||
spin_unlock_irqrestore(&desc->lock, flags);
|
||||
}
|
||||
EXPORT_SYMBOL(enable_irq);
|
||||
|
||||
/**
 *	set_irq_wake - control irq power management wakeup
 *	@irq:	interrupt to control
 *	@on:	enable/disable power management wakeup
 *
 *	Enable/disable power management wakeup mode, which is
 *	disabled by default.  Enables and disables must match,
 *	just as they match for non-wakeup mode support.
 *
 *	Wakeup mode lets this IRQ wake the system from sleep
 *	states like "suspend to RAM".
 */
int set_irq_wake(unsigned int irq, unsigned int on)
{
	struct irq_desc *desc = irq_desc + irq;
	unsigned long flags;
	int ret = -ENXIO;
	int (*set_wake)(unsigned, unsigned) = desc->chip->set_wake;

	/* wakeup-capable irqs can be shared between drivers that
	 * don't need to have the same sleep mode behaviors.
	 */
	spin_lock_irqsave(&desc->lock, flags);
	if (on) {
		/* Only the first enable reaches the chip. */
		if (desc->wake_depth++ == 0)
			desc->status |= IRQ_WAKEUP;
		else
			set_wake = NULL;
	} else {
		if (desc->wake_depth == 0) {
			printk(KERN_WARNING "Unbalanced IRQ %d "
					"wake disable\n", irq);
			WARN_ON(1);
		} else if (--desc->wake_depth == 0)
			/* Last disable reaches the chip. */
			desc->status &= ~IRQ_WAKEUP;
		else
			set_wake = NULL;
	}
	/*
	 * NOTE(review): if chip->set_wake() fails here, wake_depth and
	 * IRQ_WAKEUP have already been updated and are not rolled back —
	 * confirm whether callers rely on balanced accounting on failure.
	 */
	if (set_wake)
		ret = desc->chip->set_wake(irq, on);
	spin_unlock_irqrestore(&desc->lock, flags);
	return ret;
}
|
||||
EXPORT_SYMBOL(set_irq_wake);
|
||||
|
||||
/*
 * Internal function that tells the architecture code whether a
 * particular irq has been exclusively allocated or is available
 * for driver use.
 */
int can_request_irq(unsigned int irq, unsigned long irqflags)
{
	struct irqaction *action;

	if (irq >= NR_IRQS || irq_desc[irq].status & IRQ_NOREQUEST)
		return 0;

	action = irq_desc[irq].action;
	if (action)
		/* An existing action blocks us unless both sides share. */
		if (irqflags & action->flags & IRQF_SHARED)
			action = NULL;

	return !action;
}
|
||||
|
||||
void compat_irq_chip_set_default_handler(struct irq_desc *desc)
{
	/*
	 * If the architecture still has not overridden
	 * the flow handler then zap the default. This
	 * should catch incorrect flow-type setting.
	 */
	if (desc->handle_irq == &handle_bad_irq)
		desc->handle_irq = NULL;
}
|
||||
|
||||
/*
 * Internal function to register an irqaction - typically used to
 * allocate special interrupts that are part of the architecture.
 *
 * Returns 0 on success, -EINVAL for a bad irq number, -ENOSYS when
 * no chip is installed, and -EBUSY on an incompatible sharing attempt.
 */
int setup_irq(unsigned int irq, struct irqaction *new)
{
	struct irq_desc *desc = irq_desc + irq;
	struct irqaction *old, **p;
	const char *old_name = NULL;
	unsigned long flags;
	int shared = 0;

	if (irq >= NR_IRQS)
		return -EINVAL;

	if (desc->chip == &no_irq_chip)
		return -ENOSYS;
	/*
	 * Some drivers like serial.c use request_irq() heavily,
	 * so we have to be careful not to interfere with a
	 * running system.
	 */
	if (new->flags & IRQF_SAMPLE_RANDOM) {
		/*
		 * This function might sleep, we want to call it first,
		 * outside of the atomic block.
		 * Yes, this might clear the entropy pool if the wrong
		 * driver is attempted to be loaded, without actually
		 * installing a new handler, but is this really a problem,
		 * only the sysadmin is able to do this.
		 */
		rand_initialize_irq(irq);
	}

	/*
	 * The following block of code has to be executed atomically
	 */
	spin_lock_irqsave(&desc->lock, flags);
	p = &desc->action;
	old = *p;
	if (old) {
		/*
		 * Can't share interrupts unless both agree to and are
		 * the same type (level, edge, polarity). So both flag
		 * fields must have IRQF_SHARED set and the bits which
		 * set the trigger type must match.
		 */
		if (!((old->flags & new->flags) & IRQF_SHARED) ||
		    ((old->flags ^ new->flags) & IRQF_TRIGGER_MASK)) {
			old_name = old->name;
			goto mismatch;
		}

#if defined(CONFIG_IRQ_PER_CPU)
		/* All handlers must agree on per-cpuness */
		if ((old->flags & IRQF_PERCPU) !=
		    (new->flags & IRQF_PERCPU))
			goto mismatch;
#endif

		/* add new interrupt at end of irq queue */
		do {
			p = &old->next;
			old = *p;
		} while (old);
		shared = 1;
	}

	*p = new;
#if defined(CONFIG_IRQ_PER_CPU)
	if (new->flags & IRQF_PERCPU)
		desc->status |= IRQ_PER_CPU;
#endif
	/* Exclude IRQ from balancing */
	if (new->flags & IRQF_NOBALANCING)
		desc->status |= IRQ_NO_BALANCING;

	/* First handler on the line: configure trigger type and start it. */
	if (!shared) {
		irq_chip_set_defaults(desc->chip);

		/* Setup the type (level, edge polarity) if configured: */
		if (new->flags & IRQF_TRIGGER_MASK) {
			if (desc->chip && desc->chip->set_type)
				desc->chip->set_type(irq,
						new->flags & IRQF_TRIGGER_MASK);
			else
				/*
				 * IRQF_TRIGGER_* but the PIC does not support
				 * multiple flow-types?
				 */
				printk(KERN_WARNING "No IRQF_TRIGGER set_type "
				       "function for IRQ %d (%s)\n", irq,
				       desc->chip ? desc->chip->name :
				       "unknown");
		} else
			compat_irq_chip_set_default_handler(desc);

		desc->status &= ~(IRQ_AUTODETECT | IRQ_WAITING |
				  IRQ_INPROGRESS);

		if (!(desc->status & IRQ_NOAUTOEN)) {
			desc->depth = 0;
			desc->status &= ~IRQ_DISABLED;
			if (desc->chip->startup)
				desc->chip->startup(irq);
			else
				desc->chip->enable(irq);
		} else
			/* Undo nested disables: */
			desc->depth = 1;
	}
	/* Reset broken irq detection when installing new handler */
	desc->irq_count = 0;
	desc->irqs_unhandled = 0;
	spin_unlock_irqrestore(&desc->lock, flags);

	new->irq = irq;
	register_irq_proc(irq);
	new->dir = NULL;
	register_handler_proc(irq, new);

	return 0;

mismatch:
#ifdef CONFIG_DEBUG_SHIRQ
	if (!(new->flags & IRQF_PROBE_SHARED)) {
		printk(KERN_ERR "IRQ handler type mismatch for IRQ %d\n", irq);
		if (old_name)
			printk(KERN_ERR "current handler: %s\n", old_name);
		dump_stack();
	}
#endif
	spin_unlock_irqrestore(&desc->lock, flags);
	return -EBUSY;
}
|
||||
|
||||
/**
 *	free_irq - free an interrupt
 *	@irq: Interrupt line to free
 *	@dev_id: Device identity to free
 *
 *	Remove an interrupt handler. The handler is removed and if the
 *	interrupt line is no longer in use by any driver it is disabled.
 *	On a shared IRQ the caller must ensure the interrupt is disabled
 *	on the card it drives before calling this function. The function
 *	does not return until any executing interrupts for this IRQ
 *	have completed.
 *
 *	This function must not be called from interrupt context.
 */
void free_irq(unsigned int irq, void *dev_id)
{
	struct irq_desc *desc;
	struct irqaction **p;
	unsigned long flags;
	irqreturn_t (*handler)(int, void *) = NULL;

	WARN_ON(in_interrupt());
	if (irq >= NR_IRQS)
		return;

	desc = irq_desc + irq;
	spin_lock_irqsave(&desc->lock, flags);
	p = &desc->action;
	/* Walk the action chain looking for the entry with this dev_id. */
	for (;;) {
		struct irqaction *action = *p;

		if (action) {
			struct irqaction **pp = p;

			p = &action->next;
			if (action->dev_id != dev_id)
				continue;

			/* Found it - now remove it from the list of entries */
			*pp = action->next;

			/* Currently used only by UML, might disappear one day.*/
#ifdef CONFIG_IRQ_RELEASE_METHOD
			if (desc->chip->release)
				desc->chip->release(irq, dev_id);
#endif

			/* Last action gone: shut the line down. */
			if (!desc->action) {
				desc->status |= IRQ_DISABLED;
				if (desc->chip->shutdown)
					desc->chip->shutdown(irq);
				else
					desc->chip->disable(irq);
			}
			spin_unlock_irqrestore(&desc->lock, flags);
			unregister_handler_proc(irq, action);

			/* Make sure it's not being used on another CPU */
			synchronize_irq(irq);
			if (action->flags & IRQF_SHARED)
				handler = action->handler;
			kfree(action);
			return;
		}
		printk(KERN_ERR "Trying to free already-free IRQ %d\n", irq);
		spin_unlock_irqrestore(&desc->lock, flags);
		return;
	}
#ifdef CONFIG_DEBUG_SHIRQ
	/*
	 * NOTE(review): this block appears unreachable — every path out of
	 * the loop above returns before reaching it, so the shared-irq
	 * replay below can never run. Confirm against upstream history.
	 */
	if (handler) {
		/*
		 * It's a shared IRQ -- the driver ought to be prepared for it
		 * to happen even now it's being freed, so let's make sure....
		 * We do this after actually deregistering it, to make sure that
		 * a 'real' IRQ doesn't run in parallel with our fake
		 */
		handler(irq, dev_id);
	}
#endif
}
|
||||
EXPORT_SYMBOL(free_irq);
|
||||
|
||||
/**
|
||||
* request_irq - allocate an interrupt line
|
||||
* @irq: Interrupt line to allocate
|
||||
* @handler: Function to be called when the IRQ occurs
|
||||
* @irqflags: Interrupt type flags
|
||||
* @devname: An ascii name for the claiming device
|
||||
* @dev_id: A cookie passed back to the handler function
|
||||
*
|
||||
* This call allocates interrupt resources and enables the
|
||||
* interrupt line and IRQ handling. From the point this
|
||||
* call is made your handler function may be invoked. Since
|
||||
* your handler function must clear any interrupt the board
|
||||
* raises, you must take care both to initialise your hardware
|
||||
* and to set up the interrupt handler in the right order.
|
||||
*
|
||||
* Dev_id must be globally unique. Normally the address of the
|
||||
* device data structure is used as the cookie. Since the handler
|
||||
* receives this value it makes sense to use it.
|
||||
*
|
||||
* If your interrupt is shared you must pass a non NULL dev_id
|
||||
* as this is required when freeing the interrupt.
|
||||
*
|
||||
* Flags:
|
||||
*
|
||||
* IRQF_SHARED Interrupt is shared
|
||||
* IRQF_DISABLED Disable local interrupts while processing
|
||||
* IRQF_SAMPLE_RANDOM The interrupt can be used for entropy
|
||||
*
|
||||
*/
|
||||
int request_irq(unsigned int irq, irq_handler_t handler,
|
||||
unsigned long irqflags, const char *devname, void *dev_id)
|
||||
{
|
||||
struct irqaction *action;
|
||||
int retval;
|
||||
|
||||
#ifdef CONFIG_LOCKDEP
|
||||
/*
|
||||
* Lockdep wants atomic interrupt handlers:
|
||||
*/
|
||||
irqflags |= IRQF_DISABLED;
|
||||
#endif
|
||||
/*
|
||||
* Sanity-check: shared interrupts must pass in a real dev-ID,
|
||||
* otherwise we'll have trouble later trying to figure out
|
||||
* which interrupt is which (messes up the interrupt freeing
|
||||
* logic etc).
|
||||
*/
|
||||
if ((irqflags & IRQF_SHARED) && !dev_id)
|
||||
return -EINVAL;
|
||||
if (irq >= NR_IRQS)
|
||||
return -EINVAL;
|
||||
if (irq_desc[irq].status & IRQ_NOREQUEST)
|
||||
return -EINVAL;
|
||||
if (!handler)
|
||||
return -EINVAL;
|
||||
|
||||
action = kmalloc(sizeof(struct irqaction), GFP_ATOMIC);
|
||||
if (!action)
|
||||
return -ENOMEM;
|
||||
|
||||
action->handler = handler;
|
||||
action->flags = irqflags;
|
||||
cpus_clear(action->mask);
|
||||
action->name = devname;
|
||||
action->next = NULL;
|
||||
action->dev_id = dev_id;
|
||||
|
||||
select_smp_affinity(irq);
|
||||
|
||||
#ifdef CONFIG_DEBUG_SHIRQ
|
||||
if (irqflags & IRQF_SHARED) {
|
||||
/*
|
||||
* It's a shared IRQ -- the driver ought to be prepared for it
|
||||
* to happen immediately, so let's make sure....
|
||||
* We do this before actually registering it, to make sure that
|
||||
* a 'real' IRQ doesn't run in parallel with our fake
|
||||
*/
|
||||
if (irqflags & IRQF_DISABLED) {
|
||||
unsigned long flags;
|
||||
|
||||
local_irq_save(flags);
|
||||
handler(irq, dev_id);
|
||||
local_irq_restore(flags);
|
||||
} else
|
||||
handler(irq, dev_id);
|
||||
}
|
||||
#endif
|
||||
|
||||
retval = setup_irq(irq, action);
|
||||
if (retval)
|
||||
kfree(action);
|
||||
|
||||
return retval;
|
||||
}
|
||||
EXPORT_SYMBOL(request_irq);
|
||||
75
kernel/irq/migration.c
Normal file
75
kernel/irq/migration.c
Normal file
@@ -0,0 +1,75 @@
|
||||
|
||||
#include <linux/irq.h>
|
||||
|
||||
/*
 * Mark @irq as having a pending affinity change to @mask.
 * The actual migration happens later from move_masked_irq()/
 * move_native_irq() in interrupt context.
 */
void set_pending_irq(unsigned int irq, cpumask_t mask)
{
	struct irq_desc *desc = irq_desc + irq;
	unsigned long flags;

	spin_lock_irqsave(&desc->lock, flags);
	desc->status |= IRQ_MOVE_PENDING;
	/* Consistency fix: use the already-computed desc instead of
	 * re-indexing irq_desc[irq]. */
	desc->pending_mask = mask;
	spin_unlock_irqrestore(&desc->lock, flags);
}
|
||||
|
||||
void move_masked_irq(int irq)
|
||||
{
|
||||
struct irq_desc *desc = irq_desc + irq;
|
||||
cpumask_t tmp;
|
||||
|
||||
if (likely(!(desc->status & IRQ_MOVE_PENDING)))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Paranoia: cpu-local interrupts shouldn't be calling in here anyway.
|
||||
*/
|
||||
if (CHECK_IRQ_PER_CPU(desc->status)) {
|
||||
WARN_ON(1);
|
||||
return;
|
||||
}
|
||||
|
||||
desc->status &= ~IRQ_MOVE_PENDING;
|
||||
|
||||
if (unlikely(cpus_empty(irq_desc[irq].pending_mask)))
|
||||
return;
|
||||
|
||||
if (!desc->chip->set_affinity)
|
||||
return;
|
||||
|
||||
assert_spin_locked(&desc->lock);
|
||||
|
||||
cpus_and(tmp, irq_desc[irq].pending_mask, cpu_online_map);
|
||||
|
||||
/*
|
||||
* If there was a valid mask to work with, please
|
||||
* do the disable, re-program, enable sequence.
|
||||
* This is *not* particularly important for level triggered
|
||||
* but in a edge trigger case, we might be setting rte
|
||||
* when an active trigger is comming in. This could
|
||||
* cause some ioapics to mal-function.
|
||||
* Being paranoid i guess!
|
||||
*
|
||||
* For correct operation this depends on the caller
|
||||
* masking the irqs.
|
||||
*/
|
||||
if (likely(!cpus_empty(tmp))) {
|
||||
desc->chip->set_affinity(irq,tmp);
|
||||
}
|
||||
cpus_clear(irq_desc[irq].pending_mask);
|
||||
}
|
||||
|
||||
void move_native_irq(int irq)
|
||||
{
|
||||
struct irq_desc *desc = irq_desc + irq;
|
||||
|
||||
if (likely(!(desc->status & IRQ_MOVE_PENDING)))
|
||||
return;
|
||||
|
||||
if (unlikely(desc->status & IRQ_DISABLED))
|
||||
return;
|
||||
|
||||
desc->chip->mask(irq);
|
||||
move_masked_irq(irq);
|
||||
desc->chip->unmask(irq);
|
||||
}
|
||||
|
||||
150
kernel/irq/proc.c
Normal file
150
kernel/irq/proc.c
Normal file
@@ -0,0 +1,150 @@
|
||||
/*
|
||||
* linux/kernel/irq/proc.c
|
||||
*
|
||||
* Copyright (C) 1992, 1998-2004 Linus Torvalds, Ingo Molnar
|
||||
*
|
||||
* This file contains the /proc/irq/ handling code.
|
||||
*/
|
||||
|
||||
#include <linux/irq.h>
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/interrupt.h>
|
||||
|
||||
#include "internals.h"
|
||||
|
||||
static struct proc_dir_entry *root_irq_dir;
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
|
||||
/* /proc/irq/<n>/smp_affinity read handler: print the affinity mask. */
static int irq_affinity_read_proc(char *page, char **start, off_t off,
				  int count, int *eof, void *data)
{
	int len;

	len = cpumask_scnprintf(page, count, irq_desc[(long)data].affinity);
	/* Need room for the trailing newline and NUL. */
	if (count - len < 2)
		return -EINVAL;
	len += sprintf(page + len, "\n");
	return len;
}
|
||||
|
||||
int no_irq_affinity;
|
||||
static int irq_affinity_write_proc(struct file *file, const char __user *buffer,
|
||||
unsigned long count, void *data)
|
||||
{
|
||||
unsigned int irq = (int)(long)data, full_count = count, err;
|
||||
cpumask_t new_value, tmp;
|
||||
|
||||
if (!irq_desc[irq].chip->set_affinity || no_irq_affinity ||
|
||||
irq_balancing_disabled(irq))
|
||||
return -EIO;
|
||||
|
||||
err = cpumask_parse_user(buffer, count, new_value);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
/*
|
||||
* Do not allow disabling IRQs completely - it's a too easy
|
||||
* way to make the system unusable accidentally :-) At least
|
||||
* one online CPU still has to be targeted.
|
||||
*/
|
||||
cpus_and(tmp, new_value, cpu_online_map);
|
||||
if (cpus_empty(tmp))
|
||||
/* Special case for empty set - allow the architecture
|
||||
code to set default SMP affinity. */
|
||||
return select_smp_affinity(irq) ? -EINVAL : full_count;
|
||||
|
||||
irq_set_affinity(irq, new_value);
|
||||
|
||||
return full_count;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#define MAX_NAMELEN 128
|
||||
|
||||
static int name_unique(unsigned int irq, struct irqaction *new_action)
|
||||
{
|
||||
struct irq_desc *desc = irq_desc + irq;
|
||||
struct irqaction *action;
|
||||
|
||||
for (action = desc->action ; action; action = action->next)
|
||||
if ((action != new_action) && action->name &&
|
||||
!strcmp(new_action->name, action->name))
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
void register_handler_proc(unsigned int irq, struct irqaction *action)
|
||||
{
|
||||
char name [MAX_NAMELEN];
|
||||
|
||||
if (!irq_desc[irq].dir || action->dir || !action->name ||
|
||||
!name_unique(irq, action))
|
||||
return;
|
||||
|
||||
memset(name, 0, MAX_NAMELEN);
|
||||
snprintf(name, MAX_NAMELEN, "%s", action->name);
|
||||
|
||||
/* create /proc/irq/1234/handler/ */
|
||||
action->dir = proc_mkdir(name, irq_desc[irq].dir);
|
||||
}
|
||||
|
||||
#undef MAX_NAMELEN
|
||||
|
||||
#define MAX_NAMELEN 10
|
||||
|
||||
void register_irq_proc(unsigned int irq)
|
||||
{
|
||||
char name [MAX_NAMELEN];
|
||||
|
||||
if (!root_irq_dir ||
|
||||
(irq_desc[irq].chip == &no_irq_chip) ||
|
||||
irq_desc[irq].dir)
|
||||
return;
|
||||
|
||||
memset(name, 0, MAX_NAMELEN);
|
||||
sprintf(name, "%d", irq);
|
||||
|
||||
/* create /proc/irq/1234 */
|
||||
irq_desc[irq].dir = proc_mkdir(name, root_irq_dir);
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
{
|
||||
struct proc_dir_entry *entry;
|
||||
|
||||
/* create /proc/irq/<irq>/smp_affinity */
|
||||
entry = create_proc_entry("smp_affinity", 0600, irq_desc[irq].dir);
|
||||
|
||||
if (entry) {
|
||||
entry->data = (void *)(long)irq;
|
||||
entry->read_proc = irq_affinity_read_proc;
|
||||
entry->write_proc = irq_affinity_write_proc;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#undef MAX_NAMELEN
|
||||
|
||||
void unregister_handler_proc(unsigned int irq, struct irqaction *action)
|
||||
{
|
||||
if (action->dir)
|
||||
remove_proc_entry(action->dir->name, irq_desc[irq].dir);
|
||||
}
|
||||
|
||||
void init_irq_proc(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* create /proc/irq */
|
||||
root_irq_dir = proc_mkdir("irq", NULL);
|
||||
if (!root_irq_dir)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Create entries for all existing IRQs.
|
||||
*/
|
||||
for (i = 0; i < NR_IRQS; i++)
|
||||
register_irq_proc(i);
|
||||
}
|
||||
|
||||
77
kernel/irq/resend.c
Normal file
77
kernel/irq/resend.c
Normal file
@@ -0,0 +1,77 @@
|
||||
/*
|
||||
* linux/kernel/irq/resend.c
|
||||
*
|
||||
* Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar
|
||||
* Copyright (C) 2005-2006, Thomas Gleixner
|
||||
*
|
||||
* This file contains the IRQ-resend code
|
||||
*
|
||||
* If the interrupt is waiting to be processed, we try to re-run it.
|
||||
* We can't directly run it from here since the caller might be in an
|
||||
* interrupt-protected region. Not all irq controller chips can
|
||||
* retrigger interrupts at the hardware level, so in those cases
|
||||
* we allow the resending of IRQs via a tasklet.
|
||||
*/
|
||||
|
||||
#include <linux/irq.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/interrupt.h>
|
||||
|
||||
#include "internals.h"
|
||||
|
||||
#ifdef CONFIG_HARDIRQS_SW_RESEND
|
||||
|
||||
/* Bitmap to handle software resend of interrupts: */
|
||||
static DECLARE_BITMAP(irqs_resend, NR_IRQS);
|
||||
|
||||
/*
|
||||
* Run software resends of IRQ's
|
||||
*/
|
||||
static void resend_irqs(unsigned long arg)
|
||||
{
|
||||
struct irq_desc *desc;
|
||||
int irq;
|
||||
|
||||
while (!bitmap_empty(irqs_resend, NR_IRQS)) {
|
||||
irq = find_first_bit(irqs_resend, NR_IRQS);
|
||||
clear_bit(irq, irqs_resend);
|
||||
desc = irq_desc + irq;
|
||||
local_irq_disable();
|
||||
desc->handle_irq(irq, desc);
|
||||
local_irq_enable();
|
||||
}
|
||||
}
|
||||
|
||||
/* Tasklet to handle resend: */
|
||||
static DECLARE_TASKLET(resend_tasklet, resend_irqs, 0);
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* IRQ resend
|
||||
*
|
||||
* Is called with interrupts disabled and desc->lock held.
|
||||
*/
|
||||
void check_irq_resend(struct irq_desc *desc, unsigned int irq)
|
||||
{
|
||||
unsigned int status = desc->status;
|
||||
|
||||
/*
|
||||
* Make sure the interrupt is enabled, before resending it:
|
||||
*/
|
||||
desc->chip->enable(irq);
|
||||
|
||||
if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
|
||||
desc->status = (status & ~IRQ_PENDING) | IRQ_REPLAY;
|
||||
|
||||
if (!desc->chip || !desc->chip->retrigger ||
|
||||
!desc->chip->retrigger(irq)) {
|
||||
#ifdef CONFIG_HARDIRQS_SW_RESEND
|
||||
/* Set it pending and activate the softirq: */
|
||||
set_bit(irq, irqs_resend);
|
||||
tasklet_schedule(&resend_tasklet);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
210
kernel/irq/spurious.c
Normal file
210
kernel/irq/spurious.c
Normal file
@@ -0,0 +1,210 @@
|
||||
/*
|
||||
* linux/kernel/irq/spurious.c
|
||||
*
|
||||
* Copyright (C) 1992, 1998-2004 Linus Torvalds, Ingo Molnar
|
||||
*
|
||||
* This file contains spurious interrupt handling.
|
||||
*/
|
||||
|
||||
#include <linux/irq.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/kallsyms.h>
|
||||
#include <linux/interrupt.h>
|
||||
|
||||
static int irqfixup __read_mostly;
|
||||
|
||||
/*
|
||||
* Recovery handler for misrouted interrupts.
|
||||
*/
|
||||
static int misrouted_irq(int irq)
|
||||
{
|
||||
int i;
|
||||
int ok = 0;
|
||||
int work = 0; /* Did we do work for a real IRQ */
|
||||
|
||||
for (i = 1; i < NR_IRQS; i++) {
|
||||
struct irq_desc *desc = irq_desc + i;
|
||||
struct irqaction *action;
|
||||
|
||||
if (i == irq) /* Already tried */
|
||||
continue;
|
||||
|
||||
spin_lock(&desc->lock);
|
||||
/* Already running on another processor */
|
||||
if (desc->status & IRQ_INPROGRESS) {
|
||||
/*
|
||||
* Already running: If it is shared get the other
|
||||
* CPU to go looking for our mystery interrupt too
|
||||
*/
|
||||
if (desc->action && (desc->action->flags & IRQF_SHARED))
|
||||
desc->status |= IRQ_PENDING;
|
||||
spin_unlock(&desc->lock);
|
||||
continue;
|
||||
}
|
||||
/* Honour the normal IRQ locking */
|
||||
desc->status |= IRQ_INPROGRESS;
|
||||
action = desc->action;
|
||||
spin_unlock(&desc->lock);
|
||||
|
||||
while (action) {
|
||||
/* Only shared IRQ handlers are safe to call */
|
||||
if (action->flags & IRQF_SHARED) {
|
||||
if (action->handler(i, action->dev_id) ==
|
||||
IRQ_HANDLED)
|
||||
ok = 1;
|
||||
}
|
||||
action = action->next;
|
||||
}
|
||||
local_irq_disable();
|
||||
/* Now clean up the flags */
|
||||
spin_lock(&desc->lock);
|
||||
action = desc->action;
|
||||
|
||||
/*
|
||||
* While we were looking for a fixup someone queued a real
|
||||
* IRQ clashing with our walk:
|
||||
*/
|
||||
while ((desc->status & IRQ_PENDING) && action) {
|
||||
/*
|
||||
* Perform real IRQ processing for the IRQ we deferred
|
||||
*/
|
||||
work = 1;
|
||||
spin_unlock(&desc->lock);
|
||||
handle_IRQ_event(i, action);
|
||||
spin_lock(&desc->lock);
|
||||
desc->status &= ~IRQ_PENDING;
|
||||
}
|
||||
desc->status &= ~IRQ_INPROGRESS;
|
||||
/*
|
||||
* If we did actual work for the real IRQ line we must let the
|
||||
* IRQ controller clean up too
|
||||
*/
|
||||
if (work && desc->chip && desc->chip->end)
|
||||
desc->chip->end(i);
|
||||
spin_unlock(&desc->lock);
|
||||
}
|
||||
/* So the caller can adjust the irq error counts */
|
||||
return ok;
|
||||
}
|
||||
|
||||
/*
|
||||
* If 99,900 of the previous 100,000 interrupts have not been handled
|
||||
* then assume that the IRQ is stuck in some manner. Drop a diagnostic
|
||||
* and try to turn the IRQ off.
|
||||
*
|
||||
* (The other 100-of-100,000 interrupts may have been a correctly
|
||||
* functioning device sharing an IRQ with the failing one)
|
||||
*
|
||||
* Called under desc->lock
|
||||
*/
|
||||
|
||||
static void
|
||||
__report_bad_irq(unsigned int irq, struct irq_desc *desc,
|
||||
irqreturn_t action_ret)
|
||||
{
|
||||
struct irqaction *action;
|
||||
|
||||
if (action_ret != IRQ_HANDLED && action_ret != IRQ_NONE) {
|
||||
printk(KERN_ERR "irq event %d: bogus return value %x\n",
|
||||
irq, action_ret);
|
||||
} else {
|
||||
printk(KERN_ERR "irq %d: nobody cared (try booting with "
|
||||
"the \"irqpoll\" option)\n", irq);
|
||||
}
|
||||
dump_stack();
|
||||
printk(KERN_ERR "handlers:\n");
|
||||
|
||||
action = desc->action;
|
||||
while (action) {
|
||||
printk(KERN_ERR "[<%p>]", action->handler);
|
||||
print_symbol(" (%s)",
|
||||
(unsigned long)action->handler);
|
||||
printk("\n");
|
||||
action = action->next;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
report_bad_irq(unsigned int irq, struct irq_desc *desc, irqreturn_t action_ret)
|
||||
{
|
||||
static int count = 100;
|
||||
|
||||
if (count > 0) {
|
||||
count--;
|
||||
__report_bad_irq(irq, desc, action_ret);
|
||||
}
|
||||
}
|
||||
|
||||
void note_interrupt(unsigned int irq, struct irq_desc *desc,
|
||||
irqreturn_t action_ret)
|
||||
{
|
||||
if (unlikely(action_ret != IRQ_HANDLED)) {
|
||||
desc->irqs_unhandled++;
|
||||
if (unlikely(action_ret != IRQ_NONE))
|
||||
report_bad_irq(irq, desc, action_ret);
|
||||
}
|
||||
|
||||
if (unlikely(irqfixup)) {
|
||||
/* Don't punish working computers */
|
||||
if ((irqfixup == 2 && irq == 0) || action_ret == IRQ_NONE) {
|
||||
int ok = misrouted_irq(irq);
|
||||
if (action_ret == IRQ_NONE)
|
||||
desc->irqs_unhandled -= ok;
|
||||
}
|
||||
}
|
||||
|
||||
desc->irq_count++;
|
||||
if (likely(desc->irq_count < 100000))
|
||||
return;
|
||||
|
||||
desc->irq_count = 0;
|
||||
if (unlikely(desc->irqs_unhandled > 99900)) {
|
||||
/*
|
||||
* The interrupt is stuck
|
||||
*/
|
||||
__report_bad_irq(irq, desc, action_ret);
|
||||
/*
|
||||
* Now kill the IRQ
|
||||
*/
|
||||
printk(KERN_EMERG "Disabling IRQ #%d\n", irq);
|
||||
desc->status |= IRQ_DISABLED;
|
||||
desc->depth = 1;
|
||||
desc->chip->disable(irq);
|
||||
}
|
||||
desc->irqs_unhandled = 0;
|
||||
}
|
||||
|
||||
int noirqdebug __read_mostly;
|
||||
|
||||
int noirqdebug_setup(char *str)
|
||||
{
|
||||
noirqdebug = 1;
|
||||
printk(KERN_INFO "IRQ lockup detection disabled\n");
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
__setup("noirqdebug", noirqdebug_setup);
|
||||
|
||||
static int __init irqfixup_setup(char *str)
|
||||
{
|
||||
irqfixup = 1;
|
||||
printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n");
|
||||
printk(KERN_WARNING "This may impact system performance.\n");
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
__setup("irqfixup", irqfixup_setup);
|
||||
|
||||
static int __init irqpoll_setup(char *str)
|
||||
{
|
||||
irqfixup = 2;
|
||||
printk(KERN_WARNING "Misrouted IRQ fixup and polling support "
|
||||
"enabled\n");
|
||||
printk(KERN_WARNING "This may significantly impact system "
|
||||
"performance\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
__setup("irqpoll", irqpoll_setup);
|
||||
350
kernel/itimer.c
Normal file
350
kernel/itimer.c
Normal file
@@ -0,0 +1,350 @@
|
||||
/*
|
||||
* linux/kernel/itimer.c
|
||||
*
|
||||
* Copyright (C) 1992 Darren Senn
|
||||
*/
|
||||
|
||||
/* These are all the functions necessary to implement itimers */
|
||||
|
||||
#include <linux/mm.h>
|
||||
#include <linux/smp_lock.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/time.h>
|
||||
#include <linux/posix-timers.h>
|
||||
#include <linux/hrtimer.h>
|
||||
|
||||
#include <asm/uaccess.h>
|
||||
|
||||
/**
|
||||
* itimer_get_remtime - get remaining time for the timer
|
||||
*
|
||||
* @timer: the timer to read
|
||||
*
|
||||
* Returns the delta between the expiry time and now, which can be
|
||||
* less than zero or 1usec for an pending expired timer
|
||||
*/
|
||||
static struct timeval itimer_get_remtime(struct hrtimer *timer)
|
||||
{
|
||||
ktime_t rem = hrtimer_get_remaining(timer);
|
||||
|
||||
/*
|
||||
* Racy but safe: if the itimer expires after the above
|
||||
* hrtimer_get_remtime() call but before this condition
|
||||
* then we return 0 - which is correct.
|
||||
*/
|
||||
if (hrtimer_active(timer)) {
|
||||
if (rem.tv64 <= 0)
|
||||
rem.tv64 = NSEC_PER_USEC;
|
||||
} else
|
||||
rem.tv64 = 0;
|
||||
|
||||
return ktime_to_timeval(rem);
|
||||
}
|
||||
|
||||
int do_getitimer(int which, struct itimerval *value)
|
||||
{
|
||||
struct task_struct *tsk = current;
|
||||
cputime_t cinterval, cval;
|
||||
|
||||
switch (which) {
|
||||
case ITIMER_REAL:
|
||||
spin_lock_irq(&tsk->sighand->siglock);
|
||||
value->it_value = itimer_get_remtime(&tsk->signal->real_timer);
|
||||
value->it_interval =
|
||||
ktime_to_timeval(tsk->signal->it_real_incr);
|
||||
spin_unlock_irq(&tsk->sighand->siglock);
|
||||
break;
|
||||
case ITIMER_VIRTUAL:
|
||||
read_lock(&tasklist_lock);
|
||||
spin_lock_irq(&tsk->sighand->siglock);
|
||||
cval = tsk->signal->it_virt_expires;
|
||||
cinterval = tsk->signal->it_virt_incr;
|
||||
if (!cputime_eq(cval, cputime_zero)) {
|
||||
struct task_struct *t = tsk;
|
||||
cputime_t utime = tsk->signal->utime;
|
||||
do {
|
||||
utime = cputime_add(utime, t->utime);
|
||||
t = next_thread(t);
|
||||
} while (t != tsk);
|
||||
if (cputime_le(cval, utime)) { /* about to fire */
|
||||
cval = jiffies_to_cputime(1);
|
||||
} else {
|
||||
cval = cputime_sub(cval, utime);
|
||||
}
|
||||
}
|
||||
spin_unlock_irq(&tsk->sighand->siglock);
|
||||
read_unlock(&tasklist_lock);
|
||||
cputime_to_timeval(cval, &value->it_value);
|
||||
cputime_to_timeval(cinterval, &value->it_interval);
|
||||
break;
|
||||
case ITIMER_PROF:
|
||||
read_lock(&tasklist_lock);
|
||||
spin_lock_irq(&tsk->sighand->siglock);
|
||||
cval = tsk->signal->it_prof_expires;
|
||||
cinterval = tsk->signal->it_prof_incr;
|
||||
if (!cputime_eq(cval, cputime_zero)) {
|
||||
struct task_struct *t = tsk;
|
||||
cputime_t ptime = cputime_add(tsk->signal->utime,
|
||||
tsk->signal->stime);
|
||||
do {
|
||||
ptime = cputime_add(ptime,
|
||||
cputime_add(t->utime,
|
||||
t->stime));
|
||||
t = next_thread(t);
|
||||
} while (t != tsk);
|
||||
if (cputime_le(cval, ptime)) { /* about to fire */
|
||||
cval = jiffies_to_cputime(1);
|
||||
} else {
|
||||
cval = cputime_sub(cval, ptime);
|
||||
}
|
||||
}
|
||||
spin_unlock_irq(&tsk->sighand->siglock);
|
||||
read_unlock(&tasklist_lock);
|
||||
cputime_to_timeval(cval, &value->it_value);
|
||||
cputime_to_timeval(cinterval, &value->it_interval);
|
||||
break;
|
||||
default:
|
||||
return(-EINVAL);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* getitimer(2): copy the requested itimer value out to user space. */
asmlinkage long sys_getitimer(int which, struct itimerval __user *value)
{
	struct itimerval get_buffer;
	int error;

	if (!value)
		return -EFAULT;

	error = do_getitimer(which, &get_buffer);
	if (!error && copy_to_user(value, &get_buffer, sizeof(get_buffer)))
		error = -EFAULT;
	return error;
}
|
||||
|
||||
|
||||
/*
|
||||
* The timer is automagically restarted, when interval != 0
|
||||
*/
|
||||
enum hrtimer_restart it_real_fn(struct hrtimer *timer)
|
||||
{
|
||||
struct signal_struct *sig =
|
||||
container_of(timer, struct signal_struct, real_timer);
|
||||
|
||||
send_group_sig_info(SIGALRM, SEND_SIG_PRIV, sig->tsk);
|
||||
|
||||
return HRTIMER_NORESTART;
|
||||
}
|
||||
|
||||
/*
|
||||
* We do not care about correctness. We just sanitize the values so
|
||||
* the ktime_t operations which expect normalized values do not
|
||||
* break. This converts negative values to long timeouts similar to
|
||||
* the code in kernel versions < 2.6.16
|
||||
*
|
||||
* Print a limited number of warning messages when an invalid timeval
|
||||
* is detected.
|
||||
*/
|
||||
static void fixup_timeval(struct timeval *tv, int interval)
|
||||
{
|
||||
static int warnlimit = 10;
|
||||
unsigned long tmp;
|
||||
|
||||
if (warnlimit > 0) {
|
||||
warnlimit--;
|
||||
printk(KERN_WARNING
|
||||
"setitimer: %s (pid = %d) provided "
|
||||
"invalid timeval %s: tv_sec = %ld tv_usec = %ld\n",
|
||||
current->comm, current->pid,
|
||||
interval ? "it_interval" : "it_value",
|
||||
tv->tv_sec, (long) tv->tv_usec);
|
||||
}
|
||||
|
||||
tmp = tv->tv_usec;
|
||||
if (tmp >= USEC_PER_SEC) {
|
||||
tv->tv_usec = tmp % USEC_PER_SEC;
|
||||
tv->tv_sec += tmp / USEC_PER_SEC;
|
||||
}
|
||||
|
||||
tmp = tv->tv_sec;
|
||||
if (tmp > LONG_MAX)
|
||||
tv->tv_sec = LONG_MAX;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns true if the timeval is in canonical form
|
||||
*/
|
||||
#define timeval_valid(t) \
|
||||
(((t)->tv_sec >= 0) && (((unsigned long) (t)->tv_usec) < USEC_PER_SEC))
|
||||
|
||||
/*
|
||||
* Check for invalid timevals, sanitize them and print a limited
|
||||
* number of warnings.
|
||||
*/
|
||||
static void check_itimerval(struct itimerval *value) {
|
||||
|
||||
if (unlikely(!timeval_valid(&value->it_value)))
|
||||
fixup_timeval(&value->it_value, 0);
|
||||
|
||||
if (unlikely(!timeval_valid(&value->it_interval)))
|
||||
fixup_timeval(&value->it_interval, 1);
|
||||
}
|
||||
|
||||
int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
|
||||
{
|
||||
struct task_struct *tsk = current;
|
||||
struct hrtimer *timer;
|
||||
ktime_t expires;
|
||||
cputime_t cval, cinterval, nval, ninterval;
|
||||
|
||||
/*
|
||||
* Validate the timevals in value.
|
||||
*
|
||||
* Note: Although the spec requires that invalid values shall
|
||||
* return -EINVAL, we just fixup the value and print a limited
|
||||
* number of warnings in order not to break users of this
|
||||
* historical misfeature.
|
||||
*
|
||||
* Scheduled for replacement in March 2007
|
||||
*/
|
||||
check_itimerval(value);
|
||||
|
||||
switch (which) {
|
||||
case ITIMER_REAL:
|
||||
again:
|
||||
spin_lock_irq(&tsk->sighand->siglock);
|
||||
timer = &tsk->signal->real_timer;
|
||||
if (ovalue) {
|
||||
ovalue->it_value = itimer_get_remtime(timer);
|
||||
ovalue->it_interval
|
||||
= ktime_to_timeval(tsk->signal->it_real_incr);
|
||||
}
|
||||
/* We are sharing ->siglock with it_real_fn() */
|
||||
if (hrtimer_try_to_cancel(timer) < 0) {
|
||||
spin_unlock_irq(&tsk->sighand->siglock);
|
||||
goto again;
|
||||
}
|
||||
expires = timeval_to_ktime(value->it_value);
|
||||
if (expires.tv64 != 0) {
|
||||
tsk->signal->it_real_incr =
|
||||
timeval_to_ktime(value->it_interval);
|
||||
hrtimer_start(timer, expires, HRTIMER_MODE_REL);
|
||||
} else
|
||||
tsk->signal->it_real_incr.tv64 = 0;
|
||||
|
||||
spin_unlock_irq(&tsk->sighand->siglock);
|
||||
break;
|
||||
case ITIMER_VIRTUAL:
|
||||
nval = timeval_to_cputime(&value->it_value);
|
||||
ninterval = timeval_to_cputime(&value->it_interval);
|
||||
read_lock(&tasklist_lock);
|
||||
spin_lock_irq(&tsk->sighand->siglock);
|
||||
cval = tsk->signal->it_virt_expires;
|
||||
cinterval = tsk->signal->it_virt_incr;
|
||||
if (!cputime_eq(cval, cputime_zero) ||
|
||||
!cputime_eq(nval, cputime_zero)) {
|
||||
if (cputime_gt(nval, cputime_zero))
|
||||
nval = cputime_add(nval,
|
||||
jiffies_to_cputime(1));
|
||||
set_process_cpu_timer(tsk, CPUCLOCK_VIRT,
|
||||
&nval, &cval);
|
||||
}
|
||||
tsk->signal->it_virt_expires = nval;
|
||||
tsk->signal->it_virt_incr = ninterval;
|
||||
spin_unlock_irq(&tsk->sighand->siglock);
|
||||
read_unlock(&tasklist_lock);
|
||||
if (ovalue) {
|
||||
cputime_to_timeval(cval, &ovalue->it_value);
|
||||
cputime_to_timeval(cinterval, &ovalue->it_interval);
|
||||
}
|
||||
break;
|
||||
case ITIMER_PROF:
|
||||
nval = timeval_to_cputime(&value->it_value);
|
||||
ninterval = timeval_to_cputime(&value->it_interval);
|
||||
read_lock(&tasklist_lock);
|
||||
spin_lock_irq(&tsk->sighand->siglock);
|
||||
cval = tsk->signal->it_prof_expires;
|
||||
cinterval = tsk->signal->it_prof_incr;
|
||||
if (!cputime_eq(cval, cputime_zero) ||
|
||||
!cputime_eq(nval, cputime_zero)) {
|
||||
if (cputime_gt(nval, cputime_zero))
|
||||
nval = cputime_add(nval,
|
||||
jiffies_to_cputime(1));
|
||||
set_process_cpu_timer(tsk, CPUCLOCK_PROF,
|
||||
&nval, &cval);
|
||||
}
|
||||
tsk->signal->it_prof_expires = nval;
|
||||
tsk->signal->it_prof_incr = ninterval;
|
||||
spin_unlock_irq(&tsk->sighand->siglock);
|
||||
read_unlock(&tasklist_lock);
|
||||
if (ovalue) {
|
||||
cputime_to_timeval(cval, &ovalue->it_value);
|
||||
cputime_to_timeval(cinterval, &ovalue->it_interval);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* alarm_setitimer - set alarm in seconds
|
||||
*
|
||||
* @seconds: number of seconds until alarm
|
||||
* 0 disables the alarm
|
||||
*
|
||||
* Returns the remaining time in seconds of a pending timer or 0 when
|
||||
* the timer is not active.
|
||||
*
|
||||
* On 32 bit machines the seconds value is limited to (INT_MAX/2) to avoid
|
||||
* negative timeval settings which would cause immediate expiry.
|
||||
*/
|
||||
unsigned int alarm_setitimer(unsigned int seconds)
|
||||
{
|
||||
struct itimerval it_new, it_old;
|
||||
|
||||
#if BITS_PER_LONG < 64
|
||||
if (seconds > INT_MAX)
|
||||
seconds = INT_MAX;
|
||||
#endif
|
||||
it_new.it_value.tv_sec = seconds;
|
||||
it_new.it_value.tv_usec = 0;
|
||||
it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
|
||||
|
||||
do_setitimer(ITIMER_REAL, &it_new, &it_old);
|
||||
|
||||
/*
|
||||
* We can't return 0 if we have an alarm pending ... And we'd
|
||||
* better return too much than too little anyway
|
||||
*/
|
||||
if ((!it_old.it_value.tv_sec && it_old.it_value.tv_usec) ||
|
||||
it_old.it_value.tv_usec >= 500000)
|
||||
it_old.it_value.tv_sec++;
|
||||
|
||||
return it_old.it_value.tv_sec;
|
||||
}
|
||||
|
||||
asmlinkage long sys_setitimer(int which,
|
||||
struct itimerval __user *value,
|
||||
struct itimerval __user *ovalue)
|
||||
{
|
||||
struct itimerval set_buffer, get_buffer;
|
||||
int error;
|
||||
|
||||
if (value) {
|
||||
if(copy_from_user(&set_buffer, value, sizeof(set_buffer)))
|
||||
return -EFAULT;
|
||||
} else
|
||||
memset((char *) &set_buffer, 0, sizeof(set_buffer));
|
||||
|
||||
error = do_setitimer(which, &set_buffer, ovalue ? &get_buffer : NULL);
|
||||
if (error || !ovalue)
|
||||
return error;
|
||||
|
||||
if (copy_to_user(ovalue, &get_buffer, sizeof(get_buffer)))
|
||||
return -EFAULT;
|
||||
return 0;
|
||||
}
|
||||
454
kernel/kallsyms.c
Normal file
454
kernel/kallsyms.c
Normal file
@@ -0,0 +1,454 @@
|
||||
/*
|
||||
* kallsyms.c: in-kernel printing of symbolic oopses and stack traces.
|
||||
*
|
||||
* Rewritten and vastly simplified by Rusty Russell for in-kernel
|
||||
* module loader:
|
||||
* Copyright 2002 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
|
||||
*
|
||||
* ChangeLog:
|
||||
*
|
||||
* (25/Aug/2004) Paulo Marques <pmarques@grupopie.com>
|
||||
* Changed the compression method from stem compression to "table lookup"
|
||||
* compression (see scripts/kallsyms.c for a more complete description)
|
||||
*/
|
||||
#include <linux/kallsyms.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/sched.h> /* for cond_resched */
|
||||
#include <linux/mm.h>
|
||||
#include <linux/ctype.h>
|
||||
|
||||
#include <asm/sections.h>
|
||||
|
||||
#ifdef CONFIG_KALLSYMS_ALL
|
||||
#define all_var 1
|
||||
#else
|
||||
#define all_var 0
|
||||
#endif
|
||||
|
||||
/* These will be re-linked against their real values during the second link stage */
|
||||
extern const unsigned long kallsyms_addresses[] __attribute__((weak));
|
||||
extern const unsigned long kallsyms_num_syms __attribute__((weak));
|
||||
extern const u8 kallsyms_names[] __attribute__((weak));
|
||||
|
||||
extern const u8 kallsyms_token_table[] __attribute__((weak));
|
||||
extern const u16 kallsyms_token_index[] __attribute__((weak));
|
||||
|
||||
extern const unsigned long kallsyms_markers[] __attribute__((weak));
|
||||
|
||||
static inline int is_kernel_inittext(unsigned long addr)
|
||||
{
|
||||
if (addr >= (unsigned long)_sinittext
|
||||
&& addr <= (unsigned long)_einittext)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int is_kernel_extratext(unsigned long addr)
|
||||
{
|
||||
if (addr >= (unsigned long)_sextratext
|
||||
&& addr <= (unsigned long)_eextratext)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int is_kernel_text(unsigned long addr)
|
||||
{
|
||||
if (addr >= (unsigned long)_stext && addr <= (unsigned long)_etext)
|
||||
return 1;
|
||||
return in_gate_area_no_task(addr);
|
||||
}
|
||||
|
||||
static inline int is_kernel(unsigned long addr)
|
||||
{
|
||||
if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end)
|
||||
return 1;
|
||||
return in_gate_area_no_task(addr);
|
||||
}
|
||||
|
||||
static int is_ksym_addr(unsigned long addr)
|
||||
{
|
||||
if (all_var)
|
||||
return is_kernel(addr);
|
||||
|
||||
return is_kernel_text(addr) || is_kernel_inittext(addr) ||
|
||||
is_kernel_extratext(addr);
|
||||
}
|
||||
|
||||
/* expand a compressed symbol data into the resulting uncompressed string,
   given the offset to where the symbol is in the compressed stream.
   Writes the NUL-terminated name into @result (the first token char is the
   symbol type and is skipped) and returns the offset of the next symbol. */
static unsigned int kallsyms_expand_symbol(unsigned int off, char *result)
{
	int len, skipped_first = 0;
	const u8 *tptr, *data;

	/* get the compressed symbol length from the first symbol byte */
	data = &kallsyms_names[off];
	len = *data;
	data++;

	/* update the offset to return the offset for the next symbol on
	 * the compressed stream */
	off += len + 1;

	/* for every byte on the compressed symbol data, copy the table
	   entry for that byte */
	while(len) {
		tptr = &kallsyms_token_table[ kallsyms_token_index[*data] ];
		data++;
		len--;

		while (*tptr) {
			/* the very first char encodes the symbol type, not
			 * part of the name -- skip it */
			if(skipped_first) {
				*result = *tptr;
				result++;
			} else
				skipped_first = 1;
			tptr++;
		}
	}

	*result = '\0';

	/* return to offset to the next symbol */
	return off;
}
|
||||
|
||||
/* get symbol type information. This is encoded as a single char at the
 * beginning of the symbol name */
static char kallsyms_get_symbol_type(unsigned int off)
{
	/* get just the first code, look it up in the token table, and return the
	 * first char from this token */
	return kallsyms_token_table[ kallsyms_token_index[ kallsyms_names[off+1] ] ];
}
|
||||
|
||||
|
||||
/* find the offset on the compressed stream given an index in the
 * kallsyms array */
static unsigned int get_symbol_offset(unsigned long pos)
{
	const u8 *name;
	int i;

	/* use the closest marker we have. We have markers every 256 positions,
	 * so that should be close enough */
	name = &kallsyms_names[ kallsyms_markers[pos>>8] ];

	/* sequentially scan all the symbols up to the point we're searching for.
	 * Every symbol is stored in a [<len>][<len> bytes of data] format, so we
	 * just need to add the len to the current pointer for every symbol we
	 * wish to skip */
	for(i = 0; i < (pos&0xFF); i++)
		name = name + (*name) + 1;

	return name - kallsyms_names;
}
|
||||
|
||||
/* Lookup the address for this symbol. Returns 0 if not found.
 * Linearly scans the core kernel symbol table first, then falls back
 * to the module symbol tables. */
unsigned long kallsyms_lookup_name(const char *name)
{
	char namebuf[KSYM_NAME_LEN+1];
	unsigned long i;
	unsigned int off;

	for (i = 0, off = 0; i < kallsyms_num_syms; i++) {
		/* expand advances off to the next symbol as a side effect */
		off = kallsyms_expand_symbol(off, namebuf);

		if (strcmp(namebuf, name) == 0)
			return kallsyms_addresses[i];
	}
	return module_kallsyms_lookup_name(name);
}
|
||||
|
||||
/*
 * Find the index of the symbol containing @addr in kallsyms_addresses.
 * On return *@symbolsize holds the symbol's size and *@offset the
 * distance of @addr from the symbol's start.  Aliased symbols (same
 * address) resolve to the first alias.
 */
static unsigned long get_symbol_pos(unsigned long addr,
				    unsigned long *symbolsize,
				    unsigned long *offset)
{
	unsigned long symbol_start = 0, symbol_end = 0;
	unsigned long i, low, high, mid;

	/* This kernel should never had been booted. */
	BUG_ON(!kallsyms_addresses);

	/* do a binary search on the sorted kallsyms_addresses array */
	low = 0;
	high = kallsyms_num_syms;

	while (high - low > 1) {
		mid = (low + high) / 2;
		if (kallsyms_addresses[mid] <= addr)
			low = mid;
		else
			high = mid;
	}

	/*
	 * search for the first aliased symbol. Aliased
	 * symbols are symbols with the same address
	 */
	while (low && kallsyms_addresses[low-1] == kallsyms_addresses[low])
		--low;

	symbol_start = kallsyms_addresses[low];

	/* Search for next non-aliased symbol */
	for (i = low + 1; i < kallsyms_num_syms; i++) {
		if (kallsyms_addresses[i] > symbol_start) {
			symbol_end = kallsyms_addresses[i];
			break;
		}
	}

	/* if we found no next symbol, we use the end of the section */
	if (!symbol_end) {
		if (is_kernel_inittext(addr))
			symbol_end = (unsigned long)_einittext;
		else if (all_var)
			symbol_end = (unsigned long)_end;
		else
			symbol_end = (unsigned long)_etext;
	}

	*symbolsize = symbol_end - symbol_start;
	*offset = addr - symbol_start;

	return low;
}
|
||||
|
||||
/*
|
||||
* Lookup an address but don't bother to find any names.
|
||||
*/
|
||||
int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize,
|
||||
unsigned long *offset)
|
||||
{
|
||||
if (is_ksym_addr(addr))
|
||||
return !!get_symbol_pos(addr, symbolsize, offset);
|
||||
|
||||
return !!module_address_lookup(addr, symbolsize, offset, NULL);
|
||||
}
|
||||
|
||||
/*
 * Lookup an address
 * - modname is set to NULL if it's in the kernel
 * - we guarantee that the returned name is valid until we reschedule even if
 *   it resides in a module
 * - we also guarantee that modname will be valid until rescheduled
 * Returns a pointer to the symbol name (stored in @namebuf, which must be
 * at least KSYM_NAME_LEN+1 bytes), or NULL if nothing matches.
 */
const char *kallsyms_lookup(unsigned long addr,
			    unsigned long *symbolsize,
			    unsigned long *offset,
			    char **modname, char *namebuf)
{
	const char *msym;

	/* pre-terminate in case a module name gets strncpy'd in below */
	namebuf[KSYM_NAME_LEN] = 0;
	namebuf[0] = 0;

	if (is_ksym_addr(addr)) {
		unsigned long pos;

		pos = get_symbol_pos(addr, symbolsize, offset);
		/* Grab name */
		kallsyms_expand_symbol(get_symbol_offset(pos), namebuf);
		*modname = NULL;
		return namebuf;
	}

	/* see if it's in a module */
	msym = module_address_lookup(addr, symbolsize, offset, modname);
	if (msym)
		return strncpy(namebuf, msym, KSYM_NAME_LEN);

	return NULL;
}
|
||||
|
||||
/* Replace "%s" in @fmt with the symbolic form of @address ("name+off/size
 * [module]"), falling back to the raw hex address if no symbol matches,
 * and printk the result. */
void __print_symbol(const char *fmt, unsigned long address)
{
	char *modname;
	const char *name;
	unsigned long offset, size;
	char namebuf[KSYM_NAME_LEN+1];
	/* sized for the worst case: format skeleton + name + two hex
	 * numbers (BITS_PER_LONG*3/10 digits each) + module name + NUL */
	char buffer[sizeof("%s+%#lx/%#lx [%s]") + KSYM_NAME_LEN +
		    2*(BITS_PER_LONG*3/10) + MODULE_NAME_LEN + 1];

	name = kallsyms_lookup(address, &size, &offset, &modname, namebuf);

	if (!name)
		sprintf(buffer, "0x%lx", address);
	else {
		if (modname)
			sprintf(buffer, "%s+%#lx/%#lx [%s]", name, offset,
				size, modname);
		else
			sprintf(buffer, "%s+%#lx/%#lx", name, offset, size);
	}
	printk(fmt, buffer);
}
|
||||
|
||||
/* To avoid using get_symbol_offset for every symbol, we carry prefix along. */
struct kallsym_iter
{
	loff_t pos;		/* global symbol index (core + modules) */
	struct module *owner;	/* NULL for core kernel symbols */
	unsigned long value;	/* symbol address */
	unsigned int nameoff;	/* If iterating in core kernel symbols */
	char type;		/* nm-style type character */
	char name[KSYM_NAME_LEN+1];
};
|
||||
|
||||
/* Fill @iter from the module symbol tables (positions past the core
 * kernel symbols).  Returns 0 when there are no more module symbols. */
static int get_ksymbol_mod(struct kallsym_iter *iter)
{
	iter->owner = module_get_kallsym(iter->pos - kallsyms_num_syms,
					 &iter->value, &iter->type,
					 iter->name, sizeof(iter->name));
	if (iter->owner == NULL)
		return 0;

	/* Label it "global" if it is exported, "local" if not exported. */
	iter->type = is_exported(iter->name, iter->owner)
		? toupper(iter->type) : tolower(iter->type);

	return 1;
}
|
||||
|
||||
/* Fill @iter with the core kernel symbol at iter->pos/iter->nameoff.
 * Returns space to next name. */
static unsigned long get_ksymbol_core(struct kallsym_iter *iter)
{
	unsigned off = iter->nameoff;

	iter->owner = NULL;
	iter->value = kallsyms_addresses[iter->pos];

	iter->type = kallsyms_get_symbol_type(off);

	off = kallsyms_expand_symbol(off, iter->name);

	return off - iter->nameoff;
}
|
||||
|
||||
/* Reposition @iter to core-kernel symbol index @new_pos. */
static void reset_iter(struct kallsym_iter *iter, loff_t new_pos)
{
	iter->name[0] = '\0';
	iter->nameoff = get_symbol_offset(new_pos);
	iter->pos = new_pos;
}
|
||||
|
||||
/* Advance @iter to position @pos. Returns false if pos at or past end
 * of file. */
static int update_iter(struct kallsym_iter *iter, loff_t pos)
{
	/* Module symbols can be accessed randomly. */
	if (pos >= kallsyms_num_syms) {
		iter->pos = pos;
		return get_ksymbol_mod(iter);
	}

	/* If we're not on the desired position, reset to new position.
	 * Core symbols are sequential, so a non-sequential seek must
	 * rescan from the nearest marker. */
	if (pos != iter->pos)
		reset_iter(iter, pos);

	iter->nameoff += get_ksymbol_core(iter);
	iter->pos++;

	return 1;
}
|
||||
|
||||
/* seq_file .next: step the iterator kept in m->private to the next symbol. */
static void *s_next(struct seq_file *m, void *p, loff_t *pos)
{
	(*pos)++;

	if (!update_iter(m->private, *pos))
		return NULL;
	return p;
}
|
||||
|
||||
/* seq_file .start: position the iterator at *pos; NULL at end of table. */
static void *s_start(struct seq_file *m, loff_t *pos)
{
	if (!update_iter(m->private, *pos))
		return NULL;
	return m->private;
}
|
||||
|
||||
/* seq_file .stop: nothing to release; iterator lives until release(). */
static void s_stop(struct seq_file *m, void *p)
{
}
|
||||
|
||||
/* seq_file .show: emit one /proc/kallsyms line:
 * "<address> <type> <name>[\t[module]]". */
static int s_show(struct seq_file *m, void *p)
{
	struct kallsym_iter *iter = m->private;

	/* Some debugging symbols have no name. Ignore them. */
	if (!iter->name[0])
		return 0;

	if (iter->owner)
		seq_printf(m, "%0*lx %c %s\t[%s]\n",
			   (int)(2*sizeof(void*)),
			   iter->value, iter->type, iter->name,
			   module_name(iter->owner));
	else
		seq_printf(m, "%0*lx %c %s\n",
			   (int)(2*sizeof(void*)),
			   iter->value, iter->type, iter->name);
	return 0;
}
|
||||
|
||||
/* seq_file iteration callbacks for /proc/kallsyms */
static const struct seq_operations kallsyms_op = {
	.start = s_start,
	.next = s_next,
	.stop = s_stop,
	.show = s_show
};
|
||||
|
||||
/* open() handler for /proc/kallsyms: allocate a per-open iterator and
 * attach it to the seq_file. */
static int kallsyms_open(struct inode *inode, struct file *file)
{
	/* We keep iterator in m->private, since normal case is to
	 * s_start from where we left off, so we avoid doing
	 * using get_symbol_offset for every symbol */
	struct kallsym_iter *iter;
	int ret;

	iter = kmalloc(sizeof(*iter), GFP_KERNEL);
	if (!iter)
		return -ENOMEM;
	reset_iter(iter, 0);

	ret = seq_open(file, &kallsyms_op);
	if (ret == 0)
		((struct seq_file *)file->private_data)->private = iter;
	else
		/* seq_open failed: don't leak the iterator */
		kfree(iter);
	return ret;
}
|
||||
|
||||
/* release() handler: free the iterator allocated in kallsyms_open(). */
static int kallsyms_release(struct inode *inode, struct file *file)
{
	struct seq_file *m = (struct seq_file *)file->private_data;
	kfree(m->private);
	return seq_release(inode, file);
}
|
||||
|
||||
/* file_operations for /proc/kallsyms */
static const struct file_operations kallsyms_operations = {
	.open = kallsyms_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = kallsyms_release,
};
|
||||
|
||||
/* Register /proc/kallsyms (read-only) at boot; failure to create the
 * entry is silently tolerated. */
static int __init kallsyms_init(void)
{
	struct proc_dir_entry *entry;

	entry = create_proc_entry("kallsyms", 0444, NULL);
	if (entry)
		entry->proc_fops = &kallsyms_operations;
	return 0;
}
__initcall(kallsyms_init);
|
||||
|
||||
EXPORT_SYMBOL(__print_symbol);
|
||||
1137
kernel/kexec.c
Normal file
1137
kernel/kexec.c
Normal file
File diff suppressed because it is too large
Load Diff
196
kernel/kfifo.c
Normal file
196
kernel/kfifo.c
Normal file
@@ -0,0 +1,196 @@
|
||||
/*
|
||||
* A simple kernel FIFO implementation.
|
||||
*
|
||||
* Copyright (C) 2004 Stelian Pop <stelian@popies.net>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/kfifo.h>
|
||||
|
||||
/**
 * kfifo_init - allocates a new FIFO using a preallocated buffer
 * @buffer: the preallocated buffer to be used.
 * @size: the size of the internal buffer, this have to be a power of 2.
 * @gfp_mask: get_free_pages mask, passed to kmalloc()
 * @lock: the lock to be used to protect the fifo buffer
 *
 * Returns the new fifo, or ERR_PTR(-ENOMEM) if the struct allocation
 * fails.  The caller retains ownership of @buffer.
 *
 * Do NOT pass the kfifo to kfifo_free() after use! Simply free the
 * &struct kfifo with kfree().
 */
struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size,
			 gfp_t gfp_mask, spinlock_t *lock)
{
	struct kfifo *fifo;

	/* size must be a power of 2 */
	BUG_ON(size & (size - 1));

	fifo = kmalloc(sizeof(struct kfifo), gfp_mask);
	if (!fifo)
		return ERR_PTR(-ENOMEM);

	fifo->buffer = buffer;
	fifo->size = size;
	fifo->in = fifo->out = 0;
	fifo->lock = lock;

	return fifo;
}
EXPORT_SYMBOL(kfifo_init);
|
||||
|
||||
/**
 * kfifo_alloc - allocates a new FIFO and its internal buffer
 * @size: the size of the internal buffer to be allocated.
 * @gfp_mask: get_free_pages mask, passed to kmalloc()
 * @lock: the lock to be used to protect the fifo buffer
 *
 * The size will be rounded-up to a power of 2.
 * Returns the new fifo or an ERR_PTR() on allocation failure; free it
 * with kfifo_free().
 */
struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, spinlock_t *lock)
{
	unsigned char *buffer;
	struct kfifo *ret;

	/*
	 * round up to the next power of 2, since our 'let the indices
	 * wrap' technique works only in this case.
	 */
	if (size & (size - 1)) {
		BUG_ON(size > 0x80000000);
		size = roundup_pow_of_two(size);
	}

	buffer = kmalloc(size, gfp_mask);
	if (!buffer)
		return ERR_PTR(-ENOMEM);

	ret = kfifo_init(buffer, size, gfp_mask, lock);

	/* kfifo_init only fails on struct allocation; release our buffer */
	if (IS_ERR(ret))
		kfree(buffer);

	return ret;
}
EXPORT_SYMBOL(kfifo_alloc);
|
||||
|
||||
/**
 * kfifo_free - frees the FIFO
 * @fifo: the fifo to be freed.
 *
 * Only for fifos created with kfifo_alloc(); both the internal buffer
 * and the struct are released.
 */
void kfifo_free(struct kfifo *fifo)
{
	kfree(fifo->buffer);
	kfree(fifo);
}
EXPORT_SYMBOL(kfifo_free);
|
||||
|
||||
/**
 * __kfifo_put - puts some data into the FIFO, no locking version
 * @fifo: the fifo to be used.
 * @buffer: the data to be added.
 * @len: the length of the data to be added.
 *
 * This function copies at most @len bytes from the @buffer into
 * the FIFO depending on the free space, and returns the number of
 * bytes copied.
 *
 * Note that with only one concurrent reader and one concurrent
 * writer, you don't need extra locking to use these functions.
 */
unsigned int __kfifo_put(struct kfifo *fifo,
			 unsigned char *buffer, unsigned int len)
{
	unsigned int l;

	/* clamp to free space; in/out are free-running, the unsigned
	 * subtraction handles wraparound */
	len = min(len, fifo->size - fifo->in + fifo->out);

	/*
	 * Ensure that we sample the fifo->out index -before- we
	 * start putting bytes into the kfifo.
	 */

	smp_mb();

	/* first put the data starting from fifo->in to buffer end */
	l = min(len, fifo->size - (fifo->in & (fifo->size - 1)));
	memcpy(fifo->buffer + (fifo->in & (fifo->size - 1)), buffer, l);

	/* then put the rest (if any) at the beginning of the buffer */
	memcpy(fifo->buffer, buffer + l, len - l);

	/*
	 * Ensure that we add the bytes to the kfifo -before-
	 * we update the fifo->in index.
	 */

	smp_wmb();

	fifo->in += len;

	return len;
}
EXPORT_SYMBOL(__kfifo_put);
|
||||
|
||||
/**
 * __kfifo_get - gets some data from the FIFO, no locking version
 * @fifo: the fifo to be used.
 * @buffer: where the data must be copied.
 * @len: the size of the destination buffer.
 *
 * This function copies at most @len bytes from the FIFO into the
 * @buffer and returns the number of copied bytes.
 *
 * Note that with only one concurrent reader and one concurrent
 * writer, you don't need extra locking to use these functions.
 */
unsigned int __kfifo_get(struct kfifo *fifo,
			 unsigned char *buffer, unsigned int len)
{
	unsigned int l;

	/* clamp to the number of bytes available */
	len = min(len, fifo->in - fifo->out);

	/*
	 * Ensure that we sample the fifo->in index -before- we
	 * start removing bytes from the kfifo.
	 */

	smp_rmb();

	/* first get the data from fifo->out until the end of the buffer */
	l = min(len, fifo->size - (fifo->out & (fifo->size - 1)));
	memcpy(buffer, fifo->buffer + (fifo->out & (fifo->size - 1)), l);

	/* then get the rest (if any) from the beginning of the buffer */
	memcpy(buffer + l, fifo->buffer, len - l);

	/*
	 * Ensure that we remove the bytes from the kfifo -before-
	 * we update the fifo->out index.
	 */

	smp_mb();

	fifo->out += len;

	return len;
}
EXPORT_SYMBOL(__kfifo_get);
|
||||
351
kernel/kmod.c
Normal file
351
kernel/kmod.c
Normal file
@@ -0,0 +1,351 @@
|
||||
/*
|
||||
kmod, the new module loader (replaces kerneld)
|
||||
Kirk Petersen
|
||||
|
||||
Reorganized not to be a daemon by Adam Richter, with guidance
|
||||
from Greg Zornetzer.
|
||||
|
||||
Modified to avoid chroot and file sharing problems.
|
||||
Mikael Pettersson
|
||||
|
||||
Limit the concurrent number of kmod modprobes to catch loops from
|
||||
"modprobe needs a service that is in a module".
|
||||
Keith Owens <kaos@ocs.com.au> December 1999
|
||||
|
||||
Unblock all signals when we exec a usermode process.
|
||||
Shuu Yamaguchi <shuu@wondernetworkresources.com> December 2000
|
||||
|
||||
call_usermodehelper wait flag, and remove exec_usermodehelper.
|
||||
Rusty Russell <rusty@rustcorp.com.au> Jan 2003
|
||||
*/
|
||||
#include <linux/module.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/unistd.h>
|
||||
#include <linux/kmod.h>
|
||||
#include <linux/smp_lock.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/mnt_namespace.h>
|
||||
#include <linux/completion.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/mount.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/resource.h>
|
||||
#include <asm/uaccess.h>
|
||||
|
||||
extern int max_threads;
|
||||
|
||||
static struct workqueue_struct *khelper_wq;
|
||||
|
||||
#ifdef CONFIG_KMOD
|
||||
|
||||
/*
|
||||
modprobe_path is set via /proc/sys.
|
||||
*/
|
||||
char modprobe_path[KMOD_PATH_LEN] = "/sbin/modprobe";
|
||||
|
||||
/**
 * request_module - try to load a kernel module
 * @fmt:     printf style format string for the name of the module
 * @varargs: arguments as specified in the format string
 *
 * Load a module using the user mode module loader. The function returns
 * zero on success or a negative errno code on failure. Note that a
 * successful module load does not mean the module did not then unload
 * and exit on an error of its own. Callers must check that the service
 * they requested is now available not blindly invoke it.
 *
 * If module auto-loading support is disabled then this function
 * becomes a no-operation.
 */
int request_module(const char *fmt, ...)
{
	va_list args;
	char module_name[MODULE_NAME_LEN];
	unsigned int max_modprobes;
	int ret;
	char *argv[] = { modprobe_path, "-q", "--", module_name, NULL };
	static char *envp[] = { "HOME=/",
				"TERM=linux",
				"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
				NULL };
	static atomic_t kmod_concurrent = ATOMIC_INIT(0);
#define MAX_KMOD_CONCURRENT 50	/* Completely arbitrary value - KAO */
	static int kmod_loop_msg;

	va_start(args, fmt);
	ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args);
	va_end(args);
	if (ret >= MODULE_NAME_LEN)
		return -ENAMETOOLONG;

	/* If modprobe needs a service that is in a module, we get a recursive
	 * loop.  Limit the number of running kmod threads to max_threads/2 or
	 * MAX_KMOD_CONCURRENT, whichever is the smaller.  A cleaner method
	 * would be to run the parents of this process, counting how many times
	 * kmod was invoked.  That would mean accessing the internals of the
	 * process tables to get the command line, proc_pid_cmdline is static
	 * and it is not worth changing the proc code just to handle this case.
	 * KAO.
	 *
	 * "trace the ppid" is simple, but will fail if someone's
	 * parent exits.  I think this is as good as it gets. --RR
	 */
	max_modprobes = min(max_threads/2, MAX_KMOD_CONCURRENT);
	atomic_inc(&kmod_concurrent);
	if (atomic_read(&kmod_concurrent) > max_modprobes) {
		/* We may be blaming an innocent here, but unlikely */
		if (kmod_loop_msg++ < 5)
			printk(KERN_ERR
			       "request_module: runaway loop modprobe %s\n",
			       module_name);
		atomic_dec(&kmod_concurrent);
		return -ENOMEM;
	}

	/* wait==1: block until modprobe exits so callers can retry lookup */
	ret = call_usermodehelper(modprobe_path, argv, envp, 1);
	atomic_dec(&kmod_concurrent);
	return ret;
}
EXPORT_SYMBOL(request_module);
|
||||
#endif /* CONFIG_KMOD */
|
||||
|
||||
/* Everything needed to launch one usermode helper, passed to khelper. */
struct subprocess_info {
	struct work_struct work;	/* queued on khelper_wq */
	struct completion *complete;	/* signalled when helper is done */
	char *path;			/* binary to exec */
	char **argv;
	char **envp;
	struct key *ring;		/* session keyring, may be NULL */
	int wait;			/* <0: fire-and-forget, 0: wait for
					 * exec, >0: wait for exit status */
	int retval;			/* exec/exit result for the caller */
	struct file *stdin;		/* optional pipe installed as fd 0 */
};
|
||||
|
||||
/*
 * This is the task which runs the usermode application
 */
static int ____call_usermodehelper(void *data)
{
	struct subprocess_info *sub_info = data;
	struct key *new_session, *old_session;
	int retval;

	/* Unblock all signals and set the session keyring. */
	new_session = key_get(sub_info->ring);
	flush_signals(current);
	spin_lock_irq(&current->sighand->siglock);
	old_session = __install_session_keyring(current, new_session);
	flush_signal_handlers(current, 1);
	sigemptyset(&current->blocked);
	recalc_sigpending();
	spin_unlock_irq(&current->sighand->siglock);

	key_put(old_session);

	/* Install input pipe when needed */
	if (sub_info->stdin) {
		struct files_struct *f = current->files;
		struct fdtable *fdt;
		/* no races because files should be private here */
		sys_close(0);
		fd_install(0, sub_info->stdin);
		spin_lock(&f->file_lock);
		fdt = files_fdtable(f);
		FD_SET(0, fdt->open_fds);
		FD_CLR(0, fdt->close_on_exec);
		spin_unlock(&f->file_lock);

		/* and disallow core files too */
		current->signal->rlim[RLIMIT_CORE] = (struct rlimit){0, 0};
	}

	/* We can run anywhere, unlike our parent keventd(). */
	set_cpus_allowed(current, CPU_MASK_ALL);

	retval = -EPERM;
	if (current->fs->root)
		retval = kernel_execve(sub_info->path,
				sub_info->argv, sub_info->envp);

	/* Exec failed?  (on success kernel_execve does not return here) */
	sub_info->retval = retval;
	do_exit(0);
}
|
||||
|
||||
/* Keventd can't block, but this (a child) can.  Spawns the helper and
 * reaps it, recording the exit status in sub_info->retval. */
static int wait_for_helper(void *data)
{
	struct subprocess_info *sub_info = data;
	pid_t pid;
	struct k_sigaction sa;

	/* Install a handler: if SIGCLD isn't handled sys_wait4 won't
	 * populate the status, but will return -ECHILD. */
	sa.sa.sa_handler = SIG_IGN;
	sa.sa.sa_flags = 0;
	siginitset(&sa.sa.sa_mask, sigmask(SIGCHLD));
	do_sigaction(SIGCHLD, &sa, NULL);
	allow_signal(SIGCHLD);

	pid = kernel_thread(____call_usermodehelper, sub_info, SIGCHLD);
	if (pid < 0) {
		sub_info->retval = pid;
	} else {
		int ret;

		/*
		 * Normally it is bogus to call wait4() from in-kernel because
		 * wait4() wants to write the exit code to a userspace address.
		 * But wait_for_helper() always runs as keventd, and put_user()
		 * to a kernel address works OK for kernel threads, due to their
		 * having an mm_segment_t which spans the entire address space.
		 *
		 * Thus the __user pointer cast is valid here.
		 */
		sys_wait4(pid, (int __user *)&ret, 0, NULL);

		/*
		 * If ret is 0, either ____call_usermodehelper failed and the
		 * real error code is already in sub_info->retval or
		 * sub_info->retval is 0 anyway, so don't mess with it then.
		 */
		if (ret)
			sub_info->retval = ret;
	}

	/* wait < 0: nobody is waiting; we own sub_info and must free it */
	if (sub_info->wait < 0)
		kfree(sub_info);
	else
		complete(sub_info->complete);
	return 0;
}
|
||||
|
||||
/* This is run by khelper thread  */
static void __call_usermodehelper(struct work_struct *work)
{
	struct subprocess_info *sub_info =
		container_of(work, struct subprocess_info, work);
	pid_t pid;
	int wait = sub_info->wait;

	/* CLONE_VFORK: wait until the usermode helper has execve'd
	 * successfully We need the data structures to stay around
	 * until that is done.  */
	if (wait)
		/* indirect via wait_for_helper so we can reap exit status */
		pid = kernel_thread(wait_for_helper, sub_info,
				    CLONE_FS | CLONE_FILES | SIGCHLD);
	else
		pid = kernel_thread(____call_usermodehelper, sub_info,
				    CLONE_VFORK | SIGCHLD);

	/* wait < 0: fire-and-forget; wait_for_helper frees sub_info and
	 * nobody is blocked on the completion */
	if (wait < 0)
		return;

	if (pid < 0) {
		sub_info->retval = pid;
		complete(sub_info->complete);
	} else if (!wait)
		complete(sub_info->complete);
}
|
||||
|
||||
/**
 * call_usermodehelper_keys - start a usermode application
 * @path: pathname for the application
 * @argv: null-terminated argument list
 * @envp: null-terminated environment list
 * @session_keyring: session keyring for process (NULL for an empty keyring)
 * @wait: wait for the application to finish and return status.
 *        when -1 don't wait at all, but you get no useful error back when
 *        the program couldn't be exec'ed. This makes it safe to call
 *        from interrupt context.
 *
 * Runs a user-space application. The application is started
 * asynchronously if wait is not set, and runs as a child of keventd.
 * (ie. it runs with full root capabilities).
 *
 * Must be called from process context.  Returns a negative error code
 * if program was not execed successfully, or 0.
 */
int call_usermodehelper_keys(char *path, char **argv, char **envp,
			     struct key *session_keyring, int wait)
{
	DECLARE_COMPLETION_ONSTACK(done);
	struct subprocess_info *sub_info;
	int retval;

	if (!khelper_wq)
		return -EBUSY;

	if (path[0] == '\0')
		return 0;

	/* GFP_ATOMIC: may be called with wait==-1 from atomic context */
	sub_info = kzalloc(sizeof(struct subprocess_info), GFP_ATOMIC);
	if (!sub_info)
		return -ENOMEM;

	INIT_WORK(&sub_info->work, __call_usermodehelper);
	sub_info->complete = &done;
	sub_info->path = path;
	sub_info->argv = argv;
	sub_info->envp = envp;
	sub_info->ring = session_keyring;
	sub_info->wait = wait;

	queue_work(khelper_wq, &sub_info->work);
	if (wait < 0) /* task has freed sub_info */
		return 0;
	wait_for_completion(&done);
	retval = sub_info->retval;
	kfree(sub_info);
	return retval;
}
EXPORT_SYMBOL(call_usermodehelper_keys);
|
||||
|
||||
/*
 * call_usermodehelper_pipe - start a usermode helper with a pipe on stdin
 * @path: pathname for the application
 * @argv: null-terminated argument list
 * @envp: null-terminated environment list
 * @filp: out: write end of the pipe feeding the helper's fd 0
 *
 * Waits for the helper to finish and returns its status.  On success
 * the caller owns *@filp and must close it when done writing.
 */
int call_usermodehelper_pipe(char *path, char **argv, char **envp,
			     struct file **filp)
{
	DECLARE_COMPLETION(done);
	struct subprocess_info sub_info = {
		.work		= __WORK_INITIALIZER(sub_info.work,
						     __call_usermodehelper),
		.complete	= &done,
		.path		= path,
		.argv		= argv,
		.envp		= envp,
		.retval		= 0,
	};
	struct file *f;

	if (!khelper_wq)
		return -EBUSY;

	if (path[0] == '\0')
		return 0;

	f = create_write_pipe();
	if (IS_ERR(f))
		return PTR_ERR(f);
	*filp = f;

	f = create_read_pipe(f);
	if (IS_ERR(f)) {
		free_write_pipe(*filp);
		return PTR_ERR(f);
	}
	/* read end becomes the helper's stdin (installed as fd 0) */
	sub_info.stdin = f;

	queue_work(khelper_wq, &sub_info.work);
	wait_for_completion(&done);
	return sub_info.retval;
}
EXPORT_SYMBOL(call_usermodehelper_pipe);
|
||||
|
||||
/* Boot-time setup: create the single-threaded workqueue all usermode
 * helpers are launched from. */
void __init usermodehelper_init(void)
{
	khelper_wq = create_singlethread_workqueue("khelper");
	BUG_ON(!khelper_wq);
}
|
||||
936
kernel/kprobes.c
Normal file
936
kernel/kprobes.c
Normal file
@@ -0,0 +1,936 @@
|
||||
/*
|
||||
* Kernel Probes (KProbes)
|
||||
* kernel/kprobes.c
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*
|
||||
* Copyright (C) IBM Corporation, 2002, 2004
|
||||
*
|
||||
* 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
|
||||
* Probes initial implementation (includes suggestions from
|
||||
* Rusty Russell).
|
||||
* 2004-Aug Updated by Prasanna S Panchamukhi <prasanna@in.ibm.com> with
|
||||
* hlists and exceptions notifier as suggested by Andi Kleen.
|
||||
* 2004-July Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
|
||||
* interface to access function arguments.
|
||||
* 2004-Sep Prasanna S Panchamukhi <prasanna@in.ibm.com> Changed Kprobes
|
||||
* exceptions notifier to be first on the priority list.
|
||||
* 2005-May Hien Nguyen <hien@us.ibm.com>, Jim Keniston
|
||||
* <jkenisto@us.ibm.com> and Prasanna S Panchamukhi
|
||||
* <prasanna@in.ibm.com> added function-return probes.
|
||||
*/
|
||||
#include <linux/kprobes.h>
|
||||
#include <linux/hash.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/moduleloader.h>
|
||||
#include <linux/kallsyms.h>
|
||||
#include <linux/freezer.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/debugfs.h>
|
||||
#include <asm-generic/sections.h>
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/errno.h>
|
||||
#include <asm/kdebug.h>
|
||||
|
||||
#define KPROBE_HASH_BITS 6
|
||||
#define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)
|
||||
|
||||
|
||||
/*
|
||||
* Some oddball architectures like 64bit powerpc have function descriptors
|
||||
* so this must be overridable.
|
||||
*/
|
||||
#ifndef kprobe_lookup_name
|
||||
#define kprobe_lookup_name(name, addr) \
|
||||
addr = ((kprobe_opcode_t *)(kallsyms_lookup_name(name)))
|
||||
#endif
|
||||
|
||||
static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
|
||||
static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
|
||||
static atomic_t kprobe_count;
|
||||
|
||||
DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */
|
||||
DEFINE_SPINLOCK(kretprobe_lock); /* Protects kretprobe_inst_table */
|
||||
static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
|
||||
|
||||
static struct notifier_block kprobe_page_fault_nb = {
|
||||
.notifier_call = kprobe_exceptions_notify,
|
||||
.priority = 0x7fffffff /* we need to notified first */
|
||||
};
|
||||
|
||||
#ifdef __ARCH_WANT_KPROBES_INSN_SLOT
|
||||
/*
|
||||
* kprobe->ainsn.insn points to the copy of the instruction to be
|
||||
* single-stepped. x86_64, POWER4 and above have no-exec support and
|
||||
* stepping on the instruction on a vmalloced/kmalloced/data page
|
||||
* is a recipe for disaster
|
||||
*/
|
||||
#define INSNS_PER_PAGE (PAGE_SIZE/(MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
|
||||
|
||||
struct kprobe_insn_page {
|
||||
struct hlist_node hlist;
|
||||
kprobe_opcode_t *insns; /* Page of instruction slots */
|
||||
char slot_used[INSNS_PER_PAGE];
|
||||
int nused;
|
||||
int ngarbage;
|
||||
};
|
||||
|
||||
enum kprobe_slot_state {
|
||||
SLOT_CLEAN = 0,
|
||||
SLOT_DIRTY = 1,
|
||||
SLOT_USED = 2,
|
||||
};
|
||||
|
||||
static struct hlist_head kprobe_insn_pages;
|
||||
static int kprobe_garbage_slots;
|
||||
static int collect_garbage_slots(void);
|
||||
|
||||
static int __kprobes check_safety(void)
|
||||
{
|
||||
int ret = 0;
|
||||
#if defined(CONFIG_PREEMPT) && defined(CONFIG_PM)
|
||||
ret = freeze_processes();
|
||||
if (ret == 0) {
|
||||
struct task_struct *p, *q;
|
||||
do_each_thread(p, q) {
|
||||
if (p != current && p->state == TASK_RUNNING &&
|
||||
p->pid != 0) {
|
||||
printk("Check failed: %s is running\n",p->comm);
|
||||
ret = -1;
|
||||
goto loop_end;
|
||||
}
|
||||
} while_each_thread(p, q);
|
||||
}
|
||||
loop_end:
|
||||
thaw_processes();
|
||||
#else
|
||||
synchronize_sched();
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* get_insn_slot() - Find a slot on an executable page for an instruction.
|
||||
* We allocate an executable page if there's no room on existing ones.
|
||||
*/
|
||||
kprobe_opcode_t __kprobes *get_insn_slot(void)
|
||||
{
|
||||
struct kprobe_insn_page *kip;
|
||||
struct hlist_node *pos;
|
||||
|
||||
retry:
|
||||
hlist_for_each(pos, &kprobe_insn_pages) {
|
||||
kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
|
||||
if (kip->nused < INSNS_PER_PAGE) {
|
||||
int i;
|
||||
for (i = 0; i < INSNS_PER_PAGE; i++) {
|
||||
if (kip->slot_used[i] == SLOT_CLEAN) {
|
||||
kip->slot_used[i] = SLOT_USED;
|
||||
kip->nused++;
|
||||
return kip->insns + (i * MAX_INSN_SIZE);
|
||||
}
|
||||
}
|
||||
/* Surprise! No unused slots. Fix kip->nused. */
|
||||
kip->nused = INSNS_PER_PAGE;
|
||||
}
|
||||
}
|
||||
|
||||
/* If there are any garbage slots, collect it and try again. */
|
||||
if (kprobe_garbage_slots && collect_garbage_slots() == 0) {
|
||||
goto retry;
|
||||
}
|
||||
/* All out of space. Need to allocate a new page. Use slot 0. */
|
||||
kip = kmalloc(sizeof(struct kprobe_insn_page), GFP_KERNEL);
|
||||
if (!kip) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Use module_alloc so this page is within +/- 2GB of where the
|
||||
* kernel image and loaded module images reside. This is required
|
||||
* so x86_64 can correctly handle the %rip-relative fixups.
|
||||
*/
|
||||
kip->insns = module_alloc(PAGE_SIZE);
|
||||
if (!kip->insns) {
|
||||
kfree(kip);
|
||||
return NULL;
|
||||
}
|
||||
INIT_HLIST_NODE(&kip->hlist);
|
||||
hlist_add_head(&kip->hlist, &kprobe_insn_pages);
|
||||
memset(kip->slot_used, SLOT_CLEAN, INSNS_PER_PAGE);
|
||||
kip->slot_used[0] = SLOT_USED;
|
||||
kip->nused = 1;
|
||||
kip->ngarbage = 0;
|
||||
return kip->insns;
|
||||
}
|
||||
|
||||
/* Return 1 if all garbages are collected, otherwise 0. */
|
||||
static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx)
|
||||
{
|
||||
kip->slot_used[idx] = SLOT_CLEAN;
|
||||
kip->nused--;
|
||||
if (kip->nused == 0) {
|
||||
/*
|
||||
* Page is no longer in use. Free it unless
|
||||
* it's the last one. We keep the last one
|
||||
* so as not to have to set it up again the
|
||||
* next time somebody inserts a probe.
|
||||
*/
|
||||
hlist_del(&kip->hlist);
|
||||
if (hlist_empty(&kprobe_insn_pages)) {
|
||||
INIT_HLIST_NODE(&kip->hlist);
|
||||
hlist_add_head(&kip->hlist,
|
||||
&kprobe_insn_pages);
|
||||
} else {
|
||||
module_free(NULL, kip->insns);
|
||||
kfree(kip);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __kprobes collect_garbage_slots(void)
|
||||
{
|
||||
struct kprobe_insn_page *kip;
|
||||
struct hlist_node *pos, *next;
|
||||
|
||||
/* Ensure no-one is preepmted on the garbages */
|
||||
if (check_safety() != 0)
|
||||
return -EAGAIN;
|
||||
|
||||
hlist_for_each_safe(pos, next, &kprobe_insn_pages) {
|
||||
int i;
|
||||
kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
|
||||
if (kip->ngarbage == 0)
|
||||
continue;
|
||||
kip->ngarbage = 0; /* we will collect all garbages */
|
||||
for (i = 0; i < INSNS_PER_PAGE; i++) {
|
||||
if (kip->slot_used[i] == SLOT_DIRTY &&
|
||||
collect_one_slot(kip, i))
|
||||
break;
|
||||
}
|
||||
}
|
||||
kprobe_garbage_slots = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty)
|
||||
{
|
||||
struct kprobe_insn_page *kip;
|
||||
struct hlist_node *pos;
|
||||
|
||||
hlist_for_each(pos, &kprobe_insn_pages) {
|
||||
kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
|
||||
if (kip->insns <= slot &&
|
||||
slot < kip->insns + (INSNS_PER_PAGE * MAX_INSN_SIZE)) {
|
||||
int i = (slot - kip->insns) / MAX_INSN_SIZE;
|
||||
if (dirty) {
|
||||
kip->slot_used[i] = SLOT_DIRTY;
|
||||
kip->ngarbage++;
|
||||
} else {
|
||||
collect_one_slot(kip, i);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (dirty && (++kprobe_garbage_slots > INSNS_PER_PAGE)) {
|
||||
collect_garbage_slots();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* We have preemption disabled.. so it is safe to use __ versions */
|
||||
static inline void set_kprobe_instance(struct kprobe *kp)
|
||||
{
|
||||
__get_cpu_var(kprobe_instance) = kp;
|
||||
}
|
||||
|
||||
static inline void reset_kprobe_instance(void)
|
||||
{
|
||||
__get_cpu_var(kprobe_instance) = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* This routine is called either:
|
||||
* - under the kprobe_mutex - during kprobe_[un]register()
|
||||
* OR
|
||||
* - with preemption disabled - from arch/xxx/kernel/kprobes.c
|
||||
*/
|
||||
struct kprobe __kprobes *get_kprobe(void *addr)
|
||||
{
|
||||
struct hlist_head *head;
|
||||
struct hlist_node *node;
|
||||
struct kprobe *p;
|
||||
|
||||
head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)];
|
||||
hlist_for_each_entry_rcu(p, node, head, hlist) {
|
||||
if (p->addr == addr)
|
||||
return p;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Aggregate handlers for multiple kprobes support - these handlers
|
||||
* take care of invoking the individual kprobe handlers on p->list
|
||||
*/
|
||||
static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
|
||||
{
|
||||
struct kprobe *kp;
|
||||
|
||||
list_for_each_entry_rcu(kp, &p->list, list) {
|
||||
if (kp->pre_handler) {
|
||||
set_kprobe_instance(kp);
|
||||
if (kp->pre_handler(kp, regs))
|
||||
return 1;
|
||||
}
|
||||
reset_kprobe_instance();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
|
||||
unsigned long flags)
|
||||
{
|
||||
struct kprobe *kp;
|
||||
|
||||
list_for_each_entry_rcu(kp, &p->list, list) {
|
||||
if (kp->post_handler) {
|
||||
set_kprobe_instance(kp);
|
||||
kp->post_handler(kp, regs, flags);
|
||||
reset_kprobe_instance();
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
|
||||
int trapnr)
|
||||
{
|
||||
struct kprobe *cur = __get_cpu_var(kprobe_instance);
|
||||
|
||||
/*
|
||||
* if we faulted "during" the execution of a user specified
|
||||
* probe handler, invoke just that probe's fault handler
|
||||
*/
|
||||
if (cur && cur->fault_handler) {
|
||||
if (cur->fault_handler(cur, regs, trapnr))
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
|
||||
{
|
||||
struct kprobe *cur = __get_cpu_var(kprobe_instance);
|
||||
int ret = 0;
|
||||
|
||||
if (cur && cur->break_handler) {
|
||||
if (cur->break_handler(cur, regs))
|
||||
ret = 1;
|
||||
}
|
||||
reset_kprobe_instance();
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Walks the list and increments nmissed count for multiprobe case */
|
||||
void __kprobes kprobes_inc_nmissed_count(struct kprobe *p)
|
||||
{
|
||||
struct kprobe *kp;
|
||||
if (p->pre_handler != aggr_pre_handler) {
|
||||
p->nmissed++;
|
||||
} else {
|
||||
list_for_each_entry_rcu(kp, &p->list, list)
|
||||
kp->nmissed++;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* Called with kretprobe_lock held */
|
||||
struct kretprobe_instance __kprobes *get_free_rp_inst(struct kretprobe *rp)
|
||||
{
|
||||
struct hlist_node *node;
|
||||
struct kretprobe_instance *ri;
|
||||
hlist_for_each_entry(ri, node, &rp->free_instances, uflist)
|
||||
return ri;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Called with kretprobe_lock held */
|
||||
static struct kretprobe_instance __kprobes *get_used_rp_inst(struct kretprobe
|
||||
*rp)
|
||||
{
|
||||
struct hlist_node *node;
|
||||
struct kretprobe_instance *ri;
|
||||
hlist_for_each_entry(ri, node, &rp->used_instances, uflist)
|
||||
return ri;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Called with kretprobe_lock held */
|
||||
void __kprobes add_rp_inst(struct kretprobe_instance *ri)
|
||||
{
|
||||
/*
|
||||
* Remove rp inst off the free list -
|
||||
* Add it back when probed function returns
|
||||
*/
|
||||
hlist_del(&ri->uflist);
|
||||
|
||||
/* Add rp inst onto table */
|
||||
INIT_HLIST_NODE(&ri->hlist);
|
||||
hlist_add_head(&ri->hlist,
|
||||
&kretprobe_inst_table[hash_ptr(ri->task, KPROBE_HASH_BITS)]);
|
||||
|
||||
/* Also add this rp inst to the used list. */
|
||||
INIT_HLIST_NODE(&ri->uflist);
|
||||
hlist_add_head(&ri->uflist, &ri->rp->used_instances);
|
||||
}
|
||||
|
||||
/* Called with kretprobe_lock held */
|
||||
void __kprobes recycle_rp_inst(struct kretprobe_instance *ri,
|
||||
struct hlist_head *head)
|
||||
{
|
||||
/* remove rp inst off the rprobe_inst_table */
|
||||
hlist_del(&ri->hlist);
|
||||
if (ri->rp) {
|
||||
/* remove rp inst off the used list */
|
||||
hlist_del(&ri->uflist);
|
||||
/* put rp inst back onto the free list */
|
||||
INIT_HLIST_NODE(&ri->uflist);
|
||||
hlist_add_head(&ri->uflist, &ri->rp->free_instances);
|
||||
} else
|
||||
/* Unregistering */
|
||||
hlist_add_head(&ri->hlist, head);
|
||||
}
|
||||
|
||||
struct hlist_head __kprobes *kretprobe_inst_table_head(struct task_struct *tsk)
|
||||
{
|
||||
return &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)];
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is called from finish_task_switch when task tk becomes dead,
|
||||
* so that we can recycle any function-return probe instances associated
|
||||
* with this task. These left over instances represent probed functions
|
||||
* that have been called but will never return.
|
||||
*/
|
||||
void __kprobes kprobe_flush_task(struct task_struct *tk)
|
||||
{
|
||||
struct kretprobe_instance *ri;
|
||||
struct hlist_head *head, empty_rp;
|
||||
struct hlist_node *node, *tmp;
|
||||
unsigned long flags = 0;
|
||||
|
||||
INIT_HLIST_HEAD(&empty_rp);
|
||||
spin_lock_irqsave(&kretprobe_lock, flags);
|
||||
head = kretprobe_inst_table_head(tk);
|
||||
hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
|
||||
if (ri->task == tk)
|
||||
recycle_rp_inst(ri, &empty_rp);
|
||||
}
|
||||
spin_unlock_irqrestore(&kretprobe_lock, flags);
|
||||
|
||||
hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
|
||||
hlist_del(&ri->hlist);
|
||||
kfree(ri);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void free_rp_inst(struct kretprobe *rp)
|
||||
{
|
||||
struct kretprobe_instance *ri;
|
||||
while ((ri = get_free_rp_inst(rp)) != NULL) {
|
||||
hlist_del(&ri->uflist);
|
||||
kfree(ri);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Keep all fields in the kprobe consistent
|
||||
*/
|
||||
static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p)
|
||||
{
|
||||
memcpy(&p->opcode, &old_p->opcode, sizeof(kprobe_opcode_t));
|
||||
memcpy(&p->ainsn, &old_p->ainsn, sizeof(struct arch_specific_insn));
|
||||
}
|
||||
|
||||
/*
|
||||
* Add the new probe to old_p->list. Fail if this is the
|
||||
* second jprobe at the address - two jprobes can't coexist
|
||||
*/
|
||||
static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p)
|
||||
{
|
||||
if (p->break_handler) {
|
||||
if (old_p->break_handler)
|
||||
return -EEXIST;
|
||||
list_add_tail_rcu(&p->list, &old_p->list);
|
||||
old_p->break_handler = aggr_break_handler;
|
||||
} else
|
||||
list_add_rcu(&p->list, &old_p->list);
|
||||
if (p->post_handler && !old_p->post_handler)
|
||||
old_p->post_handler = aggr_post_handler;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Fill in the required fields of the "manager kprobe". Replace the
|
||||
* earlier kprobe in the hlist with the manager kprobe
|
||||
*/
|
||||
static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
|
||||
{
|
||||
copy_kprobe(p, ap);
|
||||
flush_insn_slot(ap);
|
||||
ap->addr = p->addr;
|
||||
ap->pre_handler = aggr_pre_handler;
|
||||
ap->fault_handler = aggr_fault_handler;
|
||||
if (p->post_handler)
|
||||
ap->post_handler = aggr_post_handler;
|
||||
if (p->break_handler)
|
||||
ap->break_handler = aggr_break_handler;
|
||||
|
||||
INIT_LIST_HEAD(&ap->list);
|
||||
list_add_rcu(&p->list, &ap->list);
|
||||
|
||||
hlist_replace_rcu(&p->hlist, &ap->hlist);
|
||||
}
|
||||
|
||||
/*
|
||||
* This is the second or subsequent kprobe at the address - handle
|
||||
* the intricacies
|
||||
*/
|
||||
static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
|
||||
struct kprobe *p)
|
||||
{
|
||||
int ret = 0;
|
||||
struct kprobe *ap;
|
||||
|
||||
if (old_p->pre_handler == aggr_pre_handler) {
|
||||
copy_kprobe(old_p, p);
|
||||
ret = add_new_kprobe(old_p, p);
|
||||
} else {
|
||||
ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL);
|
||||
if (!ap)
|
||||
return -ENOMEM;
|
||||
add_aggr_kprobe(ap, old_p);
|
||||
copy_kprobe(ap, p);
|
||||
ret = add_new_kprobe(ap, p);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __kprobes in_kprobes_functions(unsigned long addr)
|
||||
{
|
||||
if (addr >= (unsigned long)__kprobes_text_start
|
||||
&& addr < (unsigned long)__kprobes_text_end)
|
||||
return -EINVAL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __kprobes __register_kprobe(struct kprobe *p,
|
||||
unsigned long called_from)
|
||||
{
|
||||
int ret = 0;
|
||||
struct kprobe *old_p;
|
||||
struct module *probed_mod;
|
||||
|
||||
/*
|
||||
* If we have a symbol_name argument look it up,
|
||||
* and add it to the address. That way the addr
|
||||
* field can either be global or relative to a symbol.
|
||||
*/
|
||||
if (p->symbol_name) {
|
||||
if (p->addr)
|
||||
return -EINVAL;
|
||||
kprobe_lookup_name(p->symbol_name, p->addr);
|
||||
}
|
||||
|
||||
if (!p->addr)
|
||||
return -EINVAL;
|
||||
p->addr = (kprobe_opcode_t *)(((char *)p->addr)+ p->offset);
|
||||
|
||||
if ((!kernel_text_address((unsigned long) p->addr)) ||
|
||||
in_kprobes_functions((unsigned long) p->addr))
|
||||
return -EINVAL;
|
||||
|
||||
p->mod_refcounted = 0;
|
||||
/* Check are we probing a module */
|
||||
if ((probed_mod = module_text_address((unsigned long) p->addr))) {
|
||||
struct module *calling_mod = module_text_address(called_from);
|
||||
/* We must allow modules to probe themself and
|
||||
* in this case avoid incrementing the module refcount,
|
||||
* so as to allow unloading of self probing modules.
|
||||
*/
|
||||
if (calling_mod && (calling_mod != probed_mod)) {
|
||||
if (unlikely(!try_module_get(probed_mod)))
|
||||
return -EINVAL;
|
||||
p->mod_refcounted = 1;
|
||||
} else
|
||||
probed_mod = NULL;
|
||||
}
|
||||
|
||||
p->nmissed = 0;
|
||||
mutex_lock(&kprobe_mutex);
|
||||
old_p = get_kprobe(p->addr);
|
||||
if (old_p) {
|
||||
ret = register_aggr_kprobe(old_p, p);
|
||||
if (!ret)
|
||||
atomic_inc(&kprobe_count);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if ((ret = arch_prepare_kprobe(p)) != 0)
|
||||
goto out;
|
||||
|
||||
INIT_HLIST_NODE(&p->hlist);
|
||||
hlist_add_head_rcu(&p->hlist,
|
||||
&kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
|
||||
|
||||
if (atomic_add_return(1, &kprobe_count) == \
|
||||
(ARCH_INACTIVE_KPROBE_COUNT + 1))
|
||||
register_page_fault_notifier(&kprobe_page_fault_nb);
|
||||
|
||||
arch_arm_kprobe(p);
|
||||
|
||||
out:
|
||||
mutex_unlock(&kprobe_mutex);
|
||||
|
||||
if (ret && probed_mod)
|
||||
module_put(probed_mod);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int __kprobes register_kprobe(struct kprobe *p)
|
||||
{
|
||||
return __register_kprobe(p,
|
||||
(unsigned long)__builtin_return_address(0));
|
||||
}
|
||||
|
||||
void __kprobes unregister_kprobe(struct kprobe *p)
|
||||
{
|
||||
struct module *mod;
|
||||
struct kprobe *old_p, *list_p;
|
||||
int cleanup_p;
|
||||
|
||||
mutex_lock(&kprobe_mutex);
|
||||
old_p = get_kprobe(p->addr);
|
||||
if (unlikely(!old_p)) {
|
||||
mutex_unlock(&kprobe_mutex);
|
||||
return;
|
||||
}
|
||||
if (p != old_p) {
|
||||
list_for_each_entry_rcu(list_p, &old_p->list, list)
|
||||
if (list_p == p)
|
||||
/* kprobe p is a valid probe */
|
||||
goto valid_p;
|
||||
mutex_unlock(&kprobe_mutex);
|
||||
return;
|
||||
}
|
||||
valid_p:
|
||||
if ((old_p == p) || ((old_p->pre_handler == aggr_pre_handler) &&
|
||||
(p->list.next == &old_p->list) &&
|
||||
(p->list.prev == &old_p->list))) {
|
||||
/* Only probe on the hash list */
|
||||
arch_disarm_kprobe(p);
|
||||
hlist_del_rcu(&old_p->hlist);
|
||||
cleanup_p = 1;
|
||||
} else {
|
||||
list_del_rcu(&p->list);
|
||||
cleanup_p = 0;
|
||||
}
|
||||
|
||||
mutex_unlock(&kprobe_mutex);
|
||||
|
||||
synchronize_sched();
|
||||
if (p->mod_refcounted &&
|
||||
(mod = module_text_address((unsigned long)p->addr)))
|
||||
module_put(mod);
|
||||
|
||||
if (cleanup_p) {
|
||||
if (p != old_p) {
|
||||
list_del_rcu(&p->list);
|
||||
kfree(old_p);
|
||||
}
|
||||
arch_remove_kprobe(p);
|
||||
} else {
|
||||
mutex_lock(&kprobe_mutex);
|
||||
if (p->break_handler)
|
||||
old_p->break_handler = NULL;
|
||||
if (p->post_handler){
|
||||
list_for_each_entry_rcu(list_p, &old_p->list, list){
|
||||
if (list_p->post_handler){
|
||||
cleanup_p = 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (cleanup_p == 0)
|
||||
old_p->post_handler = NULL;
|
||||
}
|
||||
mutex_unlock(&kprobe_mutex);
|
||||
}
|
||||
|
||||
/* Call unregister_page_fault_notifier()
|
||||
* if no probes are active
|
||||
*/
|
||||
mutex_lock(&kprobe_mutex);
|
||||
if (atomic_add_return(-1, &kprobe_count) == \
|
||||
ARCH_INACTIVE_KPROBE_COUNT)
|
||||
unregister_page_fault_notifier(&kprobe_page_fault_nb);
|
||||
mutex_unlock(&kprobe_mutex);
|
||||
return;
|
||||
}
|
||||
|
||||
static struct notifier_block kprobe_exceptions_nb = {
|
||||
.notifier_call = kprobe_exceptions_notify,
|
||||
.priority = 0x7fffffff /* we need to be notified first */
|
||||
};
|
||||
|
||||
|
||||
int __kprobes register_jprobe(struct jprobe *jp)
|
||||
{
|
||||
/* Todo: Verify probepoint is a function entry point */
|
||||
jp->kp.pre_handler = setjmp_pre_handler;
|
||||
jp->kp.break_handler = longjmp_break_handler;
|
||||
|
||||
return __register_kprobe(&jp->kp,
|
||||
(unsigned long)__builtin_return_address(0));
|
||||
}
|
||||
|
||||
void __kprobes unregister_jprobe(struct jprobe *jp)
|
||||
{
|
||||
unregister_kprobe(&jp->kp);
|
||||
}
|
||||
|
||||
#ifdef ARCH_SUPPORTS_KRETPROBES
|
||||
|
||||
/*
|
||||
* This kprobe pre_handler is registered with every kretprobe. When probe
|
||||
* hits it will set up the return probe.
|
||||
*/
|
||||
static int __kprobes pre_handler_kretprobe(struct kprobe *p,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
struct kretprobe *rp = container_of(p, struct kretprobe, kp);
|
||||
unsigned long flags = 0;
|
||||
|
||||
/*TODO: consider to only swap the RA after the last pre_handler fired */
|
||||
spin_lock_irqsave(&kretprobe_lock, flags);
|
||||
arch_prepare_kretprobe(rp, regs);
|
||||
spin_unlock_irqrestore(&kretprobe_lock, flags);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int __kprobes register_kretprobe(struct kretprobe *rp)
|
||||
{
|
||||
int ret = 0;
|
||||
struct kretprobe_instance *inst;
|
||||
int i;
|
||||
|
||||
rp->kp.pre_handler = pre_handler_kretprobe;
|
||||
rp->kp.post_handler = NULL;
|
||||
rp->kp.fault_handler = NULL;
|
||||
rp->kp.break_handler = NULL;
|
||||
|
||||
/* Pre-allocate memory for max kretprobe instances */
|
||||
if (rp->maxactive <= 0) {
|
||||
#ifdef CONFIG_PREEMPT
|
||||
rp->maxactive = max(10, 2 * NR_CPUS);
|
||||
#else
|
||||
rp->maxactive = NR_CPUS;
|
||||
#endif
|
||||
}
|
||||
INIT_HLIST_HEAD(&rp->used_instances);
|
||||
INIT_HLIST_HEAD(&rp->free_instances);
|
||||
for (i = 0; i < rp->maxactive; i++) {
|
||||
inst = kmalloc(sizeof(struct kretprobe_instance), GFP_KERNEL);
|
||||
if (inst == NULL) {
|
||||
free_rp_inst(rp);
|
||||
return -ENOMEM;
|
||||
}
|
||||
INIT_HLIST_NODE(&inst->uflist);
|
||||
hlist_add_head(&inst->uflist, &rp->free_instances);
|
||||
}
|
||||
|
||||
rp->nmissed = 0;
|
||||
/* Establish function entry probe point */
|
||||
if ((ret = __register_kprobe(&rp->kp,
|
||||
(unsigned long)__builtin_return_address(0))) != 0)
|
||||
free_rp_inst(rp);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#else /* ARCH_SUPPORTS_KRETPROBES */
|
||||
|
||||
int __kprobes register_kretprobe(struct kretprobe *rp)
|
||||
{
|
||||
return -ENOSYS;
|
||||
}
|
||||
|
||||
static int __kprobes pre_handler_kretprobe(struct kprobe *p,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* ARCH_SUPPORTS_KRETPROBES */
|
||||
|
||||
void __kprobes unregister_kretprobe(struct kretprobe *rp)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct kretprobe_instance *ri;
|
||||
|
||||
unregister_kprobe(&rp->kp);
|
||||
/* No race here */
|
||||
spin_lock_irqsave(&kretprobe_lock, flags);
|
||||
while ((ri = get_used_rp_inst(rp)) != NULL) {
|
||||
ri->rp = NULL;
|
||||
hlist_del(&ri->uflist);
|
||||
}
|
||||
spin_unlock_irqrestore(&kretprobe_lock, flags);
|
||||
free_rp_inst(rp);
|
||||
}
|
||||
|
||||
static int __init init_kprobes(void)
|
||||
{
|
||||
int i, err = 0;
|
||||
|
||||
/* FIXME allocate the probe table, currently defined statically */
|
||||
/* initialize all list heads */
|
||||
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
|
||||
INIT_HLIST_HEAD(&kprobe_table[i]);
|
||||
INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
|
||||
}
|
||||
atomic_set(&kprobe_count, 0);
|
||||
|
||||
err = arch_init_kprobes();
|
||||
if (!err)
|
||||
err = register_die_notifier(&kprobe_exceptions_nb);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p,
|
||||
const char *sym, int offset,char *modname)
|
||||
{
|
||||
char *kprobe_type;
|
||||
|
||||
if (p->pre_handler == pre_handler_kretprobe)
|
||||
kprobe_type = "r";
|
||||
else if (p->pre_handler == setjmp_pre_handler)
|
||||
kprobe_type = "j";
|
||||
else
|
||||
kprobe_type = "k";
|
||||
if (sym)
|
||||
seq_printf(pi, "%p %s %s+0x%x %s\n", p->addr, kprobe_type,
|
||||
sym, offset, (modname ? modname : " "));
|
||||
else
|
||||
seq_printf(pi, "%p %s %p\n", p->addr, kprobe_type, p->addr);
|
||||
}
|
||||
|
||||
static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos)
|
||||
{
|
||||
return (*pos < KPROBE_TABLE_SIZE) ? pos : NULL;
|
||||
}
|
||||
|
||||
static void __kprobes *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos)
|
||||
{
|
||||
(*pos)++;
|
||||
if (*pos >= KPROBE_TABLE_SIZE)
|
||||
return NULL;
|
||||
return pos;
|
||||
}
|
||||
|
||||
static void __kprobes kprobe_seq_stop(struct seq_file *f, void *v)
|
||||
{
|
||||
/* Nothing to do */
|
||||
}
|
||||
|
||||
static int __kprobes show_kprobe_addr(struct seq_file *pi, void *v)
|
||||
{
|
||||
struct hlist_head *head;
|
||||
struct hlist_node *node;
|
||||
struct kprobe *p, *kp;
|
||||
const char *sym = NULL;
|
||||
unsigned int i = *(loff_t *) v;
|
||||
unsigned long size, offset = 0;
|
||||
char *modname, namebuf[128];
|
||||
|
||||
head = &kprobe_table[i];
|
||||
preempt_disable();
|
||||
hlist_for_each_entry_rcu(p, node, head, hlist) {
|
||||
sym = kallsyms_lookup((unsigned long)p->addr, &size,
|
||||
&offset, &modname, namebuf);
|
||||
if (p->pre_handler == aggr_pre_handler) {
|
||||
list_for_each_entry_rcu(kp, &p->list, list)
|
||||
report_probe(pi, kp, sym, offset, modname);
|
||||
} else
|
||||
report_probe(pi, p, sym, offset, modname);
|
||||
}
|
||||
preempt_enable();
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct seq_operations kprobes_seq_ops = {
|
||||
.start = kprobe_seq_start,
|
||||
.next = kprobe_seq_next,
|
||||
.stop = kprobe_seq_stop,
|
||||
.show = show_kprobe_addr
|
||||
};
|
||||
|
||||
static int __kprobes kprobes_open(struct inode *inode, struct file *filp)
|
||||
{
|
||||
return seq_open(filp, &kprobes_seq_ops);
|
||||
}
|
||||
|
||||
static struct file_operations debugfs_kprobes_operations = {
|
||||
.open = kprobes_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = seq_release,
|
||||
};
|
||||
|
||||
static int __kprobes debugfs_kprobe_init(void)
|
||||
{
|
||||
struct dentry *dir, *file;
|
||||
|
||||
dir = debugfs_create_dir("kprobes", NULL);
|
||||
if (!dir)
|
||||
return -ENOMEM;
|
||||
|
||||
file = debugfs_create_file("list", 0444, dir , 0 ,
|
||||
&debugfs_kprobes_operations);
|
||||
if (!file) {
|
||||
debugfs_remove(dir);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
late_initcall(debugfs_kprobe_init);
|
||||
#endif /* CONFIG_DEBUG_FS */
|
||||
|
||||
module_init(init_kprobes);
|
||||
|
||||
EXPORT_SYMBOL_GPL(register_kprobe);
|
||||
EXPORT_SYMBOL_GPL(unregister_kprobe);
|
||||
EXPORT_SYMBOL_GPL(register_jprobe);
|
||||
EXPORT_SYMBOL_GPL(unregister_jprobe);
|
||||
EXPORT_SYMBOL_GPL(jprobe_return);
|
||||
EXPORT_SYMBOL_GPL(register_kretprobe);
|
||||
EXPORT_SYMBOL_GPL(unregister_kretprobe);
|
||||
94
kernel/ksysfs.c
Normal file
94
kernel/ksysfs.c
Normal file
@@ -0,0 +1,94 @@
|
||||
/*
|
||||
* kernel/ksysfs.c - sysfs attributes in /sys/kernel, which
|
||||
* are not related to any other subsystem
|
||||
*
|
||||
* Copyright (C) 2004 Kay Sievers <kay.sievers@vrfy.org>
|
||||
*
|
||||
* This file is release under the GPLv2
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/kobject.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/sysfs.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kexec.h>
|
||||
|
||||
#define KERNEL_ATTR_RO(_name) \
|
||||
static struct subsys_attribute _name##_attr = __ATTR_RO(_name)
|
||||
|
||||
#define KERNEL_ATTR_RW(_name) \
|
||||
static struct subsys_attribute _name##_attr = \
|
||||
__ATTR(_name, 0644, _name##_show, _name##_store)
|
||||
|
||||
#if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
|
||||
/* current uevent sequence number */
|
||||
static ssize_t uevent_seqnum_show(struct subsystem *subsys, char *page)
|
||||
{
|
||||
return sprintf(page, "%llu\n", (unsigned long long)uevent_seqnum);
|
||||
}
|
||||
KERNEL_ATTR_RO(uevent_seqnum);
|
||||
|
||||
/* uevent helper program, used during early boo */
|
||||
static ssize_t uevent_helper_show(struct subsystem *subsys, char *page)
|
||||
{
|
||||
return sprintf(page, "%s\n", uevent_helper);
|
||||
}
|
||||
static ssize_t uevent_helper_store(struct subsystem *subsys, const char *page, size_t count)
|
||||
{
|
||||
if (count+1 > UEVENT_HELPER_PATH_LEN)
|
||||
return -ENOENT;
|
||||
memcpy(uevent_helper, page, count);
|
||||
uevent_helper[count] = '\0';
|
||||
if (count && uevent_helper[count-1] == '\n')
|
||||
uevent_helper[count-1] = '\0';
|
||||
return count;
|
||||
}
|
||||
KERNEL_ATTR_RW(uevent_helper);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_KEXEC
|
||||
static ssize_t kexec_loaded_show(struct subsystem *subsys, char *page)
|
||||
{
|
||||
return sprintf(page, "%d\n", !!kexec_image);
|
||||
}
|
||||
KERNEL_ATTR_RO(kexec_loaded);
|
||||
|
||||
static ssize_t kexec_crash_loaded_show(struct subsystem *subsys, char *page)
|
||||
{
|
||||
return sprintf(page, "%d\n", !!kexec_crash_image);
|
||||
}
|
||||
KERNEL_ATTR_RO(kexec_crash_loaded);
|
||||
#endif /* CONFIG_KEXEC */
|
||||
|
||||
decl_subsys(kernel, NULL, NULL);
|
||||
EXPORT_SYMBOL_GPL(kernel_subsys);
|
||||
|
||||
static struct attribute * kernel_attrs[] = {
|
||||
#if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
|
||||
&uevent_seqnum_attr.attr,
|
||||
&uevent_helper_attr.attr,
|
||||
#endif
|
||||
#ifdef CONFIG_KEXEC
|
||||
&kexec_loaded_attr.attr,
|
||||
&kexec_crash_loaded_attr.attr,
|
||||
#endif
|
||||
NULL
|
||||
};
|
||||
|
||||
static struct attribute_group kernel_attr_group = {
|
||||
.attrs = kernel_attrs,
|
||||
};
|
||||
|
||||
/*
 * Register the "kernel" subsystem and attach its attribute group.
 * Returns 0 on success or the first error encountered.
 */
static int __init ksysfs_init(void)
{
	int error;

	error = subsystem_register(&kernel_subsys);
	if (error)
		return error;

	return sysfs_create_group(&kernel_subsys.kset.kobj,
				  &kernel_attr_group);
}

core_initcall(ksysfs_init);
|
||||
256
kernel/kthread.c
Normal file
256
kernel/kthread.c
Normal file
@@ -0,0 +1,256 @@
|
||||
/* Kernel thread helper functions.
|
||||
* Copyright (C) 2004 IBM Corporation, Rusty Russell.
|
||||
*
|
||||
* Creation is done via keventd, so that we get a clean environment
|
||||
* even if we're invoked from userspace (think modprobe, hotplug cpu,
|
||||
* etc.).
|
||||
*/
|
||||
#include <linux/sched.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/completion.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/unistd.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <asm/semaphore.h>
|
||||
|
||||
/*
|
||||
* We don't want to execute off keventd since it might
|
||||
* hold a semaphore our callers hold too:
|
||||
*/
|
||||
static struct workqueue_struct *helper_wq;
|
||||
|
||||
struct kthread_create_info
|
||||
{
|
||||
/* Information passed to kthread() from keventd. */
|
||||
int (*threadfn)(void *data);
|
||||
void *data;
|
||||
struct completion started;
|
||||
|
||||
/* Result passed back to kthread_create() from keventd. */
|
||||
struct task_struct *result;
|
||||
struct completion done;
|
||||
|
||||
struct work_struct work;
|
||||
};
|
||||
|
||||
struct kthread_stop_info
|
||||
{
|
||||
struct task_struct *k;
|
||||
int err;
|
||||
struct completion done;
|
||||
};
|
||||
|
||||
/* Thread stopping is done by setting this var: lock serializes
|
||||
* multiple kthread_stop calls. */
|
||||
static DEFINE_MUTEX(kthread_stop_lock);
|
||||
static struct kthread_stop_info kthread_stop_info;
|
||||
|
||||
/**
|
||||
* kthread_should_stop - should this kthread return now?
|
||||
*
|
||||
* When someone calls kthread_stop() on your kthread, it will be woken
|
||||
* and this will return true. You should then return, and your return
|
||||
* value will be passed through to kthread_stop().
|
||||
*/
|
||||
int kthread_should_stop(void)
|
||||
{
|
||||
return (kthread_stop_info.k == current);
|
||||
}
|
||||
EXPORT_SYMBOL(kthread_should_stop);
|
||||
|
||||
static void kthread_exit_files(void)
|
||||
{
|
||||
struct fs_struct *fs;
|
||||
struct task_struct *tsk = current;
|
||||
|
||||
exit_fs(tsk); /* current->fs->count--; */
|
||||
fs = init_task.fs;
|
||||
tsk->fs = fs;
|
||||
atomic_inc(&fs->count);
|
||||
exit_files(tsk);
|
||||
current->files = init_task.files;
|
||||
atomic_inc(&tsk->files->count);
|
||||
}
|
||||
|
||||
static int kthread(void *_create)
|
||||
{
|
||||
struct kthread_create_info *create = _create;
|
||||
int (*threadfn)(void *data);
|
||||
void *data;
|
||||
sigset_t blocked;
|
||||
int ret = -EINTR;
|
||||
|
||||
kthread_exit_files();
|
||||
|
||||
/* Copy data: it's on keventd's stack */
|
||||
threadfn = create->threadfn;
|
||||
data = create->data;
|
||||
|
||||
/* Block and flush all signals (in case we're not from keventd). */
|
||||
sigfillset(&blocked);
|
||||
sigprocmask(SIG_BLOCK, &blocked, NULL);
|
||||
flush_signals(current);
|
||||
|
||||
/* By default we can run anywhere, unlike keventd. */
|
||||
set_cpus_allowed(current, CPU_MASK_ALL);
|
||||
|
||||
/* OK, tell user we're spawned, wait for stop or wakeup */
|
||||
__set_current_state(TASK_INTERRUPTIBLE);
|
||||
complete(&create->started);
|
||||
schedule();
|
||||
|
||||
if (!kthread_should_stop())
|
||||
ret = threadfn(data);
|
||||
|
||||
/* It might have exited on its own, w/o kthread_stop. Check. */
|
||||
if (kthread_should_stop()) {
|
||||
kthread_stop_info.err = ret;
|
||||
complete(&kthread_stop_info.done);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* We are keventd: create a thread. */
|
||||
static void keventd_create_kthread(struct work_struct *work)
|
||||
{
|
||||
struct kthread_create_info *create =
|
||||
container_of(work, struct kthread_create_info, work);
|
||||
int pid;
|
||||
|
||||
/* We want our own signal handler (we take no signals by default). */
|
||||
pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD);
|
||||
if (pid < 0) {
|
||||
create->result = ERR_PTR(pid);
|
||||
} else {
|
||||
wait_for_completion(&create->started);
|
||||
read_lock(&tasklist_lock);
|
||||
create->result = find_task_by_pid(pid);
|
||||
read_unlock(&tasklist_lock);
|
||||
}
|
||||
complete(&create->done);
|
||||
}
|
||||
|
||||
/**
|
||||
* kthread_create - create a kthread.
|
||||
* @threadfn: the function to run until signal_pending(current).
|
||||
* @data: data ptr for @threadfn.
|
||||
* @namefmt: printf-style name for the thread.
|
||||
*
|
||||
* Description: This helper function creates and names a kernel
|
||||
* thread. The thread will be stopped: use wake_up_process() to start
|
||||
* it. See also kthread_run(), kthread_create_on_cpu().
|
||||
*
|
||||
* When woken, the thread will run @threadfn() with @data as its
|
||||
* argument. @threadfn() can either call do_exit() directly if it is a
|
||||
* standalone thread for which noone will call kthread_stop(), or
|
||||
* return when 'kthread_should_stop()' is true (which means
|
||||
* kthread_stop() has been called). The return value should be zero
|
||||
* or a negative error number; it will be passed to kthread_stop().
|
||||
*
|
||||
* Returns a task_struct or ERR_PTR(-ENOMEM).
|
||||
*/
|
||||
struct task_struct *kthread_create(int (*threadfn)(void *data),
|
||||
void *data,
|
||||
const char namefmt[],
|
||||
...)
|
||||
{
|
||||
struct kthread_create_info create;
|
||||
|
||||
create.threadfn = threadfn;
|
||||
create.data = data;
|
||||
init_completion(&create.started);
|
||||
init_completion(&create.done);
|
||||
INIT_WORK(&create.work, keventd_create_kthread);
|
||||
|
||||
/*
|
||||
* The workqueue needs to start up first:
|
||||
*/
|
||||
if (!helper_wq)
|
||||
create.work.func(&create.work);
|
||||
else {
|
||||
queue_work(helper_wq, &create.work);
|
||||
wait_for_completion(&create.done);
|
||||
}
|
||||
if (!IS_ERR(create.result)) {
|
||||
va_list args;
|
||||
va_start(args, namefmt);
|
||||
vsnprintf(create.result->comm, sizeof(create.result->comm),
|
||||
namefmt, args);
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
return create.result;
|
||||
}
|
||||
EXPORT_SYMBOL(kthread_create);
|
||||
|
||||
/**
|
||||
* kthread_bind - bind a just-created kthread to a cpu.
|
||||
* @k: thread created by kthread_create().
|
||||
* @cpu: cpu (might not be online, must be possible) for @k to run on.
|
||||
*
|
||||
* Description: This function is equivalent to set_cpus_allowed(),
|
||||
* except that @cpu doesn't need to be online, and the thread must be
|
||||
* stopped (i.e., just returned from kthread_create()).
|
||||
*/
|
||||
void kthread_bind(struct task_struct *k, unsigned int cpu)
|
||||
{
|
||||
BUG_ON(k->state != TASK_INTERRUPTIBLE);
|
||||
/* Must have done schedule() in kthread() before we set_task_cpu */
|
||||
wait_task_inactive(k);
|
||||
set_task_cpu(k, cpu);
|
||||
k->cpus_allowed = cpumask_of_cpu(cpu);
|
||||
}
|
||||
EXPORT_SYMBOL(kthread_bind);
|
||||
|
||||
/**
|
||||
* kthread_stop - stop a thread created by kthread_create().
|
||||
* @k: thread created by kthread_create().
|
||||
*
|
||||
* Sets kthread_should_stop() for @k to return true, wakes it, and
|
||||
* waits for it to exit. Your threadfn() must not call do_exit()
|
||||
* itself if you use this function! This can also be called after
|
||||
* kthread_create() instead of calling wake_up_process(): the thread
|
||||
* will exit without calling threadfn().
|
||||
*
|
||||
* Returns the result of threadfn(), or %-EINTR if wake_up_process()
|
||||
* was never called.
|
||||
*/
|
||||
int kthread_stop(struct task_struct *k)
|
||||
{
|
||||
int ret;
|
||||
|
||||
mutex_lock(&kthread_stop_lock);
|
||||
|
||||
/* It could exit after stop_info.k set, but before wake_up_process. */
|
||||
get_task_struct(k);
|
||||
|
||||
/* Must init completion *before* thread sees kthread_stop_info.k */
|
||||
init_completion(&kthread_stop_info.done);
|
||||
smp_wmb();
|
||||
|
||||
/* Now set kthread_should_stop() to true, and wake it up. */
|
||||
kthread_stop_info.k = k;
|
||||
wake_up_process(k);
|
||||
put_task_struct(k);
|
||||
|
||||
/* Once it dies, reset stop ptr, gather result and we're done. */
|
||||
wait_for_completion(&kthread_stop_info.done);
|
||||
kthread_stop_info.k = NULL;
|
||||
ret = kthread_stop_info.err;
|
||||
mutex_unlock(&kthread_stop_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(kthread_stop);
|
||||
|
||||
static __init int helper_init(void)
|
||||
{
|
||||
helper_wq = create_singlethread_workqueue("kthread");
|
||||
BUG_ON(!helper_wq);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
core_initcall(helper_init);
|
||||
280
kernel/latency.c
Normal file
280
kernel/latency.c
Normal file
@@ -0,0 +1,280 @@
|
||||
/*
|
||||
* latency.c: Explicit system-wide latency-expectation infrastructure
|
||||
*
|
||||
* The purpose of this infrastructure is to allow device drivers to set
|
||||
* latency constraint they have and to collect and summarize these
|
||||
* expectations globally. The cumulative result can then be used by
|
||||
* power management and similar users to make decisions that have
|
||||
* tradeoffs with a latency component.
|
||||
*
|
||||
* An example user of this are the x86 C-states; each higher C state saves
|
||||
* more power, but has a higher exit latency. For the idle loop power
|
||||
* code to make a good decision which C-state to use, information about
|
||||
* acceptable latencies is required.
|
||||
*
|
||||
* An example announcer of latency is an audio driver that knows it
|
||||
* will get an interrupt when the hardware has 200 usec of samples
|
||||
* left in the DMA buffer; in that case the driver can set a latency
|
||||
* constraint of, say, 150 usec.
|
||||
*
|
||||
* Multiple drivers can each announce their maximum accepted latency,
|
||||
* to keep these apart, a string based identifier is used.
|
||||
*
|
||||
*
|
||||
* (C) Copyright 2006 Intel Corporation
|
||||
* Author: Arjan van de Ven <arjan@linux.intel.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; version 2
|
||||
* of the License.
|
||||
*/
|
||||
|
||||
#include <linux/latency.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/jiffies.h>
|
||||
#include <asm/atomic.h>
|
||||
|
||||
struct latency_info {
|
||||
struct list_head list;
|
||||
int usecs;
|
||||
char *identifier;
|
||||
};
|
||||
|
||||
/*
|
||||
* locking rule: all modifications to current_max_latency and
|
||||
* latency_list need to be done while holding the latency_lock.
|
||||
* latency_lock needs to be taken _irqsave.
|
||||
*/
|
||||
static atomic_t current_max_latency;
|
||||
static DEFINE_SPINLOCK(latency_lock);
|
||||
|
||||
static LIST_HEAD(latency_list);
|
||||
static BLOCKING_NOTIFIER_HEAD(latency_notifier);
|
||||
|
||||
/*
|
||||
* This function returns the maximum latency allowed, which
|
||||
* happens to be the minimum of all maximum latencies on the
|
||||
* list.
|
||||
*/
|
||||
static int __find_max_latency(void)
|
||||
{
|
||||
int min = INFINITE_LATENCY;
|
||||
struct latency_info *info;
|
||||
|
||||
list_for_each_entry(info, &latency_list, list) {
|
||||
if (info->usecs < min)
|
||||
min = info->usecs;
|
||||
}
|
||||
return min;
|
||||
}
|
||||
|
||||
/**
|
||||
* set_acceptable_latency - sets the maximum latency acceptable
|
||||
* @identifier: string that identifies this driver
|
||||
* @usecs: maximum acceptable latency for this driver
|
||||
*
|
||||
* This function informs the kernel that this device(driver)
|
||||
* can accept at most usecs latency. This setting is used for
|
||||
* power management and similar tradeoffs.
|
||||
*
|
||||
* This function sleeps and can only be called from process
|
||||
* context.
|
||||
* Calling this function with an existing identifier is valid
|
||||
* and will cause the existing latency setting to be changed.
|
||||
*/
|
||||
void set_acceptable_latency(char *identifier, int usecs)
|
||||
{
|
||||
struct latency_info *info, *iter;
|
||||
unsigned long flags;
|
||||
int found_old = 0;
|
||||
|
||||
info = kzalloc(sizeof(struct latency_info), GFP_KERNEL);
|
||||
if (!info)
|
||||
return;
|
||||
info->usecs = usecs;
|
||||
info->identifier = kstrdup(identifier, GFP_KERNEL);
|
||||
if (!info->identifier)
|
||||
goto free_info;
|
||||
|
||||
spin_lock_irqsave(&latency_lock, flags);
|
||||
list_for_each_entry(iter, &latency_list, list) {
|
||||
if (strcmp(iter->identifier, identifier)==0) {
|
||||
found_old = 1;
|
||||
iter->usecs = usecs;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found_old)
|
||||
list_add(&info->list, &latency_list);
|
||||
|
||||
if (usecs < atomic_read(¤t_max_latency))
|
||||
atomic_set(¤t_max_latency, usecs);
|
||||
|
||||
spin_unlock_irqrestore(&latency_lock, flags);
|
||||
|
||||
blocking_notifier_call_chain(&latency_notifier,
|
||||
atomic_read(¤t_max_latency), NULL);
|
||||
|
||||
/*
|
||||
* if we inserted the new one, we're done; otherwise there was
|
||||
* an existing one so we need to free the redundant data
|
||||
*/
|
||||
if (!found_old)
|
||||
return;
|
||||
|
||||
kfree(info->identifier);
|
||||
free_info:
|
||||
kfree(info);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(set_acceptable_latency);
|
||||
|
||||
/**
|
||||
* modify_acceptable_latency - changes the maximum latency acceptable
|
||||
* @identifier: string that identifies this driver
|
||||
* @usecs: maximum acceptable latency for this driver
|
||||
*
|
||||
* This function informs the kernel that this device(driver)
|
||||
* can accept at most usecs latency. This setting is used for
|
||||
* power management and similar tradeoffs.
|
||||
*
|
||||
* This function does not sleep and can be called in any context.
|
||||
* Trying to use a non-existing identifier silently gets ignored.
|
||||
*
|
||||
* Due to the atomic nature of this function, the modified latency
|
||||
* value will only be used for future decisions; past decisions
|
||||
* can still lead to longer latencies in the near future.
|
||||
*/
|
||||
void modify_acceptable_latency(char *identifier, int usecs)
|
||||
{
|
||||
struct latency_info *iter;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&latency_lock, flags);
|
||||
list_for_each_entry(iter, &latency_list, list) {
|
||||
if (strcmp(iter->identifier, identifier) == 0) {
|
||||
iter->usecs = usecs;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (usecs < atomic_read(¤t_max_latency))
|
||||
atomic_set(¤t_max_latency, usecs);
|
||||
spin_unlock_irqrestore(&latency_lock, flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(modify_acceptable_latency);
|
||||
|
||||
/**
|
||||
* remove_acceptable_latency - removes the maximum latency acceptable
|
||||
* @identifier: string that identifies this driver
|
||||
*
|
||||
* This function removes a previously set maximum latency setting
|
||||
* for the driver and frees up any resources associated with the
|
||||
* bookkeeping needed for this.
|
||||
*
|
||||
* This function does not sleep and can be called in any context.
|
||||
* Trying to use a non-existing identifier silently gets ignored.
|
||||
*/
|
||||
void remove_acceptable_latency(char *identifier)
|
||||
{
|
||||
unsigned long flags;
|
||||
int newmax = 0;
|
||||
struct latency_info *iter, *temp;
|
||||
|
||||
spin_lock_irqsave(&latency_lock, flags);
|
||||
|
||||
list_for_each_entry_safe(iter, temp, &latency_list, list) {
|
||||
if (strcmp(iter->identifier, identifier) == 0) {
|
||||
list_del(&iter->list);
|
||||
newmax = iter->usecs;
|
||||
kfree(iter->identifier);
|
||||
kfree(iter);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* If we just deleted the system wide value, we need to
|
||||
* recalculate with a full search
|
||||
*/
|
||||
if (newmax == atomic_read(¤t_max_latency)) {
|
||||
newmax = __find_max_latency();
|
||||
atomic_set(¤t_max_latency, newmax);
|
||||
}
|
||||
spin_unlock_irqrestore(&latency_lock, flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(remove_acceptable_latency);
|
||||
|
||||
/**
|
||||
* system_latency_constraint - queries the system wide latency maximum
|
||||
*
|
||||
* This function returns the system wide maximum latency in
|
||||
* microseconds.
|
||||
*
|
||||
* This function does not sleep and can be called in any context.
|
||||
*/
|
||||
int system_latency_constraint(void)
|
||||
{
|
||||
return atomic_read(¤t_max_latency);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(system_latency_constraint);
|
||||
|
||||
/**
|
||||
* synchronize_acceptable_latency - recalculates all latency decisions
|
||||
*
|
||||
* This function will cause a callback to various kernel pieces that
|
||||
* will make those pieces rethink their latency decisions. This implies
|
||||
* that if there are overlong latencies in hardware state already, those
|
||||
* latencies get taken right now. When this call completes no overlong
|
||||
* latency decisions should be active anymore.
|
||||
*
|
||||
* Typical usecase of this is after a modify_acceptable_latency() call,
|
||||
* which in itself is non-blocking and non-synchronizing.
|
||||
*
|
||||
* This function blocks and should not be called with locks held.
|
||||
*/
|
||||
|
||||
void synchronize_acceptable_latency(void)
|
||||
{
|
||||
blocking_notifier_call_chain(&latency_notifier,
|
||||
atomic_read(¤t_max_latency), NULL);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(synchronize_acceptable_latency);
|
||||
|
||||
/*
|
||||
* Latency notifier: this notifier gets called when a non-atomic new
|
||||
* latency value gets set. The expectation of the caller of the
|
||||
* non-atomic set is that when the call returns, future latencies
|
||||
* are within bounds, so the functions on the notifier list are
|
||||
* expected to take the overlong latencies immediately, inside the
|
||||
* callback, and not make a overlong latency decision anymore.
|
||||
*
|
||||
* The callback gets called when the new latency value is made
|
||||
* active so system_latency_constraint() returns the new latency.
|
||||
*/
|
||||
int register_latency_notifier(struct notifier_block * nb)
|
||||
{
|
||||
return blocking_notifier_chain_register(&latency_notifier, nb);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(register_latency_notifier);
|
||||
|
||||
int unregister_latency_notifier(struct notifier_block * nb)
|
||||
{
|
||||
return blocking_notifier_chain_unregister(&latency_notifier, nb);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(unregister_latency_notifier);
|
||||
|
||||
static __init int latency_init(void)
|
||||
{
|
||||
atomic_set(¤t_max_latency, INFINITE_LATENCY);
|
||||
/*
|
||||
* we don't want by default to have longer latencies than 2 ticks,
|
||||
* since that would cause lost ticks
|
||||
*/
|
||||
set_acceptable_latency("kernel", 2*1000000/HZ);
|
||||
return 0;
|
||||
}
|
||||
|
||||
module_init(latency_init);
|
||||
2799
kernel/lockdep.c
Normal file
2799
kernel/lockdep.c
Normal file
File diff suppressed because it is too large
Load Diff
78
kernel/lockdep_internals.h
Normal file
78
kernel/lockdep_internals.h
Normal file
@@ -0,0 +1,78 @@
|
||||
/*
|
||||
* kernel/lockdep_internals.h
|
||||
*
|
||||
* Runtime locking correctness validator
|
||||
*
|
||||
* lockdep subsystem internal functions and variables.
|
||||
*/
|
||||
|
||||
/*
|
||||
* MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies
|
||||
* we track.
|
||||
*
|
||||
* We use the per-lock dependency maps in two ways: we grow it by adding
|
||||
* every to-be-taken lock to all currently held lock's own dependency
|
||||
* table (if it's not there yet), and we check it for lock order
|
||||
* conflicts and deadlocks.
|
||||
*/
|
||||
#define MAX_LOCKDEP_ENTRIES 8192UL
|
||||
|
||||
#define MAX_LOCKDEP_KEYS_BITS 11
|
||||
#define MAX_LOCKDEP_KEYS (1UL << MAX_LOCKDEP_KEYS_BITS)
|
||||
|
||||
#define MAX_LOCKDEP_CHAINS_BITS 14
|
||||
#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS)
|
||||
|
||||
/*
|
||||
* Stack-trace: tightly packed array of stack backtrace
|
||||
* addresses. Protected by the hash_lock.
|
||||
*/
|
||||
#define MAX_STACK_TRACE_ENTRIES 262144UL
|
||||
|
||||
extern struct list_head all_lock_classes;
|
||||
|
||||
extern void
|
||||
get_usage_chars(struct lock_class *class, char *c1, char *c2, char *c3, char *c4);
|
||||
|
||||
extern const char * __get_key_name(struct lockdep_subclass_key *key, char *str);
|
||||
|
||||
extern unsigned long nr_lock_classes;
|
||||
extern unsigned long nr_list_entries;
|
||||
extern unsigned long nr_lock_chains;
|
||||
extern unsigned long nr_stack_trace_entries;
|
||||
|
||||
extern unsigned int nr_hardirq_chains;
|
||||
extern unsigned int nr_softirq_chains;
|
||||
extern unsigned int nr_process_chains;
|
||||
extern unsigned int max_lockdep_depth;
|
||||
extern unsigned int max_recursion_depth;
|
||||
|
||||
#ifdef CONFIG_DEBUG_LOCKDEP
|
||||
/*
|
||||
* Various lockdep statistics:
|
||||
*/
|
||||
extern atomic_t chain_lookup_hits;
|
||||
extern atomic_t chain_lookup_misses;
|
||||
extern atomic_t hardirqs_on_events;
|
||||
extern atomic_t hardirqs_off_events;
|
||||
extern atomic_t redundant_hardirqs_on;
|
||||
extern atomic_t redundant_hardirqs_off;
|
||||
extern atomic_t softirqs_on_events;
|
||||
extern atomic_t softirqs_off_events;
|
||||
extern atomic_t redundant_softirqs_on;
|
||||
extern atomic_t redundant_softirqs_off;
|
||||
extern atomic_t nr_unused_locks;
|
||||
extern atomic_t nr_cyclic_checks;
|
||||
extern atomic_t nr_cyclic_check_recursions;
|
||||
extern atomic_t nr_find_usage_forwards_checks;
|
||||
extern atomic_t nr_find_usage_forwards_recursions;
|
||||
extern atomic_t nr_find_usage_backwards_checks;
|
||||
extern atomic_t nr_find_usage_backwards_recursions;
|
||||
# define debug_atomic_inc(ptr) atomic_inc(ptr)
|
||||
# define debug_atomic_dec(ptr) atomic_dec(ptr)
|
||||
# define debug_atomic_read(ptr) atomic_read(ptr)
|
||||
#else
|
||||
# define debug_atomic_inc(ptr) do { } while (0)
|
||||
# define debug_atomic_dec(ptr) do { } while (0)
|
||||
# define debug_atomic_read(ptr) 0
|
||||
#endif
|
||||
361
kernel/lockdep_proc.c
Normal file
361
kernel/lockdep_proc.c
Normal file
@@ -0,0 +1,361 @@
|
||||
/*
|
||||
* kernel/lockdep_proc.c
|
||||
*
|
||||
* Runtime locking correctness validator
|
||||
*
|
||||
* Started by Ingo Molnar:
|
||||
*
|
||||
* Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
|
||||
*
|
||||
* Code for /proc/lockdep and /proc/lockdep_stats:
|
||||
*
|
||||
*/
|
||||
#include <linux/module.h>
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/kallsyms.h>
|
||||
#include <linux/debug_locks.h>
|
||||
|
||||
#include "lockdep_internals.h"
|
||||
|
||||
/* seq_file iterator: advance to the next lock class, or NULL at the end. */
static void *l_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct lock_class *class = v;

	(*pos)++;

	if (class->lock_entry.next == &all_lock_classes)
		class = NULL;
	else
		class = list_entry(class->lock_entry.next, struct lock_class,
				   lock_entry);
	m->private = class;

	return class;
}
|
||||
|
||||
/* seq_file iterator start: print the header when positioned at the first class. */
static void *l_start(struct seq_file *m, loff_t *pos)
{
	struct lock_class *class = m->private;

	if (&class->lock_entry == all_lock_classes.next)
		seq_printf(m, "all lock classes:\n");

	return class;
}
|
||||
|
||||
/* seq_file iterator stop: nothing to clean up. */
static void l_stop(struct seq_file *m, void *v)
{
}
|
||||
|
||||
static unsigned long count_forward_deps(struct lock_class *class)
|
||||
{
|
||||
struct lock_list *entry;
|
||||
unsigned long ret = 1;
|
||||
|
||||
/*
|
||||
* Recurse this class's dependency list:
|
||||
*/
|
||||
list_for_each_entry(entry, &class->locks_after, entry)
|
||||
ret += count_forward_deps(entry->class);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static unsigned long count_backward_deps(struct lock_class *class)
|
||||
{
|
||||
struct lock_list *entry;
|
||||
unsigned long ret = 1;
|
||||
|
||||
/*
|
||||
* Recurse this class's dependency list:
|
||||
*/
|
||||
list_for_each_entry(entry, &class->locks_before, entry)
|
||||
ret += count_backward_deps(entry->class);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void print_name(struct seq_file *m, struct lock_class *class)
|
||||
{
|
||||
char str[128];
|
||||
const char *name = class->name;
|
||||
|
||||
if (!name) {
|
||||
name = __get_key_name(class->key, str);
|
||||
seq_printf(m, "%s", name);
|
||||
} else{
|
||||
seq_printf(m, "%s", name);
|
||||
if (class->name_version > 1)
|
||||
seq_printf(m, "#%d", class->name_version);
|
||||
if (class->subclass)
|
||||
seq_printf(m, "/%d", class->subclass);
|
||||
}
|
||||
}
|
||||
|
||||
static int l_show(struct seq_file *m, void *v)
|
||||
{
|
||||
unsigned long nr_forward_deps, nr_backward_deps;
|
||||
struct lock_class *class = m->private;
|
||||
struct lock_list *entry;
|
||||
char c1, c2, c3, c4;
|
||||
|
||||
seq_printf(m, "%p", class->key);
|
||||
#ifdef CONFIG_DEBUG_LOCKDEP
|
||||
seq_printf(m, " OPS:%8ld", class->ops);
|
||||
#endif
|
||||
nr_forward_deps = count_forward_deps(class);
|
||||
seq_printf(m, " FD:%5ld", nr_forward_deps);
|
||||
|
||||
nr_backward_deps = count_backward_deps(class);
|
||||
seq_printf(m, " BD:%5ld", nr_backward_deps);
|
||||
|
||||
get_usage_chars(class, &c1, &c2, &c3, &c4);
|
||||
seq_printf(m, " %c%c%c%c", c1, c2, c3, c4);
|
||||
|
||||
seq_printf(m, ": ");
|
||||
print_name(m, class);
|
||||
seq_puts(m, "\n");
|
||||
|
||||
list_for_each_entry(entry, &class->locks_after, entry) {
|
||||
if (entry->distance == 1) {
|
||||
seq_printf(m, " -> [%p] ", entry->class);
|
||||
print_name(m, entry->class);
|
||||
seq_puts(m, "\n");
|
||||
}
|
||||
}
|
||||
seq_puts(m, "\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct seq_operations lockdep_ops = {
|
||||
.start = l_start,
|
||||
.next = l_next,
|
||||
.stop = l_stop,
|
||||
.show = l_show,
|
||||
};
|
||||
|
||||
static int lockdep_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
int res = seq_open(file, &lockdep_ops);
|
||||
if (!res) {
|
||||
struct seq_file *m = file->private_data;
|
||||
|
||||
if (!list_empty(&all_lock_classes))
|
||||
m->private = list_entry(all_lock_classes.next,
|
||||
struct lock_class, lock_entry);
|
||||
else
|
||||
m->private = NULL;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
static const struct file_operations proc_lockdep_operations = {
|
||||
.open = lockdep_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = seq_release,
|
||||
};
|
||||
|
||||
/*
 * Dump the CONFIG_DEBUG_LOCKDEP-only statistics counters into @m.
 * Compiles to an empty function when lockdep debugging is off.
 */
static void lockdep_stats_debug_show(struct seq_file *m)
{
#ifdef CONFIG_DEBUG_LOCKDEP
	unsigned int hi1 = debug_atomic_read(&hardirqs_on_events),
		     hi2 = debug_atomic_read(&hardirqs_off_events),
		     hr1 = debug_atomic_read(&redundant_hardirqs_on),
		     hr2 = debug_atomic_read(&redundant_hardirqs_off),
		     si1 = debug_atomic_read(&softirqs_on_events),
		     si2 = debug_atomic_read(&softirqs_off_events),
		     sr1 = debug_atomic_read(&redundant_softirqs_on),
		     sr2 = debug_atomic_read(&redundant_softirqs_off);

	seq_printf(m, " chain lookup misses:           %11u\n",
		debug_atomic_read(&chain_lookup_misses));
	seq_printf(m, " chain lookup hits:             %11u\n",
		debug_atomic_read(&chain_lookup_hits));
	seq_printf(m, " cyclic checks:                 %11u\n",
		debug_atomic_read(&nr_cyclic_checks));
	seq_printf(m, " cyclic-check recursions:       %11u\n",
		debug_atomic_read(&nr_cyclic_check_recursions));
	seq_printf(m, " find-mask forwards checks:     %11u\n",
		debug_atomic_read(&nr_find_usage_forwards_checks));
	seq_printf(m, " find-mask forwards recursions: %11u\n",
		debug_atomic_read(&nr_find_usage_forwards_recursions));
	seq_printf(m, " find-mask backwards checks:    %11u\n",
		debug_atomic_read(&nr_find_usage_backwards_checks));
	seq_printf(m, " find-mask backwards recursions:%11u\n",
		debug_atomic_read(&nr_find_usage_backwards_recursions));

	seq_printf(m, " hardirq on events:             %11u\n", hi1);
	seq_printf(m, " hardirq off events:            %11u\n", hi2);
	seq_printf(m, " redundant hardirq ons:         %11u\n", hr1);
	seq_printf(m, " redundant hardirq offs:        %11u\n", hr2);
	seq_printf(m, " softirq on events:             %11u\n", si1);
	seq_printf(m, " softirq off events:            %11u\n", si2);
	seq_printf(m, " redundant softirq ons:         %11u\n", sr1);
	seq_printf(m, " redundant softirq offs:        %11u\n", sr2);
#endif
}
|
||||
|
||||
static int lockdep_stats_show(struct seq_file *m, void *v)
|
||||
{
|
||||
struct lock_class *class;
|
||||
unsigned long nr_unused = 0, nr_uncategorized = 0,
|
||||
nr_irq_safe = 0, nr_irq_unsafe = 0,
|
||||
nr_softirq_safe = 0, nr_softirq_unsafe = 0,
|
||||
nr_hardirq_safe = 0, nr_hardirq_unsafe = 0,
|
||||
nr_irq_read_safe = 0, nr_irq_read_unsafe = 0,
|
||||
nr_softirq_read_safe = 0, nr_softirq_read_unsafe = 0,
|
||||
nr_hardirq_read_safe = 0, nr_hardirq_read_unsafe = 0,
|
||||
sum_forward_deps = 0, factor = 0;
|
||||
|
||||
list_for_each_entry(class, &all_lock_classes, lock_entry) {
|
||||
|
||||
if (class->usage_mask == 0)
|
||||
nr_unused++;
|
||||
if (class->usage_mask == LOCKF_USED)
|
||||
nr_uncategorized++;
|
||||
if (class->usage_mask & LOCKF_USED_IN_IRQ)
|
||||
nr_irq_safe++;
|
||||
if (class->usage_mask & LOCKF_ENABLED_IRQS)
|
||||
nr_irq_unsafe++;
|
||||
if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ)
|
||||
nr_softirq_safe++;
|
||||
if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS)
|
||||
nr_softirq_unsafe++;
|
||||
if (class->usage_mask & LOCKF_USED_IN_HARDIRQ)
|
||||
nr_hardirq_safe++;
|
||||
if (class->usage_mask & LOCKF_ENABLED_HARDIRQS)
|
||||
nr_hardirq_unsafe++;
|
||||
if (class->usage_mask & LOCKF_USED_IN_IRQ_READ)
|
||||
nr_irq_read_safe++;
|
||||
if (class->usage_mask & LOCKF_ENABLED_IRQS_READ)
|
||||
nr_irq_read_unsafe++;
|
||||
if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ_READ)
|
||||
nr_softirq_read_safe++;
|
||||
if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ)
|
||||
nr_softirq_read_unsafe++;
|
||||
if (class->usage_mask & LOCKF_USED_IN_HARDIRQ_READ)
|
||||
nr_hardirq_read_safe++;
|
||||
if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ)
|
||||
nr_hardirq_read_unsafe++;
|
||||
|
||||
sum_forward_deps += count_forward_deps(class);
|
||||
}
|
||||
#ifdef CONFIG_DEBUG_LOCKDEP
|
||||
DEBUG_LOCKS_WARN_ON(debug_atomic_read(&nr_unused_locks) != nr_unused);
|
||||
#endif
|
||||
seq_printf(m, " lock-classes: %11lu [max: %lu]\n",
|
||||
nr_lock_classes, MAX_LOCKDEP_KEYS);
|
||||
seq_printf(m, " direct dependencies: %11lu [max: %lu]\n",
|
||||
nr_list_entries, MAX_LOCKDEP_ENTRIES);
|
||||
seq_printf(m, " indirect dependencies: %11lu\n",
|
||||
sum_forward_deps);
|
||||
|
||||
/*
|
||||
* Total number of dependencies:
|
||||
*
|
||||
* All irq-safe locks may nest inside irq-unsafe locks,
|
||||
* plus all the other known dependencies:
|
||||
*/
|
||||
seq_printf(m, " all direct dependencies: %11lu\n",
|
||||
nr_irq_unsafe * nr_irq_safe +
|
||||
nr_hardirq_unsafe * nr_hardirq_safe +
|
||||
nr_list_entries);
|
||||
|
||||
/*
|
||||
* Estimated factor between direct and indirect
|
||||
* dependencies:
|
||||
*/
|
||||
if (nr_list_entries)
|
||||
factor = sum_forward_deps / nr_list_entries;
|
||||
|
||||
seq_printf(m, " dependency chains: %11lu [max: %lu]\n",
|
||||
nr_lock_chains, MAX_LOCKDEP_CHAINS);
|
||||
|
||||
#ifdef CONFIG_TRACE_IRQFLAGS
|
||||
seq_printf(m, " in-hardirq chains: %11u\n",
|
||||
nr_hardirq_chains);
|
||||
seq_printf(m, " in-softirq chains: %11u\n",
|
||||
nr_softirq_chains);
|
||||
#endif
|
||||
seq_printf(m, " in-process chains: %11u\n",
|
||||
nr_process_chains);
|
||||
seq_printf(m, " stack-trace entries: %11lu [max: %lu]\n",
|
||||
nr_stack_trace_entries, MAX_STACK_TRACE_ENTRIES);
|
||||
seq_printf(m, " combined max dependencies: %11u\n",
|
||||
(nr_hardirq_chains + 1) *
|
||||
(nr_softirq_chains + 1) *
|
||||
(nr_process_chains + 1)
|
||||
);
|
||||
seq_printf(m, " hardirq-safe locks: %11lu\n",
|
||||
nr_hardirq_safe);
|
||||
seq_printf(m, " hardirq-unsafe locks: %11lu\n",
|
||||
nr_hardirq_unsafe);
|
||||
seq_printf(m, " softirq-safe locks: %11lu\n",
|
||||
nr_softirq_safe);
|
||||
seq_printf(m, " softirq-unsafe locks: %11lu\n",
|
||||
nr_softirq_unsafe);
|
||||
seq_printf(m, " irq-safe locks: %11lu\n",
|
||||
nr_irq_safe);
|
||||
seq_printf(m, " irq-unsafe locks: %11lu\n",
|
||||
nr_irq_unsafe);
|
||||
|
||||
seq_printf(m, " hardirq-read-safe locks: %11lu\n",
|
||||
nr_hardirq_read_safe);
|
||||
seq_printf(m, " hardirq-read-unsafe locks: %11lu\n",
|
||||
nr_hardirq_read_unsafe);
|
||||
seq_printf(m, " softirq-read-safe locks: %11lu\n",
|
||||
nr_softirq_read_safe);
|
||||
seq_printf(m, " softirq-read-unsafe locks: %11lu\n",
|
||||
nr_softirq_read_unsafe);
|
||||
seq_printf(m, " irq-read-safe locks: %11lu\n",
|
||||
nr_irq_read_safe);
|
||||
seq_printf(m, " irq-read-unsafe locks: %11lu\n",
|
||||
nr_irq_read_unsafe);
|
||||
|
||||
seq_printf(m, " uncategorized locks: %11lu\n",
|
||||
nr_uncategorized);
|
||||
seq_printf(m, " unused locks: %11lu\n",
|
||||
nr_unused);
|
||||
seq_printf(m, " max locking depth: %11u\n",
|
||||
max_lockdep_depth);
|
||||
seq_printf(m, " max recursion depth: %11u\n",
|
||||
max_recursion_depth);
|
||||
lockdep_stats_debug_show(m);
|
||||
seq_printf(m, " debug_locks: %11u\n",
|
||||
debug_locks);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int lockdep_stats_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
return single_open(file, lockdep_stats_show, NULL);
|
||||
}
|
||||
|
||||
static const struct file_operations proc_lockdep_stats_operations = {
|
||||
.open = lockdep_stats_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = seq_release,
|
||||
};
|
||||
|
||||
/*
 * Boot-time creation of the lockdep /proc entries.  A failed
 * create_proc_entry() is tolerated silently: lockdep itself keeps
 * working, only the corresponding /proc view is missing.
 */
static int __init lockdep_proc_init(void)
{
	struct proc_dir_entry *entry;

	entry = create_proc_entry("lockdep", S_IRUSR, NULL);
	if (entry)
		entry->proc_fops = &proc_lockdep_operations;

	entry = create_proc_entry("lockdep_stats", S_IRUSR, NULL);
	if (entry)
		entry->proc_fops = &proc_lockdep_stats_operations;

	return 0;
}
|
||||
|
||||
__initcall(lockdep_proc_init);
|
||||
|
||||
2440
kernel/module.c
Normal file
2440
kernel/module.c
Normal file
File diff suppressed because it is too large
Load Diff
116
kernel/mutex-debug.c
Normal file
116
kernel/mutex-debug.c
Normal file
@@ -0,0 +1,116 @@
|
||||
/*
|
||||
* kernel/mutex-debug.c
|
||||
*
|
||||
* Debugging code for mutexes
|
||||
*
|
||||
* Started by Ingo Molnar:
|
||||
*
|
||||
* Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
|
||||
*
|
||||
* lock debugging, locking tree, deadlock detection started by:
|
||||
*
|
||||
* Copyright (C) 2004, LynuxWorks, Inc., Igor Manyilov, Bill Huey
|
||||
* Released under the General Public License (GPL).
|
||||
*/
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/poison.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/kallsyms.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/debug_locks.h>
|
||||
|
||||
#include "mutex-debug.h"
|
||||
|
||||
/*
|
||||
* Must be called with lock->wait_lock held.
|
||||
*/
|
||||
void debug_mutex_set_owner(struct mutex *lock, struct thread_info *new_owner)
|
||||
{
|
||||
lock->owner = new_owner;
|
||||
}
|
||||
|
||||
void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)
|
||||
{
|
||||
memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter));
|
||||
waiter->magic = waiter;
|
||||
INIT_LIST_HEAD(&waiter->list);
|
||||
}
|
||||
|
||||
void debug_mutex_wake_waiter(struct mutex *lock, struct mutex_waiter *waiter)
|
||||
{
|
||||
SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock));
|
||||
DEBUG_LOCKS_WARN_ON(list_empty(&lock->wait_list));
|
||||
DEBUG_LOCKS_WARN_ON(waiter->magic != waiter);
|
||||
DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
|
||||
}
|
||||
|
||||
void debug_mutex_free_waiter(struct mutex_waiter *waiter)
|
||||
{
|
||||
DEBUG_LOCKS_WARN_ON(!list_empty(&waiter->list));
|
||||
memset(waiter, MUTEX_DEBUG_FREE, sizeof(*waiter));
|
||||
}
|
||||
|
||||
void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
|
||||
struct thread_info *ti)
|
||||
{
|
||||
SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock));
|
||||
|
||||
/* Mark the current thread as blocked on the lock: */
|
||||
ti->task->blocked_on = waiter;
|
||||
waiter->lock = lock;
|
||||
}
|
||||
|
||||
void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
|
||||
struct thread_info *ti)
|
||||
{
|
||||
DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
|
||||
DEBUG_LOCKS_WARN_ON(waiter->task != ti->task);
|
||||
DEBUG_LOCKS_WARN_ON(ti->task->blocked_on != waiter);
|
||||
ti->task->blocked_on = NULL;
|
||||
|
||||
list_del_init(&waiter->list);
|
||||
waiter->task = NULL;
|
||||
}
|
||||
|
||||
void debug_mutex_unlock(struct mutex *lock)
|
||||
{
|
||||
if (unlikely(!debug_locks))
|
||||
return;
|
||||
|
||||
DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info());
|
||||
DEBUG_LOCKS_WARN_ON(lock->magic != lock);
|
||||
DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next);
|
||||
DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info());
|
||||
}
|
||||
|
||||
void debug_mutex_init(struct mutex *lock, const char *name,
|
||||
struct lock_class_key *key)
|
||||
{
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
/*
|
||||
* Make sure we are not reinitializing a held lock:
|
||||
*/
|
||||
debug_check_no_locks_freed((void *)lock, sizeof(*lock));
|
||||
lockdep_init_map(&lock->dep_map, name, key, 0);
|
||||
#endif
|
||||
lock->owner = NULL;
|
||||
lock->magic = lock;
|
||||
}
|
||||
|
||||
/***
|
||||
* mutex_destroy - mark a mutex unusable
|
||||
* @lock: the mutex to be destroyed
|
||||
*
|
||||
* This function marks the mutex uninitialized, and any subsequent
|
||||
* use of the mutex is forbidden. The mutex must not be locked when
|
||||
* this function is called.
|
||||
*/
|
||||
void fastcall mutex_destroy(struct mutex *lock)
|
||||
{
|
||||
DEBUG_LOCKS_WARN_ON(mutex_is_locked(lock));
|
||||
lock->magic = NULL;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL_GPL(mutex_destroy);
|
||||
53
kernel/mutex-debug.h
Normal file
53
kernel/mutex-debug.h
Normal file
@@ -0,0 +1,53 @@
|
||||
/*
|
||||
* Mutexes: blocking mutual exclusion locks
|
||||
*
|
||||
* started by Ingo Molnar:
|
||||
*
|
||||
* Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
|
||||
*
|
||||
* This file contains mutex debugging related internal declarations,
|
||||
* prototypes and inline functions, for the CONFIG_DEBUG_MUTEXES case.
|
||||
* More details are in kernel/mutex-debug.c.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This must be called with lock->wait_lock held.
|
||||
*/
|
||||
extern void
|
||||
debug_mutex_set_owner(struct mutex *lock, struct thread_info *new_owner);
|
||||
|
||||
static inline void debug_mutex_clear_owner(struct mutex *lock)
|
||||
{
|
||||
lock->owner = NULL;
|
||||
}
|
||||
|
||||
extern void debug_mutex_lock_common(struct mutex *lock,
|
||||
struct mutex_waiter *waiter);
|
||||
extern void debug_mutex_wake_waiter(struct mutex *lock,
|
||||
struct mutex_waiter *waiter);
|
||||
extern void debug_mutex_free_waiter(struct mutex_waiter *waiter);
|
||||
extern void debug_mutex_add_waiter(struct mutex *lock,
|
||||
struct mutex_waiter *waiter,
|
||||
struct thread_info *ti);
|
||||
extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
|
||||
struct thread_info *ti);
|
||||
extern void debug_mutex_unlock(struct mutex *lock);
|
||||
extern void debug_mutex_init(struct mutex *lock, const char *name,
|
||||
struct lock_class_key *key);
|
||||
|
||||
/*
 * Acquire the mutex-internal wait_lock in the DEBUG case.
 *
 * Interrupts are disabled by hand and the lock is taken through
 * __raw_spin_lock(), bypassing the generic spinlock wrappers; the
 * container_of() trick recovers the embedding mutex so its magic
 * cookie can be checked under the lock.
 */
#define spin_lock_mutex(lock, flags)					\
	do {								\
		struct mutex *l = container_of(lock, struct mutex, wait_lock); \
									\
		DEBUG_LOCKS_WARN_ON(in_interrupt());			\
		local_irq_save(flags);					\
		__raw_spin_lock(&(lock)->raw_lock);			\
		DEBUG_LOCKS_WARN_ON(l->magic != l);			\
	} while (0)
|
||||
|
||||
/*
 * Release the mutex-internal wait_lock, restore interrupts and
 * give preemption a chance to run (counterpart of spin_lock_mutex).
 */
#define spin_unlock_mutex(lock, flags)				\
	do {							\
		__raw_spin_unlock(&(lock)->raw_lock);		\
		local_irq_restore(flags);			\
		preempt_check_resched();			\
	} while (0)
|
||||
347
kernel/mutex.c
Normal file
347
kernel/mutex.c
Normal file
@@ -0,0 +1,347 @@
|
||||
/*
|
||||
* kernel/mutex.c
|
||||
*
|
||||
* Mutexes: blocking mutual exclusion locks
|
||||
*
|
||||
* Started by Ingo Molnar:
|
||||
*
|
||||
* Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
|
||||
*
|
||||
* Many thanks to Arjan van de Ven, Thomas Gleixner, Steven Rostedt and
|
||||
* David Howells for suggestions and improvements.
|
||||
*
|
||||
* Also see Documentation/mutex-design.txt.
|
||||
*/
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/debug_locks.h>
|
||||
|
||||
/*
|
||||
* In the DEBUG case we are using the "NULL fastpath" for mutexes,
|
||||
* which forces all calls into the slowpath:
|
||||
*/
|
||||
#ifdef CONFIG_DEBUG_MUTEXES
|
||||
# include "mutex-debug.h"
|
||||
# include <asm-generic/mutex-null.h>
|
||||
#else
|
||||
# include "mutex.h"
|
||||
# include <asm/mutex.h>
|
||||
#endif
|
||||
|
||||
/***
|
||||
* mutex_init - initialize the mutex
|
||||
* @lock: the mutex to be initialized
|
||||
*
|
||||
* Initialize the mutex to unlocked state.
|
||||
*
|
||||
* It is not allowed to initialize an already locked mutex.
|
||||
*/
|
||||
void
|
||||
__mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
|
||||
{
|
||||
atomic_set(&lock->count, 1);
|
||||
spin_lock_init(&lock->wait_lock);
|
||||
INIT_LIST_HEAD(&lock->wait_list);
|
||||
|
||||
debug_mutex_init(lock, name, key);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(__mutex_init);
|
||||
|
||||
/*
|
||||
* We split the mutex lock/unlock logic into separate fastpath and
|
||||
* slowpath functions, to reduce the register pressure on the fastpath.
|
||||
* We also put the fastpath first in the kernel image, to make sure the
|
||||
* branch is predicted by the CPU as default-untaken.
|
||||
*/
|
||||
static void fastcall noinline __sched
|
||||
__mutex_lock_slowpath(atomic_t *lock_count);
|
||||
|
||||
/***
|
||||
* mutex_lock - acquire the mutex
|
||||
* @lock: the mutex to be acquired
|
||||
*
|
||||
* Lock the mutex exclusively for this task. If the mutex is not
|
||||
* available right now, it will sleep until it can get it.
|
||||
*
|
||||
* The mutex must later on be released by the same task that
|
||||
* acquired it. Recursive locking is not allowed. The task
|
||||
* may not exit without first unlocking the mutex. Also, kernel
|
||||
* memory where the mutex resides mutex must not be freed with
|
||||
* the mutex still locked. The mutex must first be initialized
|
||||
* (or statically defined) before it can be locked. memset()-ing
|
||||
* the mutex to 0 is not allowed.
|
||||
*
|
||||
* ( The CONFIG_DEBUG_MUTEXES .config option turns on debugging
|
||||
* checks that will enforce the restrictions and will also do
|
||||
* deadlock debugging. )
|
||||
*
|
||||
* This function is similar to (but not equivalent to) down().
|
||||
*/
|
||||
void inline fastcall __sched mutex_lock(struct mutex *lock)
|
||||
{
|
||||
might_sleep();
|
||||
/*
|
||||
* The locking fastpath is the 1->0 transition from
|
||||
* 'unlocked' into 'locked' state.
|
||||
*/
|
||||
__mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(mutex_lock);
|
||||
|
||||
static void fastcall noinline __sched
|
||||
__mutex_unlock_slowpath(atomic_t *lock_count);
|
||||
|
||||
/***
|
||||
* mutex_unlock - release the mutex
|
||||
* @lock: the mutex to be released
|
||||
*
|
||||
* Unlock a mutex that has been locked by this task previously.
|
||||
*
|
||||
* This function must not be used in interrupt context. Unlocking
|
||||
* of a not locked mutex is not allowed.
|
||||
*
|
||||
* This function is similar to (but not equivalent to) up().
|
||||
*/
|
||||
void fastcall __sched mutex_unlock(struct mutex *lock)
|
||||
{
|
||||
/*
|
||||
* The unlocking fastpath is the 0->1 transition from 'locked'
|
||||
* into 'unlocked' state:
|
||||
*/
|
||||
__mutex_fastpath_unlock(&lock->count, __mutex_unlock_slowpath);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(mutex_unlock);
|
||||
|
||||
/*
|
||||
* Lock a mutex (possibly interruptible), slowpath:
|
||||
*/
|
||||
static inline int __sched
|
||||
__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass)
|
||||
{
|
||||
struct task_struct *task = current;
|
||||
struct mutex_waiter waiter;
|
||||
unsigned int old_val;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_mutex(&lock->wait_lock, flags);
|
||||
|
||||
debug_mutex_lock_common(lock, &waiter);
|
||||
mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
|
||||
debug_mutex_add_waiter(lock, &waiter, task->thread_info);
|
||||
|
||||
/* add waiting tasks to the end of the waitqueue (FIFO): */
|
||||
list_add_tail(&waiter.list, &lock->wait_list);
|
||||
waiter.task = task;
|
||||
|
||||
for (;;) {
|
||||
/*
|
||||
* Lets try to take the lock again - this is needed even if
|
||||
* we get here for the first time (shortly after failing to
|
||||
* acquire the lock), to make sure that we get a wakeup once
|
||||
* it's unlocked. Later on, if we sleep, this is the
|
||||
* operation that gives us the lock. We xchg it to -1, so
|
||||
* that when we release the lock, we properly wake up the
|
||||
* other waiters:
|
||||
*/
|
||||
old_val = atomic_xchg(&lock->count, -1);
|
||||
if (old_val == 1)
|
||||
break;
|
||||
|
||||
/*
|
||||
* got a signal? (This code gets eliminated in the
|
||||
* TASK_UNINTERRUPTIBLE case.)
|
||||
*/
|
||||
if (unlikely(state == TASK_INTERRUPTIBLE &&
|
||||
signal_pending(task))) {
|
||||
mutex_remove_waiter(lock, &waiter, task->thread_info);
|
||||
mutex_release(&lock->dep_map, 1, _RET_IP_);
|
||||
spin_unlock_mutex(&lock->wait_lock, flags);
|
||||
|
||||
debug_mutex_free_waiter(&waiter);
|
||||
return -EINTR;
|
||||
}
|
||||
__set_task_state(task, state);
|
||||
|
||||
/* didnt get the lock, go to sleep: */
|
||||
spin_unlock_mutex(&lock->wait_lock, flags);
|
||||
schedule();
|
||||
spin_lock_mutex(&lock->wait_lock, flags);
|
||||
}
|
||||
|
||||
/* got the lock - rejoice! */
|
||||
mutex_remove_waiter(lock, &waiter, task->thread_info);
|
||||
debug_mutex_set_owner(lock, task->thread_info);
|
||||
|
||||
/* set it to 0 if there are no waiters left: */
|
||||
if (likely(list_empty(&lock->wait_list)))
|
||||
atomic_set(&lock->count, 0);
|
||||
|
||||
spin_unlock_mutex(&lock->wait_lock, flags);
|
||||
|
||||
debug_mutex_free_waiter(&waiter);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Uninterruptible slowpath, called from the lock fastpath on contention. */
static void fastcall noinline __sched
__mutex_lock_slowpath(atomic_t *lock_count)
{
	struct mutex *lock = container_of(lock_count, struct mutex, count);

	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0);
}
|
||||
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
 * Lockdep-annotated variants: @subclass distinguishes nested
 * acquisitions of locks belonging to the same lock class.
 */
void __sched
mutex_lock_nested(struct mutex *lock, unsigned int subclass)
{
	might_sleep();
	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass);
}

EXPORT_SYMBOL_GPL(mutex_lock_nested);

int __sched
mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass)
{
	might_sleep();
	return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, subclass);
}

EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested);
#endif
|
||||
|
||||
/*
|
||||
* Release the lock, slowpath:
|
||||
*/
|
||||
static fastcall inline void
|
||||
__mutex_unlock_common_slowpath(atomic_t *lock_count, int nested)
|
||||
{
|
||||
struct mutex *lock = container_of(lock_count, struct mutex, count);
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_mutex(&lock->wait_lock, flags);
|
||||
mutex_release(&lock->dep_map, nested, _RET_IP_);
|
||||
debug_mutex_unlock(lock);
|
||||
|
||||
/*
|
||||
* some architectures leave the lock unlocked in the fastpath failure
|
||||
* case, others need to leave it locked. In the later case we have to
|
||||
* unlock it here
|
||||
*/
|
||||
if (__mutex_slowpath_needs_to_unlock())
|
||||
atomic_set(&lock->count, 1);
|
||||
|
||||
if (!list_empty(&lock->wait_list)) {
|
||||
/* get the first entry from the wait-list: */
|
||||
struct mutex_waiter *waiter =
|
||||
list_entry(lock->wait_list.next,
|
||||
struct mutex_waiter, list);
|
||||
|
||||
debug_mutex_wake_waiter(lock, waiter);
|
||||
|
||||
wake_up_process(waiter->task);
|
||||
}
|
||||
|
||||
debug_mutex_clear_owner(lock);
|
||||
|
||||
spin_unlock_mutex(&lock->wait_lock, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Release the lock, slowpath:
|
||||
*/
|
||||
static fastcall noinline void
|
||||
__mutex_unlock_slowpath(atomic_t *lock_count)
|
||||
{
|
||||
__mutex_unlock_common_slowpath(lock_count, 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Here come the less common (and hence less performance-critical) APIs:
|
||||
* mutex_lock_interruptible() and mutex_trylock().
|
||||
*/
|
||||
static int fastcall noinline __sched
|
||||
__mutex_lock_interruptible_slowpath(atomic_t *lock_count);
|
||||
|
||||
/***
|
||||
* mutex_lock_interruptible - acquire the mutex, interruptable
|
||||
* @lock: the mutex to be acquired
|
||||
*
|
||||
* Lock the mutex like mutex_lock(), and return 0 if the mutex has
|
||||
* been acquired or sleep until the mutex becomes available. If a
|
||||
* signal arrives while waiting for the lock then this function
|
||||
* returns -EINTR.
|
||||
*
|
||||
* This function is similar to (but not equivalent to) down_interruptible().
|
||||
*/
|
||||
int fastcall __sched mutex_lock_interruptible(struct mutex *lock)
|
||||
{
|
||||
might_sleep();
|
||||
return __mutex_fastpath_lock_retval
|
||||
(&lock->count, __mutex_lock_interruptible_slowpath);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(mutex_lock_interruptible);
|
||||
|
||||
/* Interruptible slowpath; returns 0 or -EINTR from the common path. */
static int fastcall noinline __sched
__mutex_lock_interruptible_slowpath(atomic_t *lock_count)
{
	struct mutex *lock = container_of(lock_count, struct mutex, count);

	return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0);
}
|
||||
|
||||
/*
|
||||
* Spinlock based trylock, we take the spinlock and check whether we
|
||||
* can get the lock:
|
||||
*/
|
||||
static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
|
||||
{
|
||||
struct mutex *lock = container_of(lock_count, struct mutex, count);
|
||||
unsigned long flags;
|
||||
int prev;
|
||||
|
||||
spin_lock_mutex(&lock->wait_lock, flags);
|
||||
|
||||
prev = atomic_xchg(&lock->count, -1);
|
||||
if (likely(prev == 1)) {
|
||||
debug_mutex_set_owner(lock, current_thread_info());
|
||||
mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
|
||||
}
|
||||
/* Set it back to 0 if there are no waiters: */
|
||||
if (likely(list_empty(&lock->wait_list)))
|
||||
atomic_set(&lock->count, 0);
|
||||
|
||||
spin_unlock_mutex(&lock->wait_lock, flags);
|
||||
|
||||
return prev == 1;
|
||||
}
|
||||
|
||||
/***
|
||||
* mutex_trylock - try acquire the mutex, without waiting
|
||||
* @lock: the mutex to be acquired
|
||||
*
|
||||
* Try to acquire the mutex atomically. Returns 1 if the mutex
|
||||
* has been acquired successfully, and 0 on contention.
|
||||
*
|
||||
* NOTE: this function follows the spin_trylock() convention, so
|
||||
* it is negated to the down_trylock() return values! Be careful
|
||||
* about this when converting semaphore users to mutexes.
|
||||
*
|
||||
* This function must not be used in interrupt context. The
|
||||
* mutex must be released by the same task that acquired it.
|
||||
*/
|
||||
int fastcall __sched mutex_trylock(struct mutex *lock)
|
||||
{
|
||||
return __mutex_fastpath_trylock(&lock->count,
|
||||
__mutex_trylock_slowpath);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(mutex_trylock);
|
||||
30
kernel/mutex.h
Normal file
30
kernel/mutex.h
Normal file
@@ -0,0 +1,30 @@
|
||||
/*
|
||||
* Mutexes: blocking mutual exclusion locks
|
||||
*
|
||||
* started by Ingo Molnar:
|
||||
*
|
||||
* Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
|
||||
*
|
||||
* This file contains mutex debugging related internal prototypes, for the
|
||||
* !CONFIG_DEBUG_MUTEXES case. Most of them are NOPs:
|
||||
*/
|
||||
|
||||
/*
 * !CONFIG_DEBUG_MUTEXES variants: the wait_lock is a plain spinlock
 * and all the debug hooks compile away to nothing.
 */
#define spin_lock_mutex(lock, flags) \
		do { spin_lock(lock); (void)(flags); } while (0)
#define spin_unlock_mutex(lock, flags) \
		do { spin_unlock(lock); (void)(flags); } while (0)
#define mutex_remove_waiter(lock, waiter, ti) \
		__list_del((waiter)->list.prev, (waiter)->list.next)

#define debug_mutex_set_owner(lock, new_owner)		do { } while (0)
#define debug_mutex_clear_owner(lock)			do { } while (0)
#define debug_mutex_wake_waiter(lock, waiter)		do { } while (0)
#define debug_mutex_free_waiter(waiter)			do { } while (0)
#define debug_mutex_add_waiter(lock, waiter, ti)	do { } while (0)
#define debug_mutex_unlock(lock)			do { } while (0)
#define debug_mutex_init(lock, name, key)		do { } while (0)

/* A function (not a macro) so unused-variable warnings are avoided. */
static inline void
debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)
{
}
|
||||
149
kernel/nsproxy.c
Normal file
149
kernel/nsproxy.c
Normal file
@@ -0,0 +1,149 @@
|
||||
/*
|
||||
* Copyright (C) 2006 IBM Corporation
|
||||
*
|
||||
* Author: Serge Hallyn <serue@us.ibm.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation, version 2 of the
|
||||
* License.
|
||||
*
|
||||
* Jun 2006 - namespaces support
|
||||
* OpenVZ, SWsoft Inc.
|
||||
* Pavel Emelianov <xemul@openvz.org>
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/version.h>
|
||||
#include <linux/nsproxy.h>
|
||||
#include <linux/init_task.h>
|
||||
#include <linux/mnt_namespace.h>
|
||||
#include <linux/utsname.h>
|
||||
#include <linux/pid_namespace.h>
|
||||
|
||||
struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
|
||||
|
||||
static inline void get_nsproxy(struct nsproxy *ns)
|
||||
{
|
||||
atomic_inc(&ns->count);
|
||||
}
|
||||
|
||||
void get_task_namespaces(struct task_struct *tsk)
|
||||
{
|
||||
struct nsproxy *ns = tsk->nsproxy;
|
||||
if (ns) {
|
||||
get_nsproxy(ns);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* creates a copy of "orig" with refcount 1.
|
||||
* This does not grab references to the contained namespaces,
|
||||
* so that needs to be done by dup_namespaces.
|
||||
*/
|
||||
static inline struct nsproxy *clone_namespaces(struct nsproxy *orig)
|
||||
{
|
||||
struct nsproxy *ns;
|
||||
|
||||
ns = kmemdup(orig, sizeof(struct nsproxy), GFP_KERNEL);
|
||||
if (ns)
|
||||
atomic_set(&ns->count, 1);
|
||||
return ns;
|
||||
}
|
||||
|
||||
/*
|
||||
* copies the nsproxy, setting refcount to 1, and grabbing a
|
||||
* reference to all contained namespaces. Called from
|
||||
* sys_unshare()
|
||||
*/
|
||||
struct nsproxy *dup_namespaces(struct nsproxy *orig)
|
||||
{
|
||||
struct nsproxy *ns = clone_namespaces(orig);
|
||||
|
||||
if (ns) {
|
||||
if (ns->mnt_ns)
|
||||
get_mnt_ns(ns->mnt_ns);
|
||||
if (ns->uts_ns)
|
||||
get_uts_ns(ns->uts_ns);
|
||||
if (ns->ipc_ns)
|
||||
get_ipc_ns(ns->ipc_ns);
|
||||
if (ns->pid_ns)
|
||||
get_pid_ns(ns->pid_ns);
|
||||
}
|
||||
|
||||
return ns;
|
||||
}
|
||||
|
||||
/*
|
||||
* called from clone. This now handles copy for nsproxy and all
|
||||
* namespaces therein.
|
||||
*/
|
||||
int copy_namespaces(int flags, struct task_struct *tsk)
|
||||
{
|
||||
struct nsproxy *old_ns = tsk->nsproxy;
|
||||
struct nsproxy *new_ns;
|
||||
int err = 0;
|
||||
|
||||
if (!old_ns)
|
||||
return 0;
|
||||
|
||||
get_nsproxy(old_ns);
|
||||
|
||||
if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC)))
|
||||
return 0;
|
||||
|
||||
new_ns = clone_namespaces(old_ns);
|
||||
if (!new_ns) {
|
||||
err = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
tsk->nsproxy = new_ns;
|
||||
|
||||
err = copy_mnt_ns(flags, tsk);
|
||||
if (err)
|
||||
goto out_ns;
|
||||
|
||||
err = copy_utsname(flags, tsk);
|
||||
if (err)
|
||||
goto out_uts;
|
||||
|
||||
err = copy_ipcs(flags, tsk);
|
||||
if (err)
|
||||
goto out_ipc;
|
||||
|
||||
err = copy_pid_ns(flags, tsk);
|
||||
if (err)
|
||||
goto out_pid;
|
||||
|
||||
out:
|
||||
put_nsproxy(old_ns);
|
||||
return err;
|
||||
|
||||
out_pid:
|
||||
if (new_ns->ipc_ns)
|
||||
put_ipc_ns(new_ns->ipc_ns);
|
||||
out_ipc:
|
||||
if (new_ns->uts_ns)
|
||||
put_uts_ns(new_ns->uts_ns);
|
||||
out_uts:
|
||||
if (new_ns->mnt_ns)
|
||||
put_mnt_ns(new_ns->mnt_ns);
|
||||
out_ns:
|
||||
tsk->nsproxy = old_ns;
|
||||
kfree(new_ns);
|
||||
goto out;
|
||||
}
|
||||
|
||||
void free_nsproxy(struct nsproxy *ns)
|
||||
{
|
||||
if (ns->mnt_ns)
|
||||
put_mnt_ns(ns->mnt_ns);
|
||||
if (ns->uts_ns)
|
||||
put_uts_ns(ns->uts_ns);
|
||||
if (ns->ipc_ns)
|
||||
put_ipc_ns(ns->ipc_ns);
|
||||
if (ns->pid_ns)
|
||||
put_pid_ns(ns->pid_ns);
|
||||
kfree(ns);
|
||||
}
|
||||
286
kernel/panic.c
Normal file
286
kernel/panic.c
Normal file
@@ -0,0 +1,286 @@
|
||||
/*
|
||||
* linux/kernel/panic.c
|
||||
*
|
||||
* Copyright (C) 1991, 1992 Linus Torvalds
|
||||
*/
|
||||
|
||||
/*
|
||||
* This function is used through-out the kernel (including mm and fs)
|
||||
* to indicate a major problem.
|
||||
*/
|
||||
#include <linux/module.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/reboot.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/sysrq.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/nmi.h>
|
||||
#include <linux/kexec.h>
|
||||
#include <linux/debug_locks.h>
|
||||
|
||||
int panic_on_oops;
|
||||
int tainted;
|
||||
static int pause_on_oops;
|
||||
static int pause_on_oops_flag;
|
||||
static DEFINE_SPINLOCK(pause_on_oops_lock);
|
||||
|
||||
int panic_timeout;
|
||||
|
||||
ATOMIC_NOTIFIER_HEAD(panic_notifier_list);
|
||||
|
||||
EXPORT_SYMBOL(panic_notifier_list);
|
||||
|
||||
static int __init panic_setup(char *str)
|
||||
{
|
||||
panic_timeout = simple_strtoul(str, NULL, 0);
|
||||
return 1;
|
||||
}
|
||||
__setup("panic=", panic_setup);
|
||||
|
||||
/* Default panic_blink implementation: blink nothing, waited 0 ms. */
static long no_blink(long time)
{
	return 0;
}
|
||||
|
||||
/* Returns how long it waited in ms */
long (*panic_blink)(long time);
EXPORT_SYMBOL(panic_blink);
||||
|
||||
/**
|
||||
* panic - halt the system
|
||||
* @fmt: The text string to print
|
||||
*
|
||||
* Display a message, then perform cleanups.
|
||||
*
|
||||
* This function never returns.
|
||||
*/
|
||||
|
||||
NORET_TYPE void panic(const char * fmt, ...)
|
||||
{
|
||||
long i;
|
||||
static char buf[1024];
|
||||
va_list args;
|
||||
#if defined(CONFIG_S390)
|
||||
unsigned long caller = (unsigned long) __builtin_return_address(0);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* It's possible to come here directly from a panic-assertion and not
|
||||
* have preempt disabled. Some functions called from here want
|
||||
* preempt to be disabled. No point enabling it later though...
|
||||
*/
|
||||
preempt_disable();
|
||||
|
||||
bust_spinlocks(1);
|
||||
va_start(args, fmt);
|
||||
vsnprintf(buf, sizeof(buf), fmt, args);
|
||||
va_end(args);
|
||||
printk(KERN_EMERG "Kernel panic - not syncing: %s\n",buf);
|
||||
bust_spinlocks(0);
|
||||
|
||||
/*
|
||||
* If we have crashed and we have a crash kernel loaded let it handle
|
||||
* everything else.
|
||||
* Do we want to call this before we try to display a message?
|
||||
*/
|
||||
crash_kexec(NULL);
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/*
|
||||
* Note smp_send_stop is the usual smp shutdown function, which
|
||||
* unfortunately means it may not be hardened to work in a panic
|
||||
* situation.
|
||||
*/
|
||||
smp_send_stop();
|
||||
#endif
|
||||
|
||||
atomic_notifier_call_chain(&panic_notifier_list, 0, buf);
|
||||
|
||||
if (!panic_blink)
|
||||
panic_blink = no_blink;
|
||||
|
||||
if (panic_timeout > 0) {
|
||||
/*
|
||||
* Delay timeout seconds before rebooting the machine.
|
||||
* We can't use the "normal" timers since we just panicked..
|
||||
*/
|
||||
printk(KERN_EMERG "Rebooting in %d seconds..",panic_timeout);
|
||||
for (i = 0; i < panic_timeout*1000; ) {
|
||||
touch_nmi_watchdog();
|
||||
i += panic_blink(i);
|
||||
mdelay(1);
|
||||
i++;
|
||||
}
|
||||
/* This will not be a clean reboot, with everything
|
||||
* shutting down. But if there is a chance of
|
||||
* rebooting the system it will be rebooted.
|
||||
*/
|
||||
emergency_restart();
|
||||
}
|
||||
#ifdef __sparc__
|
||||
{
|
||||
extern int stop_a_enabled;
|
||||
/* Make sure the user can actually press Stop-A (L1-A) */
|
||||
stop_a_enabled = 1;
|
||||
printk(KERN_EMERG "Press Stop-A (L1-A) to return to the boot prom\n");
|
||||
}
|
||||
#endif
|
||||
#if defined(CONFIG_S390)
|
||||
disabled_wait(caller);
|
||||
#endif
|
||||
local_irq_enable();
|
||||
for (i = 0;;) {
|
||||
touch_softlockup_watchdog();
|
||||
i += panic_blink(i);
|
||||
mdelay(1);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(panic);
|
||||
|
||||
/**
|
||||
* print_tainted - return a string to represent the kernel taint state.
|
||||
*
|
||||
* 'P' - Proprietary module has been loaded.
|
||||
* 'F' - Module has been forcibly loaded.
|
||||
* 'S' - SMP with CPUs not designed for SMP.
|
||||
* 'R' - User forced a module unload.
|
||||
* 'M' - Machine had a machine check experience.
|
||||
* 'B' - System has hit bad_page.
|
||||
* 'U' - Userspace-defined naughtiness.
|
||||
*
|
||||
* The string is overwritten by the next call to print_taint().
|
||||
*/
|
||||
|
||||
const char *print_tainted(void)
|
||||
{
|
||||
static char buf[20];
|
||||
if (tainted) {
|
||||
snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c",
|
||||
tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G',
|
||||
tainted & TAINT_FORCED_MODULE ? 'F' : ' ',
|
||||
tainted & TAINT_UNSAFE_SMP ? 'S' : ' ',
|
||||
tainted & TAINT_FORCED_RMMOD ? 'R' : ' ',
|
||||
tainted & TAINT_MACHINE_CHECK ? 'M' : ' ',
|
||||
tainted & TAINT_BAD_PAGE ? 'B' : ' ',
|
||||
tainted & TAINT_USER ? 'U' : ' ');
|
||||
}
|
||||
else
|
||||
snprintf(buf, sizeof(buf), "Not tainted");
|
||||
return(buf);
|
||||
}
|
||||
|
||||
void add_taint(unsigned flag)
|
||||
{
|
||||
debug_locks = 0; /* can't trust the integrity of the kernel anymore */
|
||||
tainted |= flag;
|
||||
}
|
||||
EXPORT_SYMBOL(add_taint);
|
||||
|
||||
static int __init pause_on_oops_setup(char *str)
|
||||
{
|
||||
pause_on_oops = simple_strtoul(str, NULL, 0);
|
||||
return 1;
|
||||
}
|
||||
__setup("pause_on_oops=", pause_on_oops_setup);
|
||||
|
||||
/*
 * Busy-wait for @msecs milliseconds, poking the NMI watchdog each
 * millisecond so it does not fire during the pause.
 */
static void spin_msec(int msecs)
{
	int i;

	for (i = 0; i < msecs; i++) {
		touch_nmi_watchdog();
		mdelay(1);
	}
}
|
||||
|
||||
/*
 * It just happens that oops_enter() and oops_exit() are identically
 * implemented...
 *
 * First oopsing CPU sets pause_on_oops_flag and proceeds; later CPUs
 * stall here so the first oops is not scrolled off the screen.
 */
static void do_oops_enter_exit(void)
{
	unsigned long flags;
	static int spin_counter;	/* seconds left; owned by the first staller */

	if (!pause_on_oops)
		return;

	spin_lock_irqsave(&pause_on_oops_lock, flags);
	if (pause_on_oops_flag == 0) {
		/* This CPU may now print the oops message */
		pause_on_oops_flag = 1;
	} else {
		/* We need to stall this CPU */
		if (!spin_counter) {
			/* This CPU gets to do the counting */
			spin_counter = pause_on_oops;
			do {
				/* Drop the lock while delaying so other
				 * CPUs can observe spin_counter. */
				spin_unlock(&pause_on_oops_lock);
				spin_msec(MSEC_PER_SEC);
				spin_lock(&pause_on_oops_lock);
			} while (--spin_counter);
			pause_on_oops_flag = 0;
		} else {
			/* This CPU waits for a different one */
			while (spin_counter) {
				spin_unlock(&pause_on_oops_lock);
				spin_msec(1);
				spin_lock(&pause_on_oops_lock);
			}
		}
	}
	spin_unlock_irqrestore(&pause_on_oops_lock, flags);
}
|
||||
|
||||
/*
|
||||
* Return true if the calling CPU is allowed to print oops-related info. This
|
||||
* is a bit racy..
|
||||
*/
|
||||
int oops_may_print(void)
|
||||
{
|
||||
return pause_on_oops_flag == 0;
|
||||
}
|
||||
|
||||
/*
 * Called when the architecture enters its oops handler, before it prints
 * anything.  If this is the first CPU to oops, and it's oopsing the first time
 * then let it proceed.
 *
 * This is all enabled by the pause_on_oops kernel boot option.  We do all this
 * to ensure that oopses don't scroll off the screen.  It has the side-effect
 * of preventing later-oopsing CPUs from mucking up the display, too.
 *
 * It turns out that the CPU which is allowed to print ends up pausing for the
 * right duration, whereas all the other CPUs pause for twice as long: once in
 * oops_enter(), once in oops_exit().
 */
void oops_enter(void)
{
	/* An oopsing kernel may hold locks in an unknown state. */
	debug_locks_off(); /* can't trust the integrity of the kernel anymore */
	do_oops_enter_exit();
}
|
||||
|
||||
/*
 * Called when the architecture exits its oops handler, after printing
 * everything.  Pairs with oops_enter(); see do_oops_enter_exit() for the
 * stall logic shared by both.
 */
void oops_exit(void)
{
	do_oops_enter_exit();
}
|
||||
|
||||
#ifdef CONFIG_CC_STACKPROTECTOR
|
||||
/*
 * Called when gcc's -fstack-protector feature is used, and
 * gcc detects corruption of the on-stack canary value.
 * Never returns: panics immediately, since the stack is untrustworthy.
 */
void __stack_chk_fail(void)
{
	panic("stack-protector: Kernel stack is corrupted");
}
EXPORT_SYMBOL(__stack_chk_fail);
|
||||
#endif
|
||||
751
kernel/params.c
Normal file
751
kernel/params.c
Normal file
@@ -0,0 +1,751 @@
|
||||
/* Helpers for initial module or kernel cmdline parsing
|
||||
Copyright (C) 2001 Rusty Russell.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
#include <linux/moduleparam.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
#if 0
|
||||
#define DEBUGP printk
|
||||
#else
|
||||
#define DEBUGP(fmt, a...)
|
||||
#endif
|
||||
|
||||
/* Map '-' to '_' so "foo-bar" and "foo_bar" name the same parameter. */
static inline char dash2underscore(char c)
{
	return (c == '-') ? '_' : c;
}
|
||||
|
||||
/* Compare a user-supplied name against a parameter name, treating '-'
 * in @input as equivalent to '_'.  Returns 1 on match, 0 otherwise. */
static inline int parameq(const char *input, const char *paramname)
{
	unsigned int i = 0;

	while (dash2underscore(input[i]) == paramname[i]) {
		if (input[i] == '\0')
			return 1;
		i++;
	}
	return 0;
}
|
||||
|
||||
static int parse_one(char *param,
|
||||
char *val,
|
||||
struct kernel_param *params,
|
||||
unsigned num_params,
|
||||
int (*handle_unknown)(char *param, char *val))
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
/* Find parameter */
|
||||
for (i = 0; i < num_params; i++) {
|
||||
if (parameq(param, params[i].name)) {
|
||||
DEBUGP("They are equal! Calling %p\n",
|
||||
params[i].set);
|
||||
return params[i].set(val, ¶ms[i]);
|
||||
}
|
||||
}
|
||||
|
||||
if (handle_unknown) {
|
||||
DEBUGP("Unknown argument: calling %p\n", handle_unknown);
|
||||
return handle_unknown(param, val);
|
||||
}
|
||||
|
||||
DEBUGP("Unknown argument `%s'\n", param);
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
/* You can use " around spaces, but can't escape ". */
/* Hyphens and underscores equivalent in parameter names. */
/*
 * Split the next "name[=value]" token off @args, NUL-terminating it in
 * place (mutates @args).  *param points at the name, *val at the value
 * or NULL when there is no '='.  Returns a pointer past the token with
 * trailing spaces skipped.
 */
static char *next_arg(char *args, char **param, char **val)
{
	unsigned int i, equals = 0;
	int in_quote = 0, quoted = 0;
	char *next;

	/* A leading quote protects spaces in the parameter name itself. */
	if (*args == '"') {
		args++;
		in_quote = 1;
		quoted = 1;
	}

	/* Scan to the first unquoted space, remembering the first '='. */
	for (i = 0; args[i]; i++) {
		if (args[i] == ' ' && !in_quote)
			break;
		if (equals == 0) {
			if (args[i] == '=')
				equals = i;
		}
		if (args[i] == '"')
			in_quote = !in_quote;
	}

	*param = args;
	if (!equals)
		*val = NULL;
	else {
		args[equals] = '\0';
		*val = args + equals + 1;

		/* Don't include quotes in value. */
		if (**val == '"') {
			(*val)++;
			if (args[i-1] == '"')
				args[i-1] = '\0';
		}
		if (quoted && args[i-1] == '"')
			args[i-1] = '\0';
	}

	/* Terminate the token and step past it (if not at end of string). */
	if (args[i]) {
		args[i] = '\0';
		next = args + i + 1;
	} else
		next = args + i;

	/* Chew up trailing spaces. */
	while (*next == ' ')
		next++;
	return next;
}
|
||||
|
||||
/* Args looks like "foo=bar,bar2 baz=fuz wiz". */
|
||||
int parse_args(const char *name,
|
||||
char *args,
|
||||
struct kernel_param *params,
|
||||
unsigned num,
|
||||
int (*unknown)(char *param, char *val))
|
||||
{
|
||||
char *param, *val;
|
||||
|
||||
DEBUGP("Parsing ARGS: %s\n", args);
|
||||
|
||||
/* Chew leading spaces */
|
||||
while (*args == ' ')
|
||||
args++;
|
||||
|
||||
while (*args) {
|
||||
int ret;
|
||||
int irq_was_disabled;
|
||||
|
||||
args = next_arg(args, ¶m, &val);
|
||||
irq_was_disabled = irqs_disabled();
|
||||
ret = parse_one(param, val, params, num, unknown);
|
||||
if (irq_was_disabled && !irqs_disabled()) {
|
||||
printk(KERN_WARNING "parse_args(): option '%s' enabled "
|
||||
"irq's!\n", param);
|
||||
}
|
||||
switch (ret) {
|
||||
case -ENOENT:
|
||||
printk(KERN_ERR "%s: Unknown parameter `%s'\n",
|
||||
name, param);
|
||||
return ret;
|
||||
case -ENOSPC:
|
||||
printk(KERN_ERR
|
||||
"%s: `%s' too large for parameter `%s'\n",
|
||||
name, val ?: "", param);
|
||||
return ret;
|
||||
case 0:
|
||||
break;
|
||||
default:
|
||||
printk(KERN_ERR
|
||||
"%s: `%s' invalid for parameter `%s'\n",
|
||||
name, val ?: "", param);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
/* All parsed OK. */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Lazy bastard, eh? */
/*
 * Stamp out param_set_NAME()/param_get_NAME() pairs for the basic
 * integer types.  set parses with @strtolfn into the wider @tmptype and
 * rejects values that do not round-trip through @type; get formats with
 * @format.  NOTE(review): byte uses "%c", so param_get_byte prints the
 * raw character rather than a number -- looks intentional here, confirm.
 */
#define STANDARD_PARAM_DEF(name, type, format, tmptype, strtolfn)	\
	int param_set_##name(const char *val, struct kernel_param *kp)	\
	{								\
		char *endp;						\
		tmptype l;						\
									\
		if (!val) return -EINVAL;				\
		l = strtolfn(val, &endp, 0);				\
		if (endp == val || ((type)l != l))			\
			return -EINVAL;					\
		*((type *)kp->arg) = l;					\
		return 0;						\
	}								\
	int param_get_##name(char *buffer, struct kernel_param *kp)	\
	{								\
		return sprintf(buffer, format, *((type *)kp->arg));	\
	}

STANDARD_PARAM_DEF(byte, unsigned char, "%c", unsigned long, simple_strtoul);
STANDARD_PARAM_DEF(short, short, "%hi", long, simple_strtol);
STANDARD_PARAM_DEF(ushort, unsigned short, "%hu", unsigned long, simple_strtoul);
STANDARD_PARAM_DEF(int, int, "%i", long, simple_strtol);
STANDARD_PARAM_DEF(uint, unsigned int, "%u", unsigned long, simple_strtoul);
STANDARD_PARAM_DEF(long, long, "%li", long, simple_strtol);
STANDARD_PARAM_DEF(ulong, unsigned long, "%lu", unsigned long, simple_strtoul);
|
||||
|
||||
/*
 * Set a char* parameter.  The string is NOT copied: kp->arg ends up
 * aliasing @val, so the caller's buffer must outlive the parameter.
 * Rejects NULL (-EINVAL) and strings over 1024 bytes (-ENOSPC).
 */
int param_set_charp(const char *val, struct kernel_param *kp)
{
	if (!val) {
		printk(KERN_ERR "%s: string parameter expected\n",
		       kp->name);
		return -EINVAL;
	}

	/* Arbitrary sanity cap on parameter string length. */
	if (strlen(val) > 1024) {
		printk(KERN_ERR "%s: string parameter too long\n",
		       kp->name);
		return -ENOSPC;
	}

	*(char **)kp->arg = (char *)val;
	return 0;
}
|
||||
|
||||
/* Format a char* parameter into @buffer; returns the byte count.
 * NOTE(review): unbounded sprintf -- assumes buffer is page-sized as
 * elsewhere in this file. */
int param_get_charp(char *buffer, struct kernel_param *kp)
{
	return sprintf(buffer, "%s", *((char **)kp->arg));
}
|
||||
|
||||
int param_set_bool(const char *val, struct kernel_param *kp)
|
||||
{
|
||||
/* No equals means "set"... */
|
||||
if (!val) val = "1";
|
||||
|
||||
/* One of =[yYnN01] */
|
||||
switch (val[0]) {
|
||||
case 'y': case 'Y': case '1':
|
||||
*(int *)kp->arg = 1;
|
||||
return 0;
|
||||
case 'n': case 'N': case '0':
|
||||
*(int *)kp->arg = 0;
|
||||
return 0;
|
||||
}
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Format an int-backed boolean as a single 'Y' or 'N' character. */
int param_get_bool(char *buffer, struct kernel_param *kp)
{
	/* Y and N chosen as being relatively non-coder friendly */
	return sprintf(buffer, "%c", (*(int *)kp->arg) ? 'Y' : 'N');
}
|
||||
|
||||
/* Set an inverted boolean: parse @val like param_set_bool, then store
 * the logical negation.  Parsing goes through a dummy kernel_param so
 * error messages and semantics stay identical to param_set_bool. */
int param_set_invbool(const char *val, struct kernel_param *kp)
{
	int boolval, ret;
	struct kernel_param dummy = { .arg = &boolval };

	ret = param_set_bool(val, &dummy);
	if (ret == 0)
		*(int *)kp->arg = !boolval;
	return ret;
}
|
||||
|
||||
/* Format an inverted boolean: prints 'Y' when the stored value is 0. */
int param_get_invbool(char *buffer, struct kernel_param *kp)
{
	int val;
	struct kernel_param dummy = { .arg = &val };

	/* Negate first, then let param_get_bool do the formatting. */
	val = !*(int *)kp->arg;
	return param_get_bool(buffer, &dummy);
}
|
||||
|
||||
/* We cheat here and temporarily mangle the string. */
/*
 * Parse a comma-separated list in @val, calling @set once per element
 * into successive @elemsize slots starting at @elem.  Enforces between
 * @min and @max elements; *num receives the count.  Each element is
 * NUL-terminated in place by casting away const (hence "mangle").
 */
static int param_array(const char *name,
		       const char *val,
		       unsigned int min, unsigned int max,
		       void *elem, int elemsize,
		       int (*set)(const char *, struct kernel_param *kp),
		       int *num)
{
	int ret;
	struct kernel_param kp;
	char save;

	/* Get the name right for errors. */
	kp.name = name;
	kp.arg = elem;

	/* No equals sign? */
	if (!val) {
		printk(KERN_ERR "%s: expects arguments\n", name);
		return -EINVAL;
	}

	*num = 0;
	/* We expect a comma-separated list of values. */
	do {
		int len;

		if (*num == max) {
			printk(KERN_ERR "%s: can only take %i arguments\n",
			       name, max);
			return -EINVAL;
		}
		len = strcspn(val, ",");

		/* nul-terminate and parse */
		save = val[len];
		((char *)val)[len] = '\0';
		ret = set(val, &kp);

		if (ret != 0)
			return ret;
		/* void* arithmetic: GCC extension, treated as char*. */
		kp.arg += elemsize;
		val += len+1;
		(*num)++;
	} while (save == ',');

	if (*num < min) {
		printk(KERN_ERR "%s: needs at least %i arguments\n",
		       name, min);
		return -EINVAL;
	}
	return 0;
}
|
||||
|
||||
/* set handler for module_param_array(): forwards to param_array using
 * the kparam_array descriptor stored in kp->arg.  temp_num is a dummy
 * sink when the array was declared without a count variable. */
int param_array_set(const char *val, struct kernel_param *kp)
{
	struct kparam_array *arr = kp->arg;
	unsigned int temp_num;

	return param_array(kp->name, val, 1, arr->max, arr->elem,
			   arr->elemsize, arr->set, arr->num ?: &temp_num);
}
|
||||
|
||||
/* get handler for module_param_array(): formats each element with the
 * per-element get handler, comma-separated, and returns total length.
 * NOTE(review): no bound on @buffer -- assumed page-sized by sysfs. */
int param_array_get(char *buffer, struct kernel_param *kp)
{
	int i, off, ret;
	struct kparam_array *arr = kp->arg;
	struct kernel_param p;

	/* Copy kp so we can repoint .arg at each element in turn. */
	p = *kp;
	for (i = off = 0; i < (arr->num ? *arr->num : arr->max); i++) {
		if (i)
			buffer[off++] = ',';
		p.arg = arr->elem + arr->elemsize * i;
		ret = arr->get(buffer + off, &p);
		if (ret < 0)
			return ret;
		off += ret;
	}
	buffer[off] = '\0';
	return off;
}
|
||||
|
||||
/* Set a fixed-size string parameter by copying @val into the
 * kparam_string buffer (unlike param_set_charp, which aliases).
 * Rejects NULL and strings that do not fit including the NUL. */
int param_set_copystring(const char *val, struct kernel_param *kp)
{
	struct kparam_string *kps = kp->arg;

	if (!val) {
		printk(KERN_ERR "%s: missing param set value\n", kp->name);
		return -EINVAL;
	}
	if (strlen(val)+1 > kps->maxlen) {
		printk(KERN_ERR "%s: string doesn't fit in %u chars.\n",
		       kp->name, kps->maxlen-1);
		return -ENOSPC;
	}
	/* Length was verified above, so plain strcpy is safe here. */
	strcpy(kps->string, val);
	return 0;
}
|
||||
|
||||
/* Copy a kparam_string parameter into @buffer (bounded by maxlen);
 * returns the source string length, per strlcpy semantics. */
int param_get_string(char *buffer, struct kernel_param *kp)
{
	struct kparam_string *kps = kp->arg;
	return strlcpy(buffer, kps->string, kps->maxlen);
}
|
||||
|
||||
/* sysfs output in /sys/modules/XYZ/parameters/ */
|
||||
|
||||
extern struct kernel_param __start___param[], __stop___param[];
|
||||
|
||||
#define MAX_KBUILD_MODNAME KOBJ_NAME_LEN
|
||||
|
||||
/* One sysfs attribute per exported parameter: binds a module_attribute
 * (show/store entry points) to its backing kernel_param. */
struct param_attribute
{
	struct module_attribute mattr;
	struct kernel_param *param;
};

/* Per-module bundle: the sysfs "parameters" group followed by a
 * flexible array of its param_attributes (allocated in one chunk). */
struct module_param_attrs
{
	struct attribute_group grp;
	struct param_attribute attrs[0];
};
|
||||
|
||||
#ifdef CONFIG_SYSFS
|
||||
/* No trailing semicolon: the macro must expand to a plain expression
 * so it can be used anywhere an expression is valid. */
#define to_param_attr(n) container_of(n, struct param_attribute, mattr)
|
||||
|
||||
/* sysfs show: format the parameter via its get handler and append a
 * newline.  -EPERM for write-only parameters (no get handler). */
static ssize_t param_attr_show(struct module_attribute *mattr,
			       struct module *mod, char *buf)
{
	int count;
	struct param_attribute *attribute = to_param_attr(mattr);

	if (!attribute->param->get)
		return -EPERM;

	count = attribute->param->get(buf, attribute->param);
	if (count > 0) {
		/* sysfs convention: values end with a newline. */
		strcat(buf, "\n");
		++count;
	}
	return count;
}
|
||||
|
||||
/* sysfs always hands a nul-terminated string in buf. We rely on that. */
/* sysfs store: parse @buf via the parameter's set handler.
 * -EPERM for read-only parameters; on success consume all of @len. */
static ssize_t param_attr_store(struct module_attribute *mattr,
				struct module *owner,
				const char *buf, size_t len)
{
	int err;
	struct param_attribute *attribute = to_param_attr(mattr);

	if (!attribute->param->set)
		return -EPERM;

	err = attribute->param->set(buf, attribute->param);
	if (!err)
		return len;
	return err;
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_MODULES
|
||||
#define __modinit
|
||||
#else
|
||||
#define __modinit __init
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SYSFS
|
||||
/*
 * param_sysfs_setup - setup sysfs support for one module or KBUILD_MODNAME
 * @mk: struct module_kobject (contains parent kobject)
 * @kparam: array of struct kernel_param, the actual parameter definitions
 * @num_params: number of entries in array
 * @name_skip: offset where the parameter name start in kparam[].name. Needed for built-in "modules"
 *
 * Create a kobject for a (per-module) group of parameters, and create files
 * in sysfs. A pointer to the param_kobject is returned on success,
 * NULL if there's no parameter to export, or other ERR_PTR(err).
 */
static __modinit struct module_param_attrs *
param_sysfs_setup(struct module_kobject *mk,
		  struct kernel_param *kparam,
		  unsigned int num_params,
		  unsigned int name_skip)
{
	struct module_param_attrs *mp;
	unsigned int valid_attrs = 0;
	unsigned int i, size[2];
	struct param_attribute *pattr;
	struct attribute **gattr;
	int err;

	/* Only parameters with non-zero permissions get sysfs files. */
	for (i=0; i<num_params; i++) {
		if (kparam[i].perm)
			valid_attrs++;
	}

	if (!valid_attrs)
		return NULL;

	/* Single allocation: [module_param_attrs + attrs[]][attr ptr array].
	 * size[0] is the struct + flexible array, aligned for the pointer
	 * array; size[1] is the NULL-terminated pointer array. */
	size[0] = ALIGN(sizeof(*mp) +
			valid_attrs * sizeof(mp->attrs[0]),
			sizeof(mp->grp.attrs[0]));
	size[1] = (valid_attrs + 1) * sizeof(mp->grp.attrs[0]);

	mp = kmalloc(size[0] + size[1], GFP_KERNEL);
	if (!mp)
		return ERR_PTR(-ENOMEM);

	mp->grp.name = "parameters";
	mp->grp.attrs = (void *)mp + size[0];

	pattr = &mp->attrs[0];
	gattr = &mp->grp.attrs[0];
	for (i = 0; i < num_params; i++) {
		struct kernel_param *kp = &kparam[i];
		if (kp->perm) {
			pattr->param = kp;
			pattr->mattr.show = param_attr_show;
			pattr->mattr.store = param_attr_store;
			/* Skip the "modname." prefix for built-ins. */
			pattr->mattr.attr.name = (char *)&kp->name[name_skip];
			pattr->mattr.attr.owner = mk->mod;
			pattr->mattr.attr.mode = kp->perm;
			*(gattr++) = &(pattr++)->mattr.attr;
		}
	}
	*gattr = NULL;	/* attribute list is NULL-terminated */

	if ((err = sysfs_create_group(&mk->kobj, &mp->grp))) {
		kfree(mp);
		return ERR_PTR(err);
	}
	return mp;
}
|
||||
|
||||
#ifdef CONFIG_MODULES
|
||||
/*
 * module_param_sysfs_setup - setup sysfs support for one module
 * @mod: module
 * @kparam: module parameters (array)
 * @num_params: number of module parameters
 *
 * Adds sysfs entries for module parameters, and creates a link from
 * /sys/module/[mod->name]/parameters to /sys/parameters/[mod->name]/
 *
 * Returns 0 on success (including the no-exported-params case, where
 * param_sysfs_setup returns NULL), or a negative errno.
 */
int module_param_sysfs_setup(struct module *mod,
			     struct kernel_param *kparam,
			     unsigned int num_params)
{
	struct module_param_attrs *mp;

	mp = param_sysfs_setup(&mod->mkobj, kparam, num_params, 0);
	if (IS_ERR(mp))
		return PTR_ERR(mp);

	mod->param_attrs = mp;
	return 0;
}
|
||||
|
||||
/*
 * module_param_sysfs_remove - remove sysfs support for one module
 * @mod: module
 *
 * Remove sysfs entries for module parameters and the corresponding
 * kobject.  Safe to call when the module exported no parameters
 * (param_attrs is NULL).
 */
void module_param_sysfs_remove(struct module *mod)
{
	if (mod->param_attrs) {
		sysfs_remove_group(&mod->mkobj.kobj,
				   &mod->param_attrs->grp);
		/* We are positive that no one is using any param
		 * attrs at this point.  Deallocate immediately. */
		kfree(mod->param_attrs);
		mod->param_attrs = NULL;
	}
}
|
||||
#endif
|
||||
|
||||
/*
 * kernel_param_sysfs_setup - wrapper for built-in params support
 *
 * Creates a module kobject named @name under the module subsystem and
 * attaches the parameter group for a built-in "module".  Boot-time
 * only; allocation/registration failure is fatal (BUG_ON).
 */
static void __init kernel_param_sysfs_setup(const char *name,
					    struct kernel_param *kparam,
					    unsigned int num_params,
					    unsigned int name_skip)
{
	struct module_kobject *mk;
	int ret;

	mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL);
	BUG_ON(!mk);

	mk->mod = THIS_MODULE;
	kobj_set_kset_s(mk, module_subsys);
	kobject_set_name(&mk->kobj, name);
	kobject_init(&mk->kobj);
	ret = kobject_add(&mk->kobj);
	BUG_ON(ret < 0);
	/* NOTE(review): return value (possibly ERR_PTR) is ignored here,
	 * unlike the module path -- confirm this is intentional. */
	param_sysfs_setup(mk, kparam, num_params, name_skip);
	kobject_uevent(&mk->kobj, KOBJ_ADD);
}
|
||||
|
||||
/*
 * param_sysfs_builtin - add contents in /sys/parameters for built-in modules
 *
 * Add module_parameters to sysfs for "modules" built into the kernel.
 *
 * The "module" name (KBUILD_MODNAME) is stored before a dot, the
 * "parameter" name is stored behind a dot in kernel_param->name. So,
 * extract the "module" name for all built-in kernel_param-eters,
 * and for all who have the same, call kernel_param_sysfs_setup.
 *
 * Relies on the __param section being sorted so that parameters of the
 * same built-in module are contiguous (kp_begin..kp_begin+count).
 */
static void __init param_sysfs_builtin(void)
{
	struct kernel_param *kp, *kp_begin = NULL;
	unsigned int i, name_len, count = 0;
	char modname[MAX_KBUILD_MODNAME + 1] = "";

	for (i=0; i < __stop___param - __start___param; i++) {
		char *dot;

		kp = &__start___param[i];

		/* We do not handle args without periods. */
		dot = memchr(kp->name, '.', MAX_KBUILD_MODNAME);
		if (!dot) {
			DEBUGP("couldn't find period in %s\n", kp->name);
			continue;
		}
		name_len = dot - kp->name;

		/* new kbuild_modname? */
		if (strlen(modname) != name_len
		    || strncmp(modname, kp->name, name_len) != 0) {
			/* add a new kobject for previous kernel_params. */
			if (count)
				kernel_param_sysfs_setup(modname,
							 kp_begin,
							 count,
							 strlen(modname)+1);

			strncpy(modname, kp->name, name_len);
			modname[name_len] = '\0';
			count = 0;
			kp_begin = kp;
		}
		count++;
	}

	/* last kernel_params need to be registered as well */
	if (count)
		kernel_param_sysfs_setup(modname, kp_begin, count,
					 strlen(modname)+1);
}
|
||||
|
||||
|
||||
/* module-related sysfs stuff */
|
||||
|
||||
/* No trailing semicolons: these must expand to plain expressions so
 * they can be used anywhere an expression is valid. */
#define to_module_attr(n) container_of(n, struct module_attribute, attr)
#define to_module_kobject(n) container_of(n, struct module_kobject, kobj)
|
||||
|
||||
static ssize_t module_attr_show(struct kobject *kobj,
|
||||
struct attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct module_attribute *attribute;
|
||||
struct module_kobject *mk;
|
||||
int ret;
|
||||
|
||||
attribute = to_module_attr(attr);
|
||||
mk = to_module_kobject(kobj);
|
||||
|
||||
if (!attribute->show)
|
||||
return -EIO;
|
||||
|
||||
ret = attribute->show(attribute, mk->mod, buf);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Generic sysfs store for module attributes: dispatch to the
 * module_attribute's own store handler, or -EIO if it has none. */
static ssize_t module_attr_store(struct kobject *kobj,
				 struct attribute *attr,
				 const char *buf, size_t len)
{
	struct module_attribute *attribute;
	struct module_kobject *mk;

	attribute = to_module_attr(attr);
	mk = to_module_kobject(kobj);

	if (!attribute->store)
		return -EIO;

	return attribute->store(attribute, mk->mod, buf, len);
}
|
||||
|
||||
static struct sysfs_ops module_sysfs_ops = {
|
||||
.show = module_attr_show,
|
||||
.store = module_attr_store,
|
||||
};
|
||||
|
||||
static struct kobj_type module_ktype;
|
||||
|
||||
static int uevent_filter(struct kset *kset, struct kobject *kobj)
|
||||
{
|
||||
struct kobj_type *ktype = get_ktype(kobj);
|
||||
|
||||
if (ktype == &module_ktype)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct kset_uevent_ops module_uevent_ops = {
|
||||
.filter = uevent_filter,
|
||||
};
|
||||
|
||||
decl_subsys(module, &module_ktype, &module_uevent_ops);
|
||||
|
||||
static struct kobj_type module_ktype = {
|
||||
.sysfs_ops = &module_sysfs_ops,
|
||||
};
|
||||
|
||||
/*
 * param_sysfs_init - wrapper for built-in params support
 *
 * Registers the "module" subsystem in sysfs, then populates it with
 * the parameters of built-in modules.  Runs as a subsys initcall so it
 * precedes device/module initcalls that may look at /sys/module.
 */
static int __init param_sysfs_init(void)
{
	int ret;

	ret = subsystem_register(&module_subsys);
	if (ret < 0) {
		printk(KERN_WARNING "%s (%d): subsystem_register error: %d\n",
			__FILE__, __LINE__, ret);
		return ret;
	}

	param_sysfs_builtin();

	return 0;
}
subsys_initcall(param_sysfs_init);
|
||||
|
||||
#else
|
||||
#if 0
|
||||
static struct sysfs_ops module_sysfs_ops = {
|
||||
.show = NULL,
|
||||
.store = NULL,
|
||||
};
|
||||
#endif
|
||||
#endif
|
||||
|
||||
EXPORT_SYMBOL(param_set_byte);
|
||||
EXPORT_SYMBOL(param_get_byte);
|
||||
EXPORT_SYMBOL(param_set_short);
|
||||
EXPORT_SYMBOL(param_get_short);
|
||||
EXPORT_SYMBOL(param_set_ushort);
|
||||
EXPORT_SYMBOL(param_get_ushort);
|
||||
EXPORT_SYMBOL(param_set_int);
|
||||
EXPORT_SYMBOL(param_get_int);
|
||||
EXPORT_SYMBOL(param_set_uint);
|
||||
EXPORT_SYMBOL(param_get_uint);
|
||||
EXPORT_SYMBOL(param_set_long);
|
||||
EXPORT_SYMBOL(param_get_long);
|
||||
EXPORT_SYMBOL(param_set_ulong);
|
||||
EXPORT_SYMBOL(param_get_ulong);
|
||||
EXPORT_SYMBOL(param_set_charp);
|
||||
EXPORT_SYMBOL(param_get_charp);
|
||||
EXPORT_SYMBOL(param_set_bool);
|
||||
EXPORT_SYMBOL(param_get_bool);
|
||||
EXPORT_SYMBOL(param_set_invbool);
|
||||
EXPORT_SYMBOL(param_get_invbool);
|
||||
EXPORT_SYMBOL(param_array_set);
|
||||
EXPORT_SYMBOL(param_array_get);
|
||||
EXPORT_SYMBOL(param_set_copystring);
|
||||
EXPORT_SYMBOL(param_get_string);
|
||||
418
kernel/pid.c
Normal file
418
kernel/pid.c
Normal file
@@ -0,0 +1,418 @@
|
||||
/*
|
||||
* Generic pidhash and scalable, time-bounded PID allocator
|
||||
*
|
||||
* (C) 2002-2003 William Irwin, IBM
|
||||
* (C) 2004 William Irwin, Oracle
|
||||
* (C) 2002-2004 Ingo Molnar, Red Hat
|
||||
*
|
||||
* pid-structures are backing objects for tasks sharing a given ID to chain
|
||||
* against. There is very little to them aside from hashing them and
|
||||
* parking tasks using given ID's on a list.
|
||||
*
|
||||
* The hash is always changed with the tasklist_lock write-acquired,
|
||||
* and the hash is only accessed with the tasklist_lock at least
|
||||
* read-acquired, so there's no additional SMP locking needed here.
|
||||
*
|
||||
* We have a list of bitmap pages, which bitmaps represent the PID space.
|
||||
* Allocating and freeing PIDs is completely lockless. The worst-case
|
||||
* allocation scenario when all but one out of 1 million PIDs possible are
|
||||
* allocated already: the scanning of 32 list entries and at most PAGE_SIZE
|
||||
* bytes. The typical fastpath is a single successful setbit. Freeing is O(1).
|
||||
*/
|
||||
|
||||
#include <linux/mm.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/bootmem.h>
|
||||
#include <linux/hash.h>
|
||||
#include <linux/pid_namespace.h>
|
||||
|
||||
#define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift)
|
||||
static struct hlist_head *pid_hash;
|
||||
static int pidhash_shift;
|
||||
static struct kmem_cache *pid_cachep;
|
||||
|
||||
int pid_max = PID_MAX_DEFAULT;
|
||||
|
||||
#define RESERVED_PIDS 300
|
||||
|
||||
int pid_max_min = RESERVED_PIDS + 1;
|
||||
int pid_max_max = PID_MAX_LIMIT;
|
||||
|
||||
#define BITS_PER_PAGE (PAGE_SIZE*8)
|
||||
#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1)
|
||||
|
||||
/* Convert a (bitmap page, bit offset) pair back into a PID number:
 * each pidmap entry covers BITS_PER_PAGE consecutive PIDs. */
static inline int mk_pid(struct pid_namespace *pid_ns,
		struct pidmap *map, int off)
{
	return (map - pid_ns->pidmap)*BITS_PER_PAGE + off;
}
|
||||
|
||||
#define find_next_offset(map, off) \
|
||||
find_next_zero_bit((map)->page, BITS_PER_PAGE, off)
|
||||
|
||||
/*
 * PID-map pages start out as NULL, they get allocated upon
 * first use and are never deallocated. This way a low pid_max
 * value does not cause lots of bitmaps to be allocated, but
 * the scheme scales to up to 4 million PIDs, runtime.
 */
struct pid_namespace init_pid_ns = {
	.kref = {
		/* refcount 2: the namespace itself plus init's reference */
		.refcount       = ATOMIC_INIT(2),
	},
	.pidmap = {
		/* every page starts fully free and unallocated */
		[ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL }
	},
	.last_pid = 0,
	.child_reaper = &init_task
};
|
||||
|
||||
/*
|
||||
* Note: disable interrupts while the pidmap_lock is held as an
|
||||
* interrupt might come in and do read_lock(&tasklist_lock).
|
||||
*
|
||||
* If we don't disable interrupts there is a nasty deadlock between
|
||||
* detach_pid()->free_pid() and another cpu that does
|
||||
* spin_lock(&pidmap_lock) followed by an interrupt routine that does
|
||||
* read_lock(&tasklist_lock);
|
||||
*
|
||||
* After we clean up the tasklist_lock and know there are no
|
||||
* irq handlers that take it we can leave the interrupts enabled.
|
||||
* For now it is easier to be safe than to prove it can't happen.
|
||||
*/
|
||||
|
||||
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
|
||||
|
||||
/* Return @pid to the bitmap: clear its bit and bump the page's free
 * count.  Lockless -- relies on atomic bitop + atomic counter. */
static fastcall void free_pidmap(struct pid_namespace *pid_ns, int pid)
{
	struct pidmap *map = pid_ns->pidmap + pid / BITS_PER_PAGE;
	int offset = pid & BITS_PER_PAGE_MASK;

	clear_bit(offset, map->page);
	atomic_inc(&map->nr_free);
}
|
||||
|
||||
/* Allocate a free PID from the namespace's bitmap, scanning circularly
 * from last_pid+1 and wrapping to RESERVED_PIDS (PIDs below that are
 * kept for system daemons).  Returns the PID, or -1 if none free.
 * Bitmap pages are allocated lazily on first use. */
static int alloc_pidmap(struct pid_namespace *pid_ns)
{
	int i, offset, max_scan, pid, last = pid_ns->last_pid;
	struct pidmap *map;

	pid = last + 1;
	if (pid >= pid_max)
		pid = RESERVED_PIDS;
	offset = pid & BITS_PER_PAGE_MASK;
	map = &pid_ns->pidmap[pid/BITS_PER_PAGE];
	/* Number of bitmap pages to visit; the first page is only a
	 * partial scan when we start mid-page (offset != 0). */
	max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset;
	for (i = 0; i <= max_scan; ++i) {
		if (unlikely(!map->page)) {
			void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);
			/*
			 * Free the page if someone raced with us
			 * installing it:
			 */
			spin_lock_irq(&pidmap_lock);
			if (map->page)
				kfree(page);
			else
				map->page = page;
			spin_unlock_irq(&pidmap_lock);
			if (unlikely(!map->page))
				break;
		}
		if (likely(atomic_read(&map->nr_free))) {
			do {
				if (!test_and_set_bit(offset, map->page)) {
					atomic_dec(&map->nr_free);
					pid_ns->last_pid = pid;
					return pid;
				}
				offset = find_next_offset(map, offset);
				pid = mk_pid(pid_ns, map, offset);
			/*
			 * find_next_offset() found a bit, the pid from it
			 * is in-bounds, and if we fell back to the last
			 * bitmap block and the final block was the same
			 * as the starting point, pid is before last_pid.
			 */
			} while (offset < BITS_PER_PAGE && pid < pid_max &&
					(i != max_scan || pid < last ||
					    !((last+1) & BITS_PER_PAGE_MASK)));
		}
		/* Advance to the next bitmap page, wrapping back to the
		 * start (skipping reserved PIDs) at the end. */
		if (map < &pid_ns->pidmap[(pid_max-1)/BITS_PER_PAGE]) {
			++map;
			offset = 0;
		} else {
			map = &pid_ns->pidmap[0];
			offset = RESERVED_PIDS;
			if (unlikely(last == offset))
				break;	/* wrapped all the way around */
		}
		pid = mk_pid(pid_ns, map, offset);
	}
	return -1;
}
|
||||
|
||||
/* Find the smallest allocated PID strictly greater than @last, or -1
 * if there is none.  Skips bitmap pages never allocated. */
static int next_pidmap(struct pid_namespace *pid_ns, int last)
{
	int offset;
	struct pidmap *map, *end;

	offset = (last + 1) & BITS_PER_PAGE_MASK;
	map = &pid_ns->pidmap[(last + 1)/BITS_PER_PAGE];
	end = &pid_ns->pidmap[PIDMAP_ENTRIES];
	for (; map < end; map++, offset = 0) {
		if (unlikely(!map->page))
			continue;
		offset = find_next_bit((map)->page, BITS_PER_PAGE, offset);
		if (offset < BITS_PER_PAGE)
			return mk_pid(pid_ns, map, offset);
	}
	return -1;
}
|
||||
|
||||
/* Drop a reference on @pid; frees it when the last reference goes.
 * The count==1 fast path skips the atomic RMW when we hold the only
 * reference -- NOTE(review): safe only if no concurrent get is
 * possible at count 1, which the pid lifetime rules should guarantee. */
fastcall void put_pid(struct pid *pid)
{
	if (!pid)
		return;
	if ((atomic_read(&pid->count) == 1) ||
	     atomic_dec_and_test(&pid->count))
		kmem_cache_free(pid_cachep, pid);
}
EXPORT_SYMBOL_GPL(put_pid);
|
||||
|
||||
/* RCU callback: drop the pid reference after a grace period, so RCU
 * readers traversing the hash can no longer see it (see free_pid()). */
static void delayed_put_pid(struct rcu_head *rhp)
{
	struct pid *pid = container_of(rhp, struct pid, rcu);
	put_pid(pid);
}
|
||||
|
||||
/* Unhash @pid, return its number to the bitmap, and release the
 * reference after an RCU grace period (readers may still hold it). */
fastcall void free_pid(struct pid *pid)
{
	/* We can be called with write_lock_irq(&tasklist_lock) held */
	unsigned long flags;

	spin_lock_irqsave(&pidmap_lock, flags);
	hlist_del_rcu(&pid->pid_chain);
	spin_unlock_irqrestore(&pidmap_lock, flags);

	free_pidmap(&init_pid_ns, pid->nr);
	call_rcu(&pid->rcu, delayed_put_pid);
}
|
||||
|
||||
/* Allocate a struct pid with a fresh PID number from the current
 * task's pid namespace and hash it.  Returns NULL on allocation or
 * pidmap exhaustion.  Caller receives the initial reference. */
struct pid *alloc_pid(void)
{
	struct pid *pid;
	enum pid_type type;
	int nr = -1;

	pid = kmem_cache_alloc(pid_cachep, GFP_KERNEL);
	if (!pid)
		goto out;

	nr = alloc_pidmap(current->nsproxy->pid_ns);
	if (nr < 0)
		goto out_free;

	atomic_set(&pid->count, 1);
	pid->nr = nr;
	/* Empty task lists for each pid type (PID, PGID, SID). */
	for (type = 0; type < PIDTYPE_MAX; ++type)
		INIT_HLIST_HEAD(&pid->tasks[type]);

	/* Publish in the hash; lookups are RCU, updates take the lock. */
	spin_lock_irq(&pidmap_lock);
	hlist_add_head_rcu(&pid->pid_chain, &pid_hash[pid_hashfn(pid->nr)]);
	spin_unlock_irq(&pidmap_lock);

out:
	return pid;

out_free:
	kmem_cache_free(pid_cachep, pid);
	pid = NULL;
	goto out;
}
|
||||
|
||||
struct pid * fastcall find_pid(int nr)
|
||||
{
|
||||
struct hlist_node *elem;
|
||||
struct pid *pid;
|
||||
|
||||
hlist_for_each_entry_rcu(pid, elem,
|
||||
&pid_hash[pid_hashfn(nr)], pid_chain) {
|
||||
if (pid->nr == nr)
|
||||
return pid;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(find_pid);
|
||||
|
||||
int fastcall attach_pid(struct task_struct *task, enum pid_type type, int nr)
|
||||
{
|
||||
struct pid_link *link;
|
||||
struct pid *pid;
|
||||
|
||||
link = &task->pids[type];
|
||||
link->pid = pid = find_pid(nr);
|
||||
hlist_add_head_rcu(&link->node, &pid->tasks[type]);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void fastcall detach_pid(struct task_struct *task, enum pid_type type)
|
||||
{
|
||||
struct pid_link *link;
|
||||
struct pid *pid;
|
||||
int tmp;
|
||||
|
||||
link = &task->pids[type];
|
||||
pid = link->pid;
|
||||
|
||||
hlist_del_rcu(&link->node);
|
||||
link->pid = NULL;
|
||||
|
||||
for (tmp = PIDTYPE_MAX; --tmp >= 0; )
|
||||
if (!hlist_empty(&pid->tasks[tmp]))
|
||||
return;
|
||||
|
||||
free_pid(pid);
|
||||
}
|
||||
|
||||
/* transfer_pid is an optimization of attach_pid(new), detach_pid(old) */
|
||||
void fastcall transfer_pid(struct task_struct *old, struct task_struct *new,
|
||||
enum pid_type type)
|
||||
{
|
||||
new->pids[type].pid = old->pids[type].pid;
|
||||
hlist_replace_rcu(&old->pids[type].node, &new->pids[type].node);
|
||||
old->pids[type].pid = NULL;
|
||||
}
|
||||
|
||||
struct task_struct * fastcall pid_task(struct pid *pid, enum pid_type type)
|
||||
{
|
||||
struct task_struct *result = NULL;
|
||||
if (pid) {
|
||||
struct hlist_node *first;
|
||||
first = rcu_dereference(pid->tasks[type].first);
|
||||
if (first)
|
||||
result = hlist_entry(first, struct task_struct, pids[(type)].node);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* Must be called under rcu_read_lock() or with tasklist_lock read-held.
|
||||
*/
|
||||
struct task_struct *find_task_by_pid_type(int type, int nr)
|
||||
{
|
||||
return pid_task(find_pid(nr), type);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(find_task_by_pid_type);
|
||||
|
||||
struct pid *get_task_pid(struct task_struct *task, enum pid_type type)
|
||||
{
|
||||
struct pid *pid;
|
||||
rcu_read_lock();
|
||||
pid = get_pid(task->pids[type].pid);
|
||||
rcu_read_unlock();
|
||||
return pid;
|
||||
}
|
||||
|
||||
struct task_struct *fastcall get_pid_task(struct pid *pid, enum pid_type type)
|
||||
{
|
||||
struct task_struct *result;
|
||||
rcu_read_lock();
|
||||
result = pid_task(pid, type);
|
||||
if (result)
|
||||
get_task_struct(result);
|
||||
rcu_read_unlock();
|
||||
return result;
|
||||
}
|
||||
|
||||
struct pid *find_get_pid(pid_t nr)
|
||||
{
|
||||
struct pid *pid;
|
||||
|
||||
rcu_read_lock();
|
||||
pid = get_pid(find_pid(nr));
|
||||
rcu_read_unlock();
|
||||
|
||||
return pid;
|
||||
}
|
||||
|
||||
/*
|
||||
* Used by proc to find the first pid that is greater then or equal to nr.
|
||||
*
|
||||
* If there is a pid at nr this function is exactly the same as find_pid.
|
||||
*/
|
||||
struct pid *find_ge_pid(int nr)
|
||||
{
|
||||
struct pid *pid;
|
||||
|
||||
do {
|
||||
pid = find_pid(nr);
|
||||
if (pid)
|
||||
break;
|
||||
nr = next_pidmap(current->nsproxy->pid_ns, nr);
|
||||
} while (nr > 0);
|
||||
|
||||
return pid;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(find_get_pid);
|
||||
|
||||
int copy_pid_ns(int flags, struct task_struct *tsk)
|
||||
{
|
||||
struct pid_namespace *old_ns = tsk->nsproxy->pid_ns;
|
||||
int err = 0;
|
||||
|
||||
if (!old_ns)
|
||||
return 0;
|
||||
|
||||
get_pid_ns(old_ns);
|
||||
return err;
|
||||
}
|
||||
|
||||
void free_pid_ns(struct kref *kref)
|
||||
{
|
||||
struct pid_namespace *ns;
|
||||
|
||||
ns = container_of(kref, struct pid_namespace, kref);
|
||||
kfree(ns);
|
||||
}
|
||||
|
||||
/*
|
||||
* The pid hash table is scaled according to the amount of memory in the
|
||||
* machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or
|
||||
* more.
|
||||
*/
|
||||
void __init pidhash_init(void)
|
||||
{
|
||||
int i, pidhash_size;
|
||||
unsigned long megabytes = nr_kernel_pages >> (20 - PAGE_SHIFT);
|
||||
|
||||
pidhash_shift = max(4, fls(megabytes * 4));
|
||||
pidhash_shift = min(12, pidhash_shift);
|
||||
pidhash_size = 1 << pidhash_shift;
|
||||
|
||||
printk("PID hash table entries: %d (order: %d, %Zd bytes)\n",
|
||||
pidhash_size, pidhash_shift,
|
||||
pidhash_size * sizeof(struct hlist_head));
|
||||
|
||||
pid_hash = alloc_bootmem(pidhash_size * sizeof(*(pid_hash)));
|
||||
if (!pid_hash)
|
||||
panic("Could not alloc pidhash!\n");
|
||||
for (i = 0; i < pidhash_size; i++)
|
||||
INIT_HLIST_HEAD(&pid_hash[i]);
|
||||
}
|
||||
|
||||
void __init pidmap_init(void)
|
||||
{
|
||||
init_pid_ns.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
|
||||
/* Reserve PID 0. We never call free_pidmap(0) */
|
||||
set_bit(0, init_pid_ns.pidmap[0].page);
|
||||
atomic_dec(&init_pid_ns.pidmap[0].nr_free);
|
||||
|
||||
pid_cachep = kmem_cache_create("pid", sizeof(struct pid),
|
||||
__alignof__(struct pid),
|
||||
SLAB_PANIC, NULL, NULL);
|
||||
}
|
||||
1629
kernel/posix-cpu-timers.c
Normal file
1629
kernel/posix-cpu-timers.c
Normal file
File diff suppressed because it is too large
Load Diff
996
kernel/posix-timers.c
Normal file
996
kernel/posix-timers.c
Normal file
@@ -0,0 +1,996 @@
|
||||
/*
|
||||
* linux/kernel/posix-timers.c
|
||||
*
|
||||
*
|
||||
* 2002-10-15 Posix Clocks & timers
|
||||
* by George Anzinger george@mvista.com
|
||||
*
|
||||
* Copyright (C) 2002 2003 by MontaVista Software.
|
||||
*
|
||||
* 2004-06-01 Fix CLOCK_REALTIME clock/timer TIMER_ABSTIME bug.
|
||||
* Copyright (C) 2004 Boris Hu
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or (at
|
||||
* your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
*
|
||||
* MontaVista Software | 1237 East Arques Avenue | Sunnyvale | CA 94085 | USA
|
||||
*/
|
||||
|
||||
/* These are all the functions necessary to implement
|
||||
* POSIX clocks & timers
|
||||
*/
|
||||
#include <linux/mm.h>
|
||||
#include <linux/smp_lock.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/time.h>
|
||||
#include <linux/mutex.h>
|
||||
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/semaphore.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/idr.h>
|
||||
#include <linux/posix-timers.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/wait.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
/*
|
||||
* Management arrays for POSIX timers. Timers are kept in slab memory
|
||||
* Timer ids are allocated by an external routine that keeps track of the
|
||||
* id and the timer. The external interface is:
|
||||
*
|
||||
* void *idr_find(struct idr *idp, int id); to find timer_id <id>
|
||||
* int idr_get_new(struct idr *idp, void *ptr); to get a new id and
|
||||
* related it to <ptr>
|
||||
* void idr_remove(struct idr *idp, int id); to release <id>
|
||||
* void idr_init(struct idr *idp); to initialize <idp>
|
||||
* which we supply.
|
||||
* The idr_get_new *may* call slab for more memory so it must not be
|
||||
* called under a spin lock. Likewise idr_remore may release memory
|
||||
* (but it may be ok to do this under a lock...).
|
||||
* idr_find is just a memory look up and is quite fast. A -1 return
|
||||
* indicates that the requested id does not exist.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Lets keep our timers in a slab cache :-)
|
||||
*/
|
||||
static struct kmem_cache *posix_timers_cache;
|
||||
static struct idr posix_timers_id;
|
||||
static DEFINE_SPINLOCK(idr_lock);
|
||||
|
||||
/*
|
||||
* we assume that the new SIGEV_THREAD_ID shares no bits with the other
|
||||
* SIGEV values. Here we put out an error if this assumption fails.
|
||||
*/
|
||||
#if SIGEV_THREAD_ID != (SIGEV_THREAD_ID & \
|
||||
~(SIGEV_SIGNAL | SIGEV_NONE | SIGEV_THREAD))
|
||||
#error "SIGEV_THREAD_ID must not share bit with other SIGEV values!"
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
* The timer ID is turned into a timer address by idr_find().
|
||||
* Verifying a valid ID consists of:
|
||||
*
|
||||
* a) checking that idr_find() returns other than -1.
|
||||
* b) checking that the timer id matches the one in the timer itself.
|
||||
* c) that the timer owner is in the callers thread group.
|
||||
*/
|
||||
|
||||
/*
|
||||
* CLOCKs: The POSIX standard calls for a couple of clocks and allows us
|
||||
* to implement others. This structure defines the various
|
||||
* clocks and allows the possibility of adding others. We
|
||||
* provide an interface to add clocks to the table and expect
|
||||
* the "arch" code to add at least one clock that is high
|
||||
* resolution. Here we define the standard CLOCK_REALTIME as a
|
||||
* 1/HZ resolution clock.
|
||||
*
|
||||
* RESOLUTION: Clock resolution is used to round up timer and interval
|
||||
* times, NOT to report clock times, which are reported with as
|
||||
* much resolution as the system can muster. In some cases this
|
||||
* resolution may depend on the underlying clock hardware and
|
||||
* may not be quantifiable until run time, and only then is the
|
||||
* necessary code is written. The standard says we should say
|
||||
* something about this issue in the documentation...
|
||||
*
|
||||
* FUNCTIONS: The CLOCKs structure defines possible functions to handle
|
||||
* various clock functions. For clocks that use the standard
|
||||
* system timer code these entries should be NULL. This will
|
||||
* allow dispatch without the overhead of indirect function
|
||||
* calls. CLOCKS that depend on other sources (e.g. WWV or GPS)
|
||||
* must supply functions here, even if the function just returns
|
||||
* ENOSYS. The standard POSIX timer management code assumes the
|
||||
* following: 1.) The k_itimer struct (sched.h) is used for the
|
||||
* timer. 2.) The list, it_lock, it_clock, it_id and it_process
|
||||
* fields are not modified by timer code.
|
||||
*
|
||||
* At this time all functions EXCEPT clock_nanosleep can be
|
||||
* redirected by the CLOCKS structure. Clock_nanosleep is in
|
||||
* there, but the code ignores it.
|
||||
*
|
||||
* Permissions: It is assumed that the clock_settime() function defined
|
||||
* for each clock will take care of permission checks. Some
|
||||
* clocks may be set able by any user (i.e. local process
|
||||
* clocks) others not. Currently the only set able clock we
|
||||
* have is CLOCK_REALTIME and its high res counter part, both of
|
||||
* which we beg off on and pass to do_sys_settimeofday().
|
||||
*/
|
||||
|
||||
static struct k_clock posix_clocks[MAX_CLOCKS];
|
||||
|
||||
/*
|
||||
* These ones are defined below.
|
||||
*/
|
||||
static int common_nsleep(const clockid_t, int flags, struct timespec *t,
|
||||
struct timespec __user *rmtp);
|
||||
static void common_timer_get(struct k_itimer *, struct itimerspec *);
|
||||
static int common_timer_set(struct k_itimer *, int,
|
||||
struct itimerspec *, struct itimerspec *);
|
||||
static int common_timer_del(struct k_itimer *timer);
|
||||
|
||||
static enum hrtimer_restart posix_timer_fn(struct hrtimer *data);
|
||||
|
||||
static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags);
|
||||
|
||||
static inline void unlock_timer(struct k_itimer *timr, unsigned long flags)
|
||||
{
|
||||
spin_unlock_irqrestore(&timr->it_lock, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Call the k_clock hook function if non-null, or the default function.
|
||||
*/
|
||||
#define CLOCK_DISPATCH(clock, call, arglist) \
|
||||
((clock) < 0 ? posix_cpu_##call arglist : \
|
||||
(posix_clocks[clock].call != NULL \
|
||||
? (*posix_clocks[clock].call) arglist : common_##call arglist))
|
||||
|
||||
/*
|
||||
* Default clock hook functions when the struct k_clock passed
|
||||
* to register_posix_clock leaves a function pointer null.
|
||||
*
|
||||
* The function common_CALL is the default implementation for
|
||||
* the function pointer CALL in struct k_clock.
|
||||
*/
|
||||
|
||||
static inline int common_clock_getres(const clockid_t which_clock,
|
||||
struct timespec *tp)
|
||||
{
|
||||
tp->tv_sec = 0;
|
||||
tp->tv_nsec = posix_clocks[which_clock].res;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get real time for posix timers
|
||||
*/
|
||||
static int common_clock_get(clockid_t which_clock, struct timespec *tp)
|
||||
{
|
||||
ktime_get_real_ts(tp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int common_clock_set(const clockid_t which_clock,
|
||||
struct timespec *tp)
|
||||
{
|
||||
return do_sys_settimeofday(tp, NULL);
|
||||
}
|
||||
|
||||
static int common_timer_create(struct k_itimer *new_timer)
|
||||
{
|
||||
hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return nonzero if we know a priori this clockid_t value is bogus.
|
||||
*/
|
||||
static inline int invalid_clockid(const clockid_t which_clock)
|
||||
{
|
||||
if (which_clock < 0) /* CPU clock, posix_cpu_* will check it */
|
||||
return 0;
|
||||
if ((unsigned) which_clock >= MAX_CLOCKS)
|
||||
return 1;
|
||||
if (posix_clocks[which_clock].clock_getres != NULL)
|
||||
return 0;
|
||||
if (posix_clocks[which_clock].res != 0)
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get monotonic time for posix timers
|
||||
*/
|
||||
static int posix_ktime_get_ts(clockid_t which_clock, struct timespec *tp)
|
||||
{
|
||||
ktime_get_ts(tp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize everything, well, just everything in Posix clocks/timers ;)
|
||||
*/
|
||||
static __init int init_posix_timers(void)
|
||||
{
|
||||
struct k_clock clock_realtime = {
|
||||
.clock_getres = hrtimer_get_res,
|
||||
};
|
||||
struct k_clock clock_monotonic = {
|
||||
.clock_getres = hrtimer_get_res,
|
||||
.clock_get = posix_ktime_get_ts,
|
||||
.clock_set = do_posix_clock_nosettime,
|
||||
};
|
||||
|
||||
register_posix_clock(CLOCK_REALTIME, &clock_realtime);
|
||||
register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic);
|
||||
|
||||
posix_timers_cache = kmem_cache_create("posix_timers_cache",
|
||||
sizeof (struct k_itimer), 0, 0, NULL, NULL);
|
||||
idr_init(&posix_timers_id);
|
||||
return 0;
|
||||
}
|
||||
|
||||
__initcall(init_posix_timers);
|
||||
|
||||
static void schedule_next_timer(struct k_itimer *timr)
|
||||
{
|
||||
struct hrtimer *timer = &timr->it.real.timer;
|
||||
|
||||
if (timr->it.real.interval.tv64 == 0)
|
||||
return;
|
||||
|
||||
timr->it_overrun += hrtimer_forward(timer, timer->base->get_time(),
|
||||
timr->it.real.interval);
|
||||
|
||||
timr->it_overrun_last = timr->it_overrun;
|
||||
timr->it_overrun = -1;
|
||||
++timr->it_requeue_pending;
|
||||
hrtimer_restart(timer);
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is exported for use by the signal deliver code. It is
|
||||
* called just prior to the info block being released and passes that
|
||||
* block to us. It's function is to update the overrun entry AND to
|
||||
* restart the timer. It should only be called if the timer is to be
|
||||
* restarted (i.e. we have flagged this in the sys_private entry of the
|
||||
* info block).
|
||||
*
|
||||
* To protect aginst the timer going away while the interrupt is queued,
|
||||
* we require that the it_requeue_pending flag be set.
|
||||
*/
|
||||
void do_schedule_next_timer(struct siginfo *info)
|
||||
{
|
||||
struct k_itimer *timr;
|
||||
unsigned long flags;
|
||||
|
||||
timr = lock_timer(info->si_tid, &flags);
|
||||
|
||||
if (timr && timr->it_requeue_pending == info->si_sys_private) {
|
||||
if (timr->it_clock < 0)
|
||||
posix_cpu_timer_schedule(timr);
|
||||
else
|
||||
schedule_next_timer(timr);
|
||||
|
||||
info->si_overrun = timr->it_overrun_last;
|
||||
}
|
||||
|
||||
if (timr)
|
||||
unlock_timer(timr, flags);
|
||||
}
|
||||
|
||||
int posix_timer_event(struct k_itimer *timr,int si_private)
|
||||
{
|
||||
memset(&timr->sigq->info, 0, sizeof(siginfo_t));
|
||||
timr->sigq->info.si_sys_private = si_private;
|
||||
/* Send signal to the process that owns this timer.*/
|
||||
|
||||
timr->sigq->info.si_signo = timr->it_sigev_signo;
|
||||
timr->sigq->info.si_errno = 0;
|
||||
timr->sigq->info.si_code = SI_TIMER;
|
||||
timr->sigq->info.si_tid = timr->it_id;
|
||||
timr->sigq->info.si_value = timr->it_sigev_value;
|
||||
|
||||
if (timr->it_sigev_notify & SIGEV_THREAD_ID) {
|
||||
struct task_struct *leader;
|
||||
int ret = send_sigqueue(timr->it_sigev_signo, timr->sigq,
|
||||
timr->it_process);
|
||||
|
||||
if (likely(ret >= 0))
|
||||
return ret;
|
||||
|
||||
timr->it_sigev_notify = SIGEV_SIGNAL;
|
||||
leader = timr->it_process->group_leader;
|
||||
put_task_struct(timr->it_process);
|
||||
timr->it_process = leader;
|
||||
}
|
||||
|
||||
return send_group_sigqueue(timr->it_sigev_signo, timr->sigq,
|
||||
timr->it_process);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(posix_timer_event);
|
||||
|
||||
/*
|
||||
* This function gets called when a POSIX.1b interval timer expires. It
|
||||
* is used as a callback from the kernel internal timer. The
|
||||
* run_timer_list code ALWAYS calls with interrupts on.
|
||||
|
||||
* This code is for CLOCK_REALTIME* and CLOCK_MONOTONIC* timers.
|
||||
*/
|
||||
static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
|
||||
{
|
||||
struct k_itimer *timr;
|
||||
unsigned long flags;
|
||||
int si_private = 0;
|
||||
enum hrtimer_restart ret = HRTIMER_NORESTART;
|
||||
|
||||
timr = container_of(timer, struct k_itimer, it.real.timer);
|
||||
spin_lock_irqsave(&timr->it_lock, flags);
|
||||
|
||||
if (timr->it.real.interval.tv64 != 0)
|
||||
si_private = ++timr->it_requeue_pending;
|
||||
|
||||
if (posix_timer_event(timr, si_private)) {
|
||||
/*
|
||||
* signal was not sent because of sig_ignor
|
||||
* we will not get a call back to restart it AND
|
||||
* it should be restarted.
|
||||
*/
|
||||
if (timr->it.real.interval.tv64 != 0) {
|
||||
timr->it_overrun +=
|
||||
hrtimer_forward(timer,
|
||||
hrtimer_cb_get_time(timer),
|
||||
timr->it.real.interval);
|
||||
ret = HRTIMER_RESTART;
|
||||
++timr->it_requeue_pending;
|
||||
}
|
||||
}
|
||||
|
||||
unlock_timer(timr, flags);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct task_struct * good_sigevent(sigevent_t * event)
|
||||
{
|
||||
struct task_struct *rtn = current->group_leader;
|
||||
|
||||
if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
|
||||
(!(rtn = find_task_by_pid(event->sigev_notify_thread_id)) ||
|
||||
rtn->tgid != current->tgid ||
|
||||
(event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL))
|
||||
return NULL;
|
||||
|
||||
if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) &&
|
||||
((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX)))
|
||||
return NULL;
|
||||
|
||||
return rtn;
|
||||
}
|
||||
|
||||
void register_posix_clock(const clockid_t clock_id, struct k_clock *new_clock)
|
||||
{
|
||||
if ((unsigned) clock_id >= MAX_CLOCKS) {
|
||||
printk("POSIX clock register failed for clock_id %d\n",
|
||||
clock_id);
|
||||
return;
|
||||
}
|
||||
|
||||
posix_clocks[clock_id] = *new_clock;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(register_posix_clock);
|
||||
|
||||
static struct k_itimer * alloc_posix_timer(void)
|
||||
{
|
||||
struct k_itimer *tmr;
|
||||
tmr = kmem_cache_zalloc(posix_timers_cache, GFP_KERNEL);
|
||||
if (!tmr)
|
||||
return tmr;
|
||||
if (unlikely(!(tmr->sigq = sigqueue_alloc()))) {
|
||||
kmem_cache_free(posix_timers_cache, tmr);
|
||||
tmr = NULL;
|
||||
}
|
||||
return tmr;
|
||||
}
|
||||
|
||||
#define IT_ID_SET 1
|
||||
#define IT_ID_NOT_SET 0
|
||||
static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
|
||||
{
|
||||
if (it_id_set) {
|
||||
unsigned long flags;
|
||||
spin_lock_irqsave(&idr_lock, flags);
|
||||
idr_remove(&posix_timers_id, tmr->it_id);
|
||||
spin_unlock_irqrestore(&idr_lock, flags);
|
||||
}
|
||||
sigqueue_free(tmr->sigq);
|
||||
if (unlikely(tmr->it_process) &&
|
||||
tmr->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
|
||||
put_task_struct(tmr->it_process);
|
||||
kmem_cache_free(posix_timers_cache, tmr);
|
||||
}
|
||||
|
||||
/* Create a POSIX.1b interval timer. */
|
||||
|
||||
asmlinkage long
|
||||
sys_timer_create(const clockid_t which_clock,
|
||||
struct sigevent __user *timer_event_spec,
|
||||
timer_t __user * created_timer_id)
|
||||
{
|
||||
int error = 0;
|
||||
struct k_itimer *new_timer = NULL;
|
||||
int new_timer_id;
|
||||
struct task_struct *process = NULL;
|
||||
unsigned long flags;
|
||||
sigevent_t event;
|
||||
int it_id_set = IT_ID_NOT_SET;
|
||||
|
||||
if (invalid_clockid(which_clock))
|
||||
return -EINVAL;
|
||||
|
||||
new_timer = alloc_posix_timer();
|
||||
if (unlikely(!new_timer))
|
||||
return -EAGAIN;
|
||||
|
||||
spin_lock_init(&new_timer->it_lock);
|
||||
retry:
|
||||
if (unlikely(!idr_pre_get(&posix_timers_id, GFP_KERNEL))) {
|
||||
error = -EAGAIN;
|
||||
goto out;
|
||||
}
|
||||
spin_lock_irq(&idr_lock);
|
||||
error = idr_get_new(&posix_timers_id, (void *) new_timer,
|
||||
&new_timer_id);
|
||||
spin_unlock_irq(&idr_lock);
|
||||
if (error == -EAGAIN)
|
||||
goto retry;
|
||||
else if (error) {
|
||||
/*
|
||||
* Wierd looking, but we return EAGAIN if the IDR is
|
||||
* full (proper POSIX return value for this)
|
||||
*/
|
||||
error = -EAGAIN;
|
||||
goto out;
|
||||
}
|
||||
|
||||
it_id_set = IT_ID_SET;
|
||||
new_timer->it_id = (timer_t) new_timer_id;
|
||||
new_timer->it_clock = which_clock;
|
||||
new_timer->it_overrun = -1;
|
||||
error = CLOCK_DISPATCH(which_clock, timer_create, (new_timer));
|
||||
if (error)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* return the timer_id now. The next step is hard to
|
||||
* back out if there is an error.
|
||||
*/
|
||||
if (copy_to_user(created_timer_id,
|
||||
&new_timer_id, sizeof (new_timer_id))) {
|
||||
error = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
if (timer_event_spec) {
|
||||
if (copy_from_user(&event, timer_event_spec, sizeof (event))) {
|
||||
error = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
new_timer->it_sigev_notify = event.sigev_notify;
|
||||
new_timer->it_sigev_signo = event.sigev_signo;
|
||||
new_timer->it_sigev_value = event.sigev_value;
|
||||
|
||||
read_lock(&tasklist_lock);
|
||||
if ((process = good_sigevent(&event))) {
|
||||
/*
|
||||
* We may be setting up this process for another
|
||||
* thread. It may be exiting. To catch this
|
||||
* case the we check the PF_EXITING flag. If
|
||||
* the flag is not set, the siglock will catch
|
||||
* him before it is too late (in exit_itimers).
|
||||
*
|
||||
* The exec case is a bit more invloved but easy
|
||||
* to code. If the process is in our thread
|
||||
* group (and it must be or we would not allow
|
||||
* it here) and is doing an exec, it will cause
|
||||
* us to be killed. In this case it will wait
|
||||
* for us to die which means we can finish this
|
||||
* linkage with our last gasp. I.e. no code :)
|
||||
*/
|
||||
spin_lock_irqsave(&process->sighand->siglock, flags);
|
||||
if (!(process->flags & PF_EXITING)) {
|
||||
new_timer->it_process = process;
|
||||
list_add(&new_timer->list,
|
||||
&process->signal->posix_timers);
|
||||
spin_unlock_irqrestore(&process->sighand->siglock, flags);
|
||||
if (new_timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
|
||||
get_task_struct(process);
|
||||
} else {
|
||||
spin_unlock_irqrestore(&process->sighand->siglock, flags);
|
||||
process = NULL;
|
||||
}
|
||||
}
|
||||
read_unlock(&tasklist_lock);
|
||||
if (!process) {
|
||||
error = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
} else {
|
||||
new_timer->it_sigev_notify = SIGEV_SIGNAL;
|
||||
new_timer->it_sigev_signo = SIGALRM;
|
||||
new_timer->it_sigev_value.sival_int = new_timer->it_id;
|
||||
process = current->group_leader;
|
||||
spin_lock_irqsave(&process->sighand->siglock, flags);
|
||||
new_timer->it_process = process;
|
||||
list_add(&new_timer->list, &process->signal->posix_timers);
|
||||
spin_unlock_irqrestore(&process->sighand->siglock, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* In the case of the timer belonging to another task, after
|
||||
* the task is unlocked, the timer is owned by the other task
|
||||
* and may cease to exist at any time. Don't use or modify
|
||||
* new_timer after the unlock call.
|
||||
*/
|
||||
|
||||
out:
|
||||
if (error)
|
||||
release_posix_timer(new_timer, it_id_set);
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Locking issues: We need to protect the result of the id look up until
|
||||
* we get the timer locked down so it is not deleted under us. The
|
||||
* removal is done under the idr spinlock so we use that here to bridge
|
||||
* the find to the timer lock. To avoid a dead lock, the timer id MUST
|
||||
* be release with out holding the timer lock.
|
||||
*/
|
||||
static struct k_itimer * lock_timer(timer_t timer_id, unsigned long *flags)
|
||||
{
|
||||
struct k_itimer *timr;
|
||||
/*
|
||||
* Watch out here. We do a irqsave on the idr_lock and pass the
|
||||
* flags part over to the timer lock. Must not let interrupts in
|
||||
* while we are moving the lock.
|
||||
*/
|
||||
|
||||
spin_lock_irqsave(&idr_lock, *flags);
|
||||
timr = (struct k_itimer *) idr_find(&posix_timers_id, (int) timer_id);
|
||||
if (timr) {
|
||||
spin_lock(&timr->it_lock);
|
||||
spin_unlock(&idr_lock);
|
||||
|
||||
if ((timr->it_id != timer_id) || !(timr->it_process) ||
|
||||
timr->it_process->tgid != current->tgid) {
|
||||
unlock_timer(timr, *flags);
|
||||
timr = NULL;
|
||||
}
|
||||
} else
|
||||
spin_unlock_irqrestore(&idr_lock, *flags);
|
||||
|
||||
return timr;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get the time remaining on a POSIX.1b interval timer. This function
|
||||
* is ALWAYS called with spin_lock_irq on the timer, thus it must not
|
||||
* mess with irq.
|
||||
*
|
||||
* We have a couple of messes to clean up here. First there is the case
|
||||
* of a timer that has a requeue pending. These timers should appear to
|
||||
* be in the timer list with an expiry as if we were to requeue them
|
||||
* now.
|
||||
*
|
||||
* The second issue is the SIGEV_NONE timer which may be active but is
|
||||
* not really ever put in the timer list (to save system resources).
|
||||
* This timer may be expired, and if so, we will do it here. Otherwise
|
||||
* it is the same as a requeue pending timer WRT to what we should
|
||||
* report.
|
||||
*/
|
||||
static void
|
||||
common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting)
|
||||
{
|
||||
ktime_t now, remaining, iv;
|
||||
struct hrtimer *timer = &timr->it.real.timer;
|
||||
|
||||
memset(cur_setting, 0, sizeof(struct itimerspec));
|
||||
|
||||
iv = timr->it.real.interval;
|
||||
|
||||
/* interval timer ? */
|
||||
if (iv.tv64)
|
||||
cur_setting->it_interval = ktime_to_timespec(iv);
|
||||
else if (!hrtimer_active(timer) &&
|
||||
(timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE)
|
||||
return;
|
||||
|
||||
now = timer->base->get_time();
|
||||
|
||||
/*
|
||||
* When a requeue is pending or this is a SIGEV_NONE
|
||||
* timer move the expiry time forward by intervals, so
|
||||
* expiry is > now.
|
||||
*/
|
||||
if (iv.tv64 && (timr->it_requeue_pending & REQUEUE_PENDING ||
|
||||
(timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE))
|
||||
timr->it_overrun += hrtimer_forward(timer, now, iv);
|
||||
|
||||
remaining = ktime_sub(timer->expires, now);
|
||||
/* Return 0 only, when the timer is expired and not pending */
|
||||
if (remaining.tv64 <= 0) {
|
||||
/*
|
||||
* A single shot SIGEV_NONE timer must return 0, when
|
||||
* it is expired !
|
||||
*/
|
||||
if ((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE)
|
||||
cur_setting->it_value.tv_nsec = 1;
|
||||
} else
|
||||
cur_setting->it_value = ktime_to_timespec(remaining);
|
||||
}
|
||||
|
||||
/* Get the time remaining on a POSIX.1b interval timer. */
|
||||
asmlinkage long
|
||||
sys_timer_gettime(timer_t timer_id, struct itimerspec __user *setting)
|
||||
{
|
||||
struct k_itimer *timr;
|
||||
struct itimerspec cur_setting;
|
||||
unsigned long flags;
|
||||
|
||||
timr = lock_timer(timer_id, &flags);
|
||||
if (!timr)
|
||||
return -EINVAL;
|
||||
|
||||
CLOCK_DISPATCH(timr->it_clock, timer_get, (timr, &cur_setting));
|
||||
|
||||
unlock_timer(timr, flags);
|
||||
|
||||
if (copy_to_user(setting, &cur_setting, sizeof (cur_setting)))
|
||||
return -EFAULT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get the number of overruns of a POSIX.1b interval timer. This is to
|
||||
* be the overrun of the timer last delivered. At the same time we are
|
||||
* accumulating overruns on the next timer. The overrun is frozen when
|
||||
* the signal is delivered, either at the notify time (if the info block
|
||||
* is not queued) or at the actual delivery time (as we are informed by
|
||||
* the call back to do_schedule_next_timer(). So all we need to do is
|
||||
* to pick up the frozen overrun.
|
||||
*/
|
||||
asmlinkage long
|
||||
sys_timer_getoverrun(timer_t timer_id)
|
||||
{
|
||||
struct k_itimer *timr;
|
||||
int overrun;
|
||||
long flags;
|
||||
|
||||
timr = lock_timer(timer_id, &flags);
|
||||
if (!timr)
|
||||
return -EINVAL;
|
||||
|
||||
overrun = timr->it_overrun_last;
|
||||
unlock_timer(timr, flags);
|
||||
|
||||
return overrun;
|
||||
}
|
||||
|
||||
/* Set a POSIX.1b interval timer. */
|
||||
/* timr->it_lock is taken. */
|
||||
static int
|
||||
common_timer_set(struct k_itimer *timr, int flags,
|
||||
struct itimerspec *new_setting, struct itimerspec *old_setting)
|
||||
{
|
||||
struct hrtimer *timer = &timr->it.real.timer;
|
||||
enum hrtimer_mode mode;
|
||||
|
||||
if (old_setting)
|
||||
common_timer_get(timr, old_setting);
|
||||
|
||||
/* disable the timer */
|
||||
timr->it.real.interval.tv64 = 0;
|
||||
/*
|
||||
* careful here. If smp we could be in the "fire" routine which will
|
||||
* be spinning as we hold the lock. But this is ONLY an SMP issue.
|
||||
*/
|
||||
if (hrtimer_try_to_cancel(timer) < 0)
|
||||
return TIMER_RETRY;
|
||||
|
||||
timr->it_requeue_pending = (timr->it_requeue_pending + 2) &
|
||||
~REQUEUE_PENDING;
|
||||
timr->it_overrun_last = 0;
|
||||
|
||||
/* switch off the timer when it_value is zero */
|
||||
if (!new_setting->it_value.tv_sec && !new_setting->it_value.tv_nsec)
|
||||
return 0;
|
||||
|
||||
mode = flags & TIMER_ABSTIME ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL;
|
||||
hrtimer_init(&timr->it.real.timer, timr->it_clock, mode);
|
||||
timr->it.real.timer.function = posix_timer_fn;
|
||||
|
||||
timer->expires = timespec_to_ktime(new_setting->it_value);
|
||||
|
||||
/* Convert interval */
|
||||
timr->it.real.interval = timespec_to_ktime(new_setting->it_interval);
|
||||
|
||||
/* SIGEV_NONE timers are not queued ! See common_timer_get */
|
||||
if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) {
|
||||
/* Setup correct expiry time for relative timers */
|
||||
if (mode == HRTIMER_MODE_REL)
|
||||
timer->expires = ktime_add(timer->expires,
|
||||
timer->base->get_time());
|
||||
return 0;
|
||||
}
|
||||
|
||||
hrtimer_start(timer, timer->expires, mode);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Set a POSIX.1b interval timer */
|
||||
asmlinkage long
|
||||
sys_timer_settime(timer_t timer_id, int flags,
|
||||
const struct itimerspec __user *new_setting,
|
||||
struct itimerspec __user *old_setting)
|
||||
{
|
||||
struct k_itimer *timr;
|
||||
struct itimerspec new_spec, old_spec;
|
||||
int error = 0;
|
||||
long flag;
|
||||
struct itimerspec *rtn = old_setting ? &old_spec : NULL;
|
||||
|
||||
if (!new_setting)
|
||||
return -EINVAL;
|
||||
|
||||
if (copy_from_user(&new_spec, new_setting, sizeof (new_spec)))
|
||||
return -EFAULT;
|
||||
|
||||
if (!timespec_valid(&new_spec.it_interval) ||
|
||||
!timespec_valid(&new_spec.it_value))
|
||||
return -EINVAL;
|
||||
retry:
|
||||
timr = lock_timer(timer_id, &flag);
|
||||
if (!timr)
|
||||
return -EINVAL;
|
||||
|
||||
error = CLOCK_DISPATCH(timr->it_clock, timer_set,
|
||||
(timr, flags, &new_spec, rtn));
|
||||
|
||||
unlock_timer(timr, flag);
|
||||
if (error == TIMER_RETRY) {
|
||||
rtn = NULL; // We already got the old time...
|
||||
goto retry;
|
||||
}
|
||||
|
||||
if (old_setting && !error &&
|
||||
copy_to_user(old_setting, &old_spec, sizeof (old_spec)))
|
||||
error = -EFAULT;
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
static inline int common_timer_del(struct k_itimer *timer)
|
||||
{
|
||||
timer->it.real.interval.tv64 = 0;
|
||||
|
||||
if (hrtimer_try_to_cancel(&timer->it.real.timer) < 0)
|
||||
return TIMER_RETRY;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int timer_delete_hook(struct k_itimer *timer)
|
||||
{
|
||||
return CLOCK_DISPATCH(timer->it_clock, timer_del, (timer));
|
||||
}
|
||||
|
||||
/* Delete a POSIX.1b interval timer. */
|
||||
asmlinkage long
|
||||
sys_timer_delete(timer_t timer_id)
|
||||
{
|
||||
struct k_itimer *timer;
|
||||
long flags;
|
||||
|
||||
retry_delete:
|
||||
timer = lock_timer(timer_id, &flags);
|
||||
if (!timer)
|
||||
return -EINVAL;
|
||||
|
||||
if (timer_delete_hook(timer) == TIMER_RETRY) {
|
||||
unlock_timer(timer, flags);
|
||||
goto retry_delete;
|
||||
}
|
||||
|
||||
spin_lock(¤t->sighand->siglock);
|
||||
list_del(&timer->list);
|
||||
spin_unlock(¤t->sighand->siglock);
|
||||
/*
|
||||
* This keeps any tasks waiting on the spin lock from thinking
|
||||
* they got something (see the lock code above).
|
||||
*/
|
||||
if (timer->it_process) {
|
||||
if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
|
||||
put_task_struct(timer->it_process);
|
||||
timer->it_process = NULL;
|
||||
}
|
||||
unlock_timer(timer, flags);
|
||||
release_posix_timer(timer, IT_ID_SET);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* return timer owned by the process, used by exit_itimers
|
||||
*/
|
||||
static void itimer_delete(struct k_itimer *timer)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
retry_delete:
|
||||
spin_lock_irqsave(&timer->it_lock, flags);
|
||||
|
||||
if (timer_delete_hook(timer) == TIMER_RETRY) {
|
||||
unlock_timer(timer, flags);
|
||||
goto retry_delete;
|
||||
}
|
||||
list_del(&timer->list);
|
||||
/*
|
||||
* This keeps any tasks waiting on the spin lock from thinking
|
||||
* they got something (see the lock code above).
|
||||
*/
|
||||
if (timer->it_process) {
|
||||
if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
|
||||
put_task_struct(timer->it_process);
|
||||
timer->it_process = NULL;
|
||||
}
|
||||
unlock_timer(timer, flags);
|
||||
release_posix_timer(timer, IT_ID_SET);
|
||||
}
|
||||
|
||||
/*
|
||||
* This is called by do_exit or de_thread, only when there are no more
|
||||
* references to the shared signal_struct.
|
||||
*/
|
||||
void exit_itimers(struct signal_struct *sig)
|
||||
{
|
||||
struct k_itimer *tmr;
|
||||
|
||||
while (!list_empty(&sig->posix_timers)) {
|
||||
tmr = list_entry(sig->posix_timers.next, struct k_itimer, list);
|
||||
itimer_delete(tmr);
|
||||
}
|
||||
}
|
||||
|
||||
/* Not available / possible... functions */
|
||||
int do_posix_clock_nosettime(const clockid_t clockid, struct timespec *tp)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(do_posix_clock_nosettime);
|
||||
|
||||
/* Stub for clocks that do not support clock_nanosleep(). */
int do_posix_clock_nonanosleep(const clockid_t clock, int flags,
			       struct timespec *t, struct timespec __user *r)
{
#ifndef ENOTSUP
	return -EOPNOTSUPP;	/* aka ENOTSUP in userland for POSIX */
#else
	/* parisc does define it separately. */
	return -ENOTSUP;
#endif
}
EXPORT_SYMBOL_GPL(do_posix_clock_nonanosleep);
|
||||
|
||||
asmlinkage long sys_clock_settime(const clockid_t which_clock,
|
||||
const struct timespec __user *tp)
|
||||
{
|
||||
struct timespec new_tp;
|
||||
|
||||
if (invalid_clockid(which_clock))
|
||||
return -EINVAL;
|
||||
if (copy_from_user(&new_tp, tp, sizeof (*tp)))
|
||||
return -EFAULT;
|
||||
|
||||
return CLOCK_DISPATCH(which_clock, clock_set, (which_clock, &new_tp));
|
||||
}
|
||||
|
||||
asmlinkage long
|
||||
sys_clock_gettime(const clockid_t which_clock, struct timespec __user *tp)
|
||||
{
|
||||
struct timespec kernel_tp;
|
||||
int error;
|
||||
|
||||
if (invalid_clockid(which_clock))
|
||||
return -EINVAL;
|
||||
error = CLOCK_DISPATCH(which_clock, clock_get,
|
||||
(which_clock, &kernel_tp));
|
||||
if (!error && copy_to_user(tp, &kernel_tp, sizeof (kernel_tp)))
|
||||
error = -EFAULT;
|
||||
|
||||
return error;
|
||||
|
||||
}
|
||||
|
||||
asmlinkage long
|
||||
sys_clock_getres(const clockid_t which_clock, struct timespec __user *tp)
|
||||
{
|
||||
struct timespec rtn_tp;
|
||||
int error;
|
||||
|
||||
if (invalid_clockid(which_clock))
|
||||
return -EINVAL;
|
||||
|
||||
error = CLOCK_DISPATCH(which_clock, clock_getres,
|
||||
(which_clock, &rtn_tp));
|
||||
|
||||
if (!error && tp && copy_to_user(tp, &rtn_tp, sizeof (rtn_tp))) {
|
||||
error = -EFAULT;
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* nanosleep for monotonic and realtime clocks
|
||||
*/
|
||||
static int common_nsleep(const clockid_t which_clock, int flags,
|
||||
struct timespec *tsave, struct timespec __user *rmtp)
|
||||
{
|
||||
return hrtimer_nanosleep(tsave, rmtp, flags & TIMER_ABSTIME ?
|
||||
HRTIMER_MODE_ABS : HRTIMER_MODE_REL,
|
||||
which_clock);
|
||||
}
|
||||
|
||||
asmlinkage long
|
||||
sys_clock_nanosleep(const clockid_t which_clock, int flags,
|
||||
const struct timespec __user *rqtp,
|
||||
struct timespec __user *rmtp)
|
||||
{
|
||||
struct timespec t;
|
||||
|
||||
if (invalid_clockid(which_clock))
|
||||
return -EINVAL;
|
||||
|
||||
if (copy_from_user(&t, rqtp, sizeof (struct timespec)))
|
||||
return -EFAULT;
|
||||
|
||||
if (!timespec_valid(&t))
|
||||
return -EINVAL;
|
||||
|
||||
return CLOCK_DISPATCH(which_clock, nsleep,
|
||||
(which_clock, flags, &t, rmtp));
|
||||
}
|
||||
|
||||
/*
 * nanosleep_restart for monotonic and realtime clocks
 */
static int common_nsleep_restart(struct restart_block *restart_block)
{
	return hrtimer_nanosleep_restart(restart_block);
}
|
||||
|
||||
/*
|
||||
* This will restart clock_nanosleep. This is required only by
|
||||
* compat_clock_nanosleep_restart for now.
|
||||
*/
|
||||
long
|
||||
clock_nanosleep_restart(struct restart_block *restart_block)
|
||||
{
|
||||
clockid_t which_clock = restart_block->arg0;
|
||||
|
||||
return CLOCK_DISPATCH(which_clock, nsleep_restart,
|
||||
(restart_block));
|
||||
}
|
||||
173
kernel/power/Kconfig
Normal file
173
kernel/power/Kconfig
Normal file
@@ -0,0 +1,173 @@
|
||||
config PM
|
||||
bool "Power Management support"
|
||||
depends on !IA64_HP_SIM
|
||||
---help---
|
||||
"Power Management" means that parts of your computer are shut
|
||||
off or put into a power conserving "sleep" mode if they are not
|
||||
being used. There are two competing standards for doing this: APM
|
||||
and ACPI. If you want to use either one, say Y here and then also
|
||||
to the requisite support below.
|
||||
|
||||
Power Management is most important for battery powered laptop
|
||||
computers; if you have a laptop, check out the Linux Laptop home
|
||||
page on the WWW at <http://www.linux-on-laptops.com/> or
|
||||
Tuxmobil - Linux on Mobile Computers at <http://www.tuxmobil.org/>
|
||||
and the Battery Powered Linux mini-HOWTO, available from
|
||||
<http://www.tldp.org/docs.html#howto>.
|
||||
|
||||
Note that, even if you say N here, Linux on the x86 architecture
|
||||
will issue the hlt instruction if nothing is to be done, thereby
|
||||
sending the processor to sleep and saving power.
|
||||
|
||||
config PM_LEGACY
|
||||
bool "Legacy Power Management API (DEPRECATED)"
|
||||
depends on PM
|
||||
default n
|
||||
---help---
|
||||
Support for pm_register() and friends. This old API is obsoleted
|
||||
by the driver model.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config PM_CPU_MODE
|
||||
bool "PM_CPU_MODE"
|
||||
depends on PM
|
||||
default y
|
||||
---help---
|
||||
This option enables CPU low-power mode support to reduce power consumption.
|
||||
|
||||
config PM_DEBUG
|
||||
bool "Power Management Debug Support"
|
||||
depends on PM
|
||||
---help---
|
||||
This option enables verbose debugging support in the Power Management
|
||||
code. This is helpful when debugging and reporting various PM bugs,
|
||||
like suspend support.
|
||||
|
||||
config DISABLE_CONSOLE_SUSPEND
|
||||
bool "Keep console(s) enabled during suspend/resume (DANGEROUS)"
|
||||
depends on PM && PM_DEBUG
|
||||
default n
|
||||
---help---
|
||||
This option turns off the console suspend mechanism that prevents
|
||||
debug messages from reaching the console during the suspend/resume
|
||||
operations. This may be helpful when debugging device drivers'
|
||||
suspend/resume routines, but may itself lead to problems, for example
|
||||
if netconsole is used.
|
||||
|
||||
config PM_TRACE
|
||||
bool "Suspend/resume event tracing"
|
||||
depends on PM && PM_DEBUG && X86_32 && EXPERIMENTAL
|
||||
default n
|
||||
---help---
|
||||
This enables some cheesy code to save the last PM event point in the
|
||||
RTC across reboots, so that you can debug a machine that just hangs
|
||||
during suspend (or more commonly, during resume).
|
||||
|
||||
To use this debugging feature you should attempt to suspend the machine,
|
||||
then reboot it, then run
|
||||
|
||||
dmesg -s 1000000 | grep 'hash matches'
|
||||
|
||||
CAUTION: this option will cause your machine's real-time clock to be
|
||||
set to an invalid time after a resume.
|
||||
|
||||
config PM_SYSFS_DEPRECATED
|
||||
bool "Driver model /sys/devices/.../power/state files (DEPRECATED)"
|
||||
depends on PM && SYSFS
|
||||
default n
|
||||
help
|
||||
The driver model started out with a sysfs file intended to provide
|
||||
a userspace hook for device power management. This feature has never
|
||||
worked very well, except for limited testing purposes, and so it will
|
||||
be removed. It's not clear that a generic mechanism could really
|
||||
handle the wide variability of device power states; any replacements
|
||||
are likely to be bus or driver specific.
|
||||
|
||||
config SOFTWARE_SUSPEND
|
||||
bool "Software Suspend"
|
||||
depends on PM && SWAP && ((X86 && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP))
|
||||
---help---
|
||||
Enable the suspend to disk (STD) functionality.
|
||||
|
||||
You can suspend your machine with 'echo disk > /sys/power/state'.
|
||||
Alternatively, you can use the additional userland tools available
|
||||
from <http://suspend.sf.net>.
|
||||
|
||||
In principle it does not require ACPI or APM, although for example
|
||||
ACPI will be used if available.
|
||||
|
||||
It creates an image which is saved in your active swap. Upon the next
|
||||
boot, pass the 'resume=/dev/swappartition' argument to the kernel to
|
||||
have it detect the saved image, restore memory state from it, and
|
||||
continue to run as before. If you do not want the previous state to
|
||||
be reloaded, then use the 'noresume' kernel command line argument.
|
||||
Note, however, that fsck will be run on your filesystems and you will
|
||||
need to run mkswap against the swap partition used for the suspend.
|
||||
|
||||
It also works with swap files to a limited extent (for details see
|
||||
<file:Documentation/power/swsusp-and-swap-files.txt>).
|
||||
|
||||
Right now you may boot without resuming and resume later but in the
|
||||
meantime you cannot use the swap partition(s)/file(s) involved in
|
||||
suspending. Also in this case you must not use the filesystems
|
||||
that were mounted before the suspend. In particular, you MUST NOT
|
||||
MOUNT any journaled filesystems mounted before the suspend or they
|
||||
will get corrupted in a nasty way.
|
||||
|
||||
For more information take a look at <file:Documentation/power/swsusp.txt>.
|
||||
|
||||
config PM_STD_PARTITION
|
||||
string "Default resume partition"
|
||||
depends on SOFTWARE_SUSPEND
|
||||
default ""
|
||||
---help---
|
||||
The default resume partition is the partition that the suspend-
|
||||
to-disk implementation will look for a suspended disk image.
|
||||
|
||||
The partition specified here will be different for almost every user.
|
||||
It should be a valid swap partition (at least for now) that is turned
|
||||
on before suspending.
|
||||
|
||||
The partition specified can be overridden by specifying:
|
||||
|
||||
resume=/dev/<other device>
|
||||
|
||||
which will set the resume partition to the device specified.
|
||||
|
||||
Note there is currently not a way to specify which device to save the
|
||||
suspended image to. It will simply pick the first available swap
|
||||
device.
|
||||
|
||||
config SUSPEND_SMP
|
||||
bool
|
||||
depends on HOTPLUG_CPU && X86 && PM
|
||||
default y
|
||||
|
||||
config APM_EMULATION
|
||||
tristate "Advanced Power Management Emulation"
|
||||
depends on PM && SYS_SUPPORTS_APM_EMULATION
|
||||
help
|
||||
APM is a BIOS specification for saving power using several different
|
||||
techniques. This is mostly useful for battery powered laptops with
|
||||
APM compliant BIOSes. If you say Y here, the system time will be
|
||||
reset after a RESUME operation, the /proc/apm device will provide
|
||||
battery status information, and user-space programs will receive
|
||||
notification of APM "events" (e.g. battery status change).
|
||||
|
||||
In order to use APM, you will need supporting software. For location
|
||||
and more information, read <file:Documentation/pm.txt> and the
|
||||
Battery Powered Linux mini-HOWTO, available from
|
||||
<http://www.tldp.org/docs.html#howto>.
|
||||
|
||||
This driver does not spin down disk drives (see the hdparm(8)
|
||||
manpage ("man 8 hdparm") for that), and it doesn't turn off
|
||||
VESA-compliant "green" monitors.
|
||||
|
||||
Generally, if you don't have a battery in your machine, there isn't
|
||||
much point in using this driver and you should say N. If you get
|
||||
random kernel OOPSes or reboots that don't seem to be related to
|
||||
anything, try disabling/enabling this option (or disabling/enabling
|
||||
APM in your BIOS).
|
||||
|
||||
source "drivers/char/s3c-dvfs/Kconfig"
|
||||
10
kernel/power/Makefile
Normal file
10
kernel/power/Makefile
Normal file
@@ -0,0 +1,10 @@
|
||||
|
||||
ifeq ($(CONFIG_PM_DEBUG),y)
|
||||
EXTRA_CFLAGS += -DDEBUG
|
||||
endif
|
||||
|
||||
obj-y := main.o process.o console.o
|
||||
obj-$(CONFIG_PM_LEGACY) += pm.o
|
||||
obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o disk.o snapshot.o swap.o user.o
|
||||
|
||||
obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o
|
||||
58
kernel/power/console.c
Normal file
58
kernel/power/console.c
Normal file
@@ -0,0 +1,58 @@
|
||||
/*
|
||||
* kernel/power/console.c - Functions for saving/restoring console.
|
||||
*
|
||||
* Originally from swsusp.
|
||||
*/
|
||||
|
||||
#include <linux/vt_kern.h>
|
||||
#include <linux/kbd_kern.h>
|
||||
#include <linux/console.h>
|
||||
#include "power.h"
|
||||
|
||||
#if defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE)
|
||||
#define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1)
|
||||
|
||||
static int orig_fgconsole, orig_kmsg;
|
||||
|
||||
int pm_prepare_console(void)
|
||||
{
|
||||
acquire_console_sem();
|
||||
|
||||
orig_fgconsole = fg_console;
|
||||
|
||||
if (vc_allocate(SUSPEND_CONSOLE)) {
|
||||
/* we can't have a free VC for now. Too bad,
|
||||
* we don't want to mess the screen for now. */
|
||||
release_console_sem();
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (set_console(SUSPEND_CONSOLE)) {
|
||||
/*
|
||||
* We're unable to switch to the SUSPEND_CONSOLE.
|
||||
* Let the calling function know so it can decide
|
||||
* what to do.
|
||||
*/
|
||||
release_console_sem();
|
||||
return 1;
|
||||
}
|
||||
release_console_sem();
|
||||
|
||||
if (vt_waitactive(SUSPEND_CONSOLE)) {
|
||||
pr_debug("Suspend: Can't switch VCs.");
|
||||
return 1;
|
||||
}
|
||||
orig_kmsg = kmsg_redirect;
|
||||
kmsg_redirect = SUSPEND_CONSOLE;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void pm_restore_console(void)
|
||||
{
|
||||
acquire_console_sem();
|
||||
set_console(orig_fgconsole);
|
||||
release_console_sem();
|
||||
kmsg_redirect = orig_kmsg;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
465
kernel/power/disk.c
Normal file
465
kernel/power/disk.c
Normal file
@@ -0,0 +1,465 @@
|
||||
/*
|
||||
* kernel/power/disk.c - Suspend-to-disk support.
|
||||
*
|
||||
* Copyright (c) 2003 Patrick Mochel
|
||||
* Copyright (c) 2003 Open Source Development Lab
|
||||
* Copyright (c) 2004 Pavel Machek <pavel@suse.cz>
|
||||
*
|
||||
* This file is released under the GPLv2.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/suspend.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/reboot.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/mount.h>
|
||||
#include <linux/pm.h>
|
||||
#include <linux/console.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/freezer.h>
|
||||
|
||||
#include "power.h"
|
||||
|
||||
|
||||
static int noresume = 0;
|
||||
char resume_file[256] = CONFIG_PM_STD_PARTITION;
|
||||
dev_t swsusp_resume_device;
|
||||
sector_t swsusp_resume_block;
|
||||
|
||||
/**
|
||||
* platform_prepare - prepare the machine for hibernation using the
|
||||
* platform driver if so configured and return an error code if it fails
|
||||
*/
|
||||
|
||||
static inline int platform_prepare(void)
|
||||
{
|
||||
int error = 0;
|
||||
|
||||
if (pm_disk_mode == PM_DISK_PLATFORM) {
|
||||
if (pm_ops && pm_ops->prepare)
|
||||
error = pm_ops->prepare(PM_SUSPEND_DISK);
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
/**
|
||||
* power_down - Shut machine down for hibernate.
|
||||
* @mode: Suspend-to-disk mode
|
||||
*
|
||||
* Use the platform driver, if configured so, and return gracefully if it
|
||||
* fails.
|
||||
* Otherwise, try to power off and reboot. If they fail, halt the machine,
|
||||
* there ain't no turning back.
|
||||
*/
|
||||
|
||||
static void power_down(suspend_disk_method_t mode)
|
||||
{
|
||||
switch(mode) {
|
||||
case PM_DISK_PLATFORM:
|
||||
if (pm_ops && pm_ops->enter) {
|
||||
kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK);
|
||||
pm_ops->enter(PM_SUSPEND_DISK);
|
||||
break;
|
||||
}
|
||||
case PM_DISK_SHUTDOWN:
|
||||
kernel_power_off();
|
||||
break;
|
||||
case PM_DISK_REBOOT:
|
||||
kernel_restart(NULL);
|
||||
break;
|
||||
}
|
||||
kernel_halt();
|
||||
/* Valid image is on the disk, if we continue we risk serious data corruption
|
||||
after resume. */
|
||||
printk(KERN_CRIT "Please power me down manually\n");
|
||||
while(1);
|
||||
}
|
||||
|
||||
static inline void platform_finish(void)
|
||||
{
|
||||
if (pm_disk_mode == PM_DISK_PLATFORM) {
|
||||
if (pm_ops && pm_ops->finish)
|
||||
pm_ops->finish(PM_SUSPEND_DISK);
|
||||
}
|
||||
}
|
||||
|
||||
/* Thaw all frozen tasks and give the console back to userspace. */
static void unprepare_processes(void)
{
	thaw_processes();
	pm_restore_console();
}
|
||||
|
||||
static int prepare_processes(void)
|
||||
{
|
||||
int error = 0;
|
||||
|
||||
pm_prepare_console();
|
||||
if (freeze_processes()) {
|
||||
error = -EBUSY;
|
||||
unprepare_processes();
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
/**
|
||||
* pm_suspend_disk - The granpappy of hibernation power management.
|
||||
*
|
||||
* If we're going through the firmware, then get it over with quickly.
|
||||
*
|
||||
* If not, then call swsusp to do its thing, then figure out how
|
||||
* to power down the system.
|
||||
*/
|
||||
|
||||
int pm_suspend_disk(void)
|
||||
{
|
||||
int error;
|
||||
|
||||
error = prepare_processes();
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
if (pm_disk_mode == PM_DISK_TESTPROC) {
|
||||
printk("swsusp debug: Waiting for 5 seconds.\n");
|
||||
mdelay(5000);
|
||||
goto Thaw;
|
||||
}
|
||||
/* Free memory before shutting down devices. */
|
||||
error = swsusp_shrink_memory();
|
||||
if (error)
|
||||
goto Thaw;
|
||||
|
||||
error = platform_prepare();
|
||||
if (error)
|
||||
goto Thaw;
|
||||
|
||||
suspend_console();
|
||||
error = device_suspend(PMSG_FREEZE);
|
||||
if (error) {
|
||||
printk(KERN_ERR "PM: Some devices failed to suspend\n");
|
||||
goto Resume_devices;
|
||||
}
|
||||
error = disable_nonboot_cpus();
|
||||
if (error)
|
||||
goto Enable_cpus;
|
||||
|
||||
if (pm_disk_mode == PM_DISK_TEST) {
|
||||
printk("swsusp debug: Waiting for 5 seconds.\n");
|
||||
mdelay(5000);
|
||||
goto Enable_cpus;
|
||||
}
|
||||
|
||||
pr_debug("PM: snapshotting memory.\n");
|
||||
in_suspend = 1;
|
||||
error = swsusp_suspend();
|
||||
if (error)
|
||||
goto Enable_cpus;
|
||||
|
||||
if (in_suspend) {
|
||||
enable_nonboot_cpus();
|
||||
platform_finish();
|
||||
device_resume();
|
||||
resume_console();
|
||||
pr_debug("PM: writing image.\n");
|
||||
error = swsusp_write();
|
||||
if (!error)
|
||||
power_down(pm_disk_mode);
|
||||
else {
|
||||
swsusp_free();
|
||||
goto Thaw;
|
||||
}
|
||||
} else {
|
||||
pr_debug("PM: Image restored successfully.\n");
|
||||
}
|
||||
|
||||
swsusp_free();
|
||||
Enable_cpus:
|
||||
enable_nonboot_cpus();
|
||||
Resume_devices:
|
||||
platform_finish();
|
||||
device_resume();
|
||||
resume_console();
|
||||
Thaw:
|
||||
unprepare_processes();
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* software_resume - Resume from a saved image.
|
||||
*
|
||||
* Called as a late_initcall (so all devices are discovered and
|
||||
* initialized), we call swsusp to see if we have a saved image or not.
|
||||
* If so, we quiesce devices, the restore the saved image. We will
|
||||
* return above (in pm_suspend_disk() ) if everything goes well.
|
||||
* Otherwise, we fail gracefully and return to the normally
|
||||
* scheduled program.
|
||||
*
|
||||
*/
|
||||
|
||||
static int software_resume(void)
|
||||
{
|
||||
int error;
|
||||
|
||||
mutex_lock(&pm_mutex);
|
||||
if (!swsusp_resume_device) {
|
||||
if (!strlen(resume_file)) {
|
||||
mutex_unlock(&pm_mutex);
|
||||
return -ENOENT;
|
||||
}
|
||||
swsusp_resume_device = name_to_dev_t(resume_file);
|
||||
pr_debug("swsusp: Resume From Partition %s\n", resume_file);
|
||||
} else {
|
||||
pr_debug("swsusp: Resume From Partition %d:%d\n",
|
||||
MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device));
|
||||
}
|
||||
|
||||
if (noresume) {
|
||||
/**
|
||||
* FIXME: If noresume is specified, we need to find the partition
|
||||
* and reset it back to normal swap space.
|
||||
*/
|
||||
mutex_unlock(&pm_mutex);
|
||||
return 0;
|
||||
}
|
||||
|
||||
pr_debug("PM: Checking swsusp image.\n");
|
||||
|
||||
error = swsusp_check();
|
||||
if (error)
|
||||
goto Done;
|
||||
|
||||
pr_debug("PM: Preparing processes for restore.\n");
|
||||
|
||||
error = prepare_processes();
|
||||
if (error) {
|
||||
swsusp_close();
|
||||
goto Done;
|
||||
}
|
||||
|
||||
pr_debug("PM: Reading swsusp image.\n");
|
||||
|
||||
error = swsusp_read();
|
||||
if (error) {
|
||||
swsusp_free();
|
||||
goto Thaw;
|
||||
}
|
||||
|
||||
pr_debug("PM: Preparing devices for restore.\n");
|
||||
|
||||
suspend_console();
|
||||
error = device_suspend(PMSG_PRETHAW);
|
||||
if (error)
|
||||
goto Free;
|
||||
|
||||
error = disable_nonboot_cpus();
|
||||
if (!error)
|
||||
swsusp_resume();
|
||||
|
||||
enable_nonboot_cpus();
|
||||
Free:
|
||||
swsusp_free();
|
||||
device_resume();
|
||||
resume_console();
|
||||
Thaw:
|
||||
printk(KERN_ERR "PM: Restore failed, recovering.\n");
|
||||
unprepare_processes();
|
||||
Done:
|
||||
/* For success case, the suspend path will release the lock */
|
||||
mutex_unlock(&pm_mutex);
|
||||
pr_debug("PM: Resume from disk failed.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
late_initcall(software_resume);
|
||||
|
||||
|
||||
static const char * const pm_disk_modes[] = {
|
||||
[PM_DISK_FIRMWARE] = "firmware",
|
||||
[PM_DISK_PLATFORM] = "platform",
|
||||
[PM_DISK_SHUTDOWN] = "shutdown",
|
||||
[PM_DISK_REBOOT] = "reboot",
|
||||
[PM_DISK_TEST] = "test",
|
||||
[PM_DISK_TESTPROC] = "testproc",
|
||||
};
|
||||
|
||||
/**
|
||||
* disk - Control suspend-to-disk mode
|
||||
*
|
||||
* Suspend-to-disk can be handled in several ways. The greatest
|
||||
* distinction is who writes memory to disk - the firmware or the OS.
|
||||
* If the firmware does it, we assume that it also handles suspending
|
||||
* the system.
|
||||
* If the OS does it, then we have three options for putting the system
|
||||
* to sleep - using the platform driver (e.g. ACPI or other PM registers),
|
||||
* powering off the system or rebooting the system (for testing).
|
||||
*
|
||||
* The system will support either 'firmware' or 'platform', and that is
|
||||
* known a priori (and encoded in pm_ops). But, the user may choose
|
||||
* 'shutdown' or 'reboot' as alternatives.
|
||||
*
|
||||
* show() will display what the mode is currently set to.
|
||||
* store() will accept one of
|
||||
*
|
||||
* 'firmware'
|
||||
* 'platform'
|
||||
* 'shutdown'
|
||||
* 'reboot'
|
||||
*
|
||||
* It will only change to 'firmware' or 'platform' if the system
|
||||
* supports it (as determined from pm_ops->pm_disk_mode).
|
||||
*/
|
||||
|
||||
static ssize_t disk_show(struct subsystem * subsys, char * buf)
|
||||
{
|
||||
return sprintf(buf, "%s\n", pm_disk_modes[pm_disk_mode]);
|
||||
}
|
||||
|
||||
|
||||
/* sysfs store: select the suspend-to-disk mode by name. */
static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n)
{
	suspend_disk_method_t mode = 0;
	int error = 0;
	char *p;
	int len;
	int i;

	/* Ignore a trailing newline when matching mode names. */
	p = memchr(buf, '\n', n);
	len = p ? p - buf : n;

	mutex_lock(&pm_mutex);
	for (i = PM_DISK_FIRMWARE; i < PM_DISK_MAX; i++) {
		if (!strncmp(buf, pm_disk_modes[i], len)) {
			mode = i;
			break;
		}
	}
	if (mode) {
		if (mode == PM_DISK_SHUTDOWN || mode == PM_DISK_REBOOT ||
		    mode == PM_DISK_TEST || mode == PM_DISK_TESTPROC) {
			pm_disk_mode = mode;
		} else {
			/* 'firmware'/'platform' only if pm_ops supports it. */
			if (pm_ops && pm_ops->enter &&
			    (mode == pm_ops->pm_disk_mode))
				pm_disk_mode = mode;
			else
				error = -EINVAL;
		}
	} else {
		error = -EINVAL;
	}

	pr_debug("PM: suspend-to-disk mode set to '%s'\n",
		 pm_disk_modes[mode]);
	mutex_unlock(&pm_mutex);
	return error ? error : n;
}
|
||||
|
||||
power_attr(disk);
|
||||
|
||||
static ssize_t resume_show(struct subsystem * subsys, char *buf)
|
||||
{
|
||||
return sprintf(buf,"%d:%d\n", MAJOR(swsusp_resume_device),
|
||||
MINOR(swsusp_resume_device));
|
||||
}
|
||||
|
||||
/* sysfs store: set the resume device ("major:minor") and try a resume. */
static ssize_t resume_store(struct subsystem *subsys, const char *buf, size_t n)
{
	unsigned int maj, min;
	int ret = -EINVAL;
	dev_t res;

	if (sscanf(buf, "%u:%u", &maj, &min) != 2)
		goto out;

	res = MKDEV(maj, min);
	/* Reject numbers that do not round-trip through MKDEV(). */
	if (maj != MAJOR(res) || min != MINOR(res))
		goto out;

	mutex_lock(&pm_mutex);
	swsusp_resume_device = res;
	mutex_unlock(&pm_mutex);
	printk("Attempting manual resume\n");
	noresume = 0;
	software_resume();
	ret = n;
 out:
	return ret;
}
|
||||
|
||||
power_attr(resume);
|
||||
|
||||
static ssize_t image_size_show(struct subsystem * subsys, char *buf)
|
||||
{
|
||||
return sprintf(buf, "%lu\n", image_size);
|
||||
}
|
||||
|
||||
/* sysfs store: set the target hibernation image size (decimal bytes). */
static ssize_t image_size_store(struct subsystem * subsys, const char * buf, size_t n)
{
	unsigned long size;

	if (sscanf(buf, "%lu", &size) != 1)
		return -EINVAL;

	image_size = size;
	return n;
}
|
||||
|
||||
power_attr(image_size);
|
||||
|
||||
static struct attribute * g[] = {
|
||||
&disk_attr.attr,
|
||||
&resume_attr.attr,
|
||||
&image_size_attr.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
|
||||
static struct attribute_group attr_group = {
|
||||
.attrs = g,
|
||||
};
|
||||
|
||||
|
||||
/* Register the suspend-to-disk attributes under /sys/power. */
static int __init pm_disk_init(void)
{
	return sysfs_create_group(&power_subsys.kset.kobj, &attr_group);
}
|
||||
|
||||
core_initcall(pm_disk_init);
|
||||
|
||||
|
||||
/* "resume=" boot option: name of the partition holding the image. */
static int __init resume_setup(char *str)
{
	if (!noresume)
		strncpy(resume_file, str, 255);

	return 1;
}
|
||||
|
||||
/* "resume_offset=" boot option: block offset of the swap image. */
static int __init resume_offset_setup(char *str)
{
	unsigned long long offset;

	if (!noresume && sscanf(str, "%llu", &offset) == 1)
		swsusp_resume_block = offset;

	return 1;
}
|
||||
|
||||
/* "noresume" boot option: do not look for a saved image at boot. */
static int __init noresume_setup(char *str)
{
	noresume = 1;
	return 1;
}
|
||||
|
||||
__setup("noresume", noresume_setup);
|
||||
__setup("resume_offset=", resume_offset_setup);
|
||||
__setup("resume=", resume_setup);
|
||||
451
kernel/power/main.c
Normal file
451
kernel/power/main.c
Normal file
@@ -0,0 +1,451 @@
|
||||
/*
|
||||
* kernel/power/main.c - PM subsystem core functionality.
|
||||
*
|
||||
* Copyright (c) 2003 Patrick Mochel
|
||||
* Copyright (c) 2003 Open Source Development Lab
|
||||
*
|
||||
* This file is released under the GPLv2
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/suspend.h>
|
||||
#include <linux/kobject.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/pm.h>
|
||||
#include <linux/console.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/resume-trace.h>
|
||||
#include <linux/freezer.h>
|
||||
#include <linux/vmstat.h>
|
||||
#include "power.h"
|
||||
#if 0
|
||||
/* Qisda, ShiYong Lin, 2009/07/18, Send message when sleep{*/
|
||||
#include <linux/input.h>
|
||||
/* Qisda, ShiYong Lin, 2009/07/18, Send message when sleep}*/
|
||||
#endif
|
||||
#ifdef CONFIG_PM_CPU_MODE
|
||||
extern unsigned char pm_cpu_mode;
|
||||
#endif
|
||||
|
||||
/*This is just an arbitrary number */
|
||||
#define FREE_PAGE_NUMBER (100)
|
||||
|
||||
DEFINE_MUTEX(pm_mutex);
|
||||
|
||||
struct pm_ops *pm_ops;
|
||||
suspend_disk_method_t pm_disk_mode = PM_DISK_PLATFORM;
|
||||
/* Qisda, ShiYong Lin, 2009/07/18, Send message when sleep{*/
|
||||
extern void pm_keypad_message_to_ap (void);
|
||||
/* Qisda, ShiYong Lin, 2009/07/18, Send message when sleep}*/
|
||||
|
||||
/**
|
||||
* pm_set_ops - Set the global power method table.
|
||||
* @ops: Pointer to ops structure.
|
||||
*/
|
||||
|
||||
void pm_set_ops(struct pm_ops * ops)
{
	/* Take pm_mutex so the table cannot change under a suspend
	 * transition that is already in progress. */
	mutex_lock(&pm_mutex);
	pm_ops = ops;
	mutex_unlock(&pm_mutex);
}
|
||||
|
||||
/* Invoke the platform's optional ->finish() hook after wakeup.
 * NOTE(review): pm_ops is dereferenced unconditionally here; callers
 * are expected to have checked pm_ops != NULL beforehand. */
static inline void pm_finish(suspend_state_t state)
{
	if (pm_ops->finish)
		pm_ops->finish(state);
}
|
||||
|
||||
/**
|
||||
* suspend_prepare - Do prep work before entering low-power state.
|
||||
* @state: State we're entering.
|
||||
*
|
||||
* This is common code that is called for each state that we're
|
||||
* entering. Allocate a console, stop all processes, then make sure
|
||||
* the platform can enter the requested state.
|
||||
*/
|
||||
|
||||
static int suspend_prepare(suspend_state_t state)
{
	int error;
	unsigned int free_pages;

	/* Without a platform ->enter() hook there is nothing to enter. */
	if (!pm_ops || !pm_ops->enter)
		return -EPERM;

	pm_prepare_console();

	if (freeze_processes()) {
		error = -EAGAIN;
		goto Thaw;
	}

	/* Make sure at least FREE_PAGE_NUMBER pages are free before the
	 * transition; try to reclaim memory if they are not. */
	if ((free_pages = global_page_state(NR_FREE_PAGES))
			< FREE_PAGE_NUMBER) {
		pr_debug("PM: free some memory\n");
		shrink_all_memory(FREE_PAGE_NUMBER - free_pages);
		if (nr_free_pages() < FREE_PAGE_NUMBER) {
			error = -ENOMEM;
			printk(KERN_ERR "PM: Not enough memory\n");
			goto Thaw;
		}
	}

	if (pm_ops->prepare) {
		if ((error = pm_ops->prepare(state)))
			goto Thaw;
	}

	suspend_console();
	error = device_suspend(PMSG_SUSPEND);
	if (error) {
		printk(KERN_ERR "Some devices failed to suspend\n");
		goto Resume_devices;
	}
	error = disable_nonboot_cpus();
	if (!error)
		return 0;

	/* Unwind in reverse order of the steps above. */
	enable_nonboot_cpus();
 Resume_devices:
	pm_finish(state);
	device_resume();
	resume_console();
 Thaw:
	thaw_processes();
	pm_restore_console();
	return error;
}
|
||||
|
||||
|
||||
/* Final step of entering a sleep state: with interrupts disabled,
 * power the devices down, call the platform ->enter() hook, and power
 * everything back up on return. */
int suspend_enter(suspend_state_t state)
{
	int error = 0;
	unsigned long flags;

	local_irq_save(flags);

	if ((error = device_power_down(PMSG_SUSPEND))) {
		printk(KERN_ERR "Some devices failed to power down\n");
		goto Done;
	}
	/* pm_ops->enter() does not return until the system wakes up. */
	error = pm_ops->enter(state);
	device_power_up();
 Done:
	local_irq_restore(flags);
	return error;
}
|
||||
|
||||
|
||||
/**
|
||||
* suspend_finish - Do final work before exiting suspend sequence.
|
||||
* @state: State we're coming out of.
|
||||
*
|
||||
* Call platform code to clean up, restart processes, and free the
|
||||
* console that we've allocated. This is not called for suspend-to-disk.
|
||||
*/
|
||||
|
||||
static void suspend_finish(suspend_state_t state)
{
	/* Mirror image of suspend_prepare(): undo its steps in reverse. */
	enable_nonboot_cpus();
	pm_finish(state);
	device_resume();
	resume_console();
	thaw_processes();
	pm_restore_console();
}
|
||||
|
||||
|
||||
|
||||
|
||||
/* sysfs names for the sleep states, indexed by suspend_state_t.
 * Slots not listed (e.g. PM_SUSPEND_ON) stay NULL and are skipped by
 * state_show()/state_store(). */
static const char * const pm_states[PM_SUSPEND_MAX] = {
	[PM_SUSPEND_STANDBY]	= "standby",
	[PM_SUSPEND_MEM]	= "mem",
#ifdef CONFIG_PM_CPU_MODE
	[PM_SUSPEND_CPU_MODE]	= "cpu",
#endif
#ifdef CONFIG_SOFTWARE_SUSPEND
	[PM_SUSPEND_DISK]	= "disk",
#endif
};
|
||||
|
||||
/* Return nonzero if @state can be entered on this platform. */
static inline int valid_state(suspend_state_t state)
{
	/* Suspend-to-disk can always fall back to reboot mode, so it
	 * needs no low-level platform support. */
	if (state == PM_SUSPEND_DISK)
		return 1;

	/* Everything else needs low-level support.  A platform without
	 * a ->valid callback accepts all states. */
	if (!pm_ops)
		return 0;
	if (pm_ops->valid && !pm_ops->valid(state))
		return 0;
	return 1;
}
|
||||
|
||||
#ifdef CONFIG_PM_CPU_MODE
|
||||
/* CONFIG_PM_CPU_MODE prepare step; currently just delegates to the
 * regular suspend_prepare(). */
static int suspend_pm_cpu_mode_prepare(suspend_state_t state)
{
	return suspend_prepare(state);
}
|
||||
|
||||
|
||||
/*
 * suspend_pm_cpu_mode_enter - enter the CPU-mode sleep state.
 * @state: target state, passed through to pm_ops->enter().
 *
 * CPU-mode counterpart of suspend_enter().  device_power_down() is
 * deliberately skipped on this path (it was disabled in the original
 * code), yet device_power_up() still runs on the way out --
 * NOTE(review): confirm this asymmetry is intentional.
 *
 * Fix: the orphaned "Done:" label (its only goto lived in the removed
 * dead code) triggered -Wunused-label; drop it.
 */
int suspend_pm_cpu_mode_enter(suspend_state_t state)
{
	int error;
	unsigned long flags;

	printk("suspend_cpu_mode_enter\n");
	local_irq_save(flags);

	error = pm_ops->enter(state);

	device_power_up();
	local_irq_restore(flags);
	return error;
}
|
||||
|
||||
|
||||
/* Wakeup cleanup for the CPU-mode path; same sequence as
 * suspend_finish(). */
static void suspend_pm_cpu_mode_finish(suspend_state_t state)
{
	enable_nonboot_cpus();
	pm_finish(state);
	device_resume();
//	s3c24xx_cpu_mode_serial_resume();
	resume_console();
	thaw_processes();
	pm_restore_console();
}
|
||||
|
||||
/* CPU-mode counterpart of enter_state(): same prepare/enter/finish
 * sequence, using the suspend_pm_cpu_mode_* variants, and notifying
 * the keypad driver once the transition is over. */
static int enter_pm_cpu_mode(suspend_state_t state)
{
	int error;

	if (!valid_state(state))
		return -ENODEV;

	/* Only one sleep transition at a time; fail instead of waiting. */
	if (!mutex_trylock(&pm_mutex))
		return -EBUSY;

	if (state == PM_SUSPEND_DISK) {
		error = pm_suspend_disk();
		goto Unlock;
	}

	pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]);
	if ((error = suspend_pm_cpu_mode_prepare(state)))
		goto Unlock;

//	pr_debug("PM: Entering %s sleep\n", pm_states[state]);
	error = suspend_pm_cpu_mode_enter(state);
	pr_debug("PM: Finishing wakeup.\n");
	suspend_pm_cpu_mode_finish(state);
 Unlock:
	mutex_unlock(&pm_mutex);
	/* Qisda, ShiYong Lin, 2009/09/28, Add the sleep event message when sleep {*/
	s3c_keypad_pm_sleep_message_to_ap(0);
//	printk(KERN_ERR "Sleep end enter_pm_cpu_mode, %d\n", state);
	/* } Qisda, ShiYong Lin, 2009/09/28, Add the sleep event message when sleep */
	return error;
}
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* enter_state - Do common work of entering low-power state.
|
||||
* @state: pm_state structure for state we're entering.
|
||||
*
|
||||
* Make sure we're the only ones trying to enter a sleep state. Fail
|
||||
* if someone has beat us to it, since we don't want anything weird to
|
||||
* happen when we wake up.
|
||||
* Then, do the setup for suspend, enter the state, and cleaup (after
|
||||
* we've woken up).
|
||||
*/
|
||||
|
||||
static int enter_state(suspend_state_t state)
{
	int error;

	if (!valid_state(state))
		return -ENODEV;
	/* Only one sleep transition at a time; fail instead of waiting. */
	if (!mutex_trylock(&pm_mutex))
		return -EBUSY;

	/* Suspend-to-disk has its own complete code path. */
	if (state == PM_SUSPEND_DISK) {
		error = pm_suspend_disk();
		goto Unlock;
	}

	pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]);
	if ((error = suspend_prepare(state)))
		goto Unlock;

	pr_debug("PM: Entering %s sleep\n", pm_states[state]);
	error = suspend_enter(state);

	/* suspend_finish() runs even when suspend_enter() failed, to
	 * undo the work done by suspend_prepare(). */
	pr_debug("PM: Finishing wakeup.\n");
	suspend_finish(state);
 Unlock:
	mutex_unlock(&pm_mutex);
	/* Qisda, ShiYong Lin, 2009/09/28, Add the sleep event message when sleep {*/
	s3c_keypad_pm_sleep_message_to_ap(0);
	/* } Qisda, ShiYong Lin, 2009/09/28, Add the sleep event message when sleep */
	return error;
}
|
||||
|
||||
/*
|
||||
* This is main interface to the outside world. It needs to be
|
||||
* called from process context.
|
||||
*/
|
||||
int software_suspend(void)
{
	/* Suspend-to-disk entry point for external callers; process
	 * context required (enter_state() may sleep). */
	return enter_state(PM_SUSPEND_DISK);
}
|
||||
|
||||
|
||||
/**
|
||||
* pm_suspend - Externally visible function for suspending system.
|
||||
* @state: Enumarted value of state to enter.
|
||||
*
|
||||
* Determine whether or not value is within range, get state
|
||||
* structure, and enter (above).
|
||||
*/
|
||||
|
||||
/* Validate the requested state's range, then hand off to enter_state(). */
int pm_suspend(suspend_state_t state)
{
	if (state <= PM_SUSPEND_ON || state > PM_SUSPEND_MAX)
		return -EINVAL;

	return enter_state(state);
}
|
||||
|
||||
EXPORT_SYMBOL(pm_suspend);
|
||||
|
||||
decl_subsys(power,NULL,NULL);
|
||||
|
||||
|
||||
/**
|
||||
* state - control system power state.
|
||||
*
|
||||
* show() returns what states are supported, which is hard-coded to
|
||||
* 'standby' (Power-On Suspend), 'mem' (Suspend-to-RAM), and
|
||||
* 'disk' (Suspend-to-Disk).
|
||||
*
|
||||
* store() accepts one of those strings, translates it into the
|
||||
* proper enumerated value, and initiates a suspend transition.
|
||||
*/
|
||||
|
||||
static ssize_t state_show(struct subsystem * subsys, char * buf)
|
||||
{
|
||||
int i;
|
||||
char * s = buf;
|
||||
|
||||
for (i = 0; i < PM_SUSPEND_MAX; i++) {
|
||||
if (pm_states[i] && valid_state(i))
|
||||
s += sprintf(s,"%s ", pm_states[i]);
|
||||
}
|
||||
s += sprintf(s,"\n");
|
||||
return (s - buf);
|
||||
}
|
||||
|
||||
/* Translate the written string into a suspend_state_t and start the
 * transition.  With CONFIG_PM_CPU_MODE, "cpu" and "mem" are routed to
 * the CPU-mode path when the pm_cpu_mode flag is set. */
static ssize_t state_store(struct subsystem * subsys, const char * buf, size_t n)
{
	suspend_state_t state = PM_SUSPEND_STANDBY;
	const char * const *s;
	char *p;
	int error;
	int len;

	/* Compare only up to a trailing newline, if any. */
	p = memchr(buf, '\n', n);
	len = p ? p - buf : n;

	/* Scan pm_states[] for a matching name; on exit 'state' is the
	 * matching index, or PM_SUSPEND_MAX if nothing matched. */
	for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) {
		if (*s && !strncmp(buf, *s, len))
			break;
	}

#ifdef CONFIG_PM_CPU_MODE
	printk(KERN_ERR "state_store, %d\n", state);
	if (state < PM_SUSPEND_MAX && (state == PM_SUSPEND_CPU_MODE ||
		state == PM_SUSPEND_MEM) ){
		/* pm_cpu_mode selects the CPU-mode sleep implementation. */
		if(pm_cpu_mode){
//			state = 0;
			error = enter_pm_cpu_mode(state);
		}
		else{
			error = enter_state(state);
		}
	}
	else{
		error = -EINVAL;
	}
	printk(KERN_ERR "end, leave state_store, %d\n", state);
#else
	if (state < PM_SUSPEND_MAX && *s)
		error = enter_state(state);
	else
		error = -EINVAL;
#endif
	return error ? error : n;
}
|
||||
|
||||
power_attr(state);
|
||||
|
||||
#ifdef CONFIG_PM_TRACE
|
||||
int pm_trace_enabled;
|
||||
|
||||
/* Report the current pm_trace flag (0 or 1). */
static ssize_t pm_trace_show(struct subsystem * subsys, char * buf)
{
	return sprintf(buf, "%d\n", pm_trace_enabled);
}
|
||||
|
||||
static ssize_t
|
||||
pm_trace_store(struct subsystem * subsys, const char * buf, size_t n)
|
||||
{
|
||||
int val;
|
||||
|
||||
if (sscanf(buf, "%d", &val) == 1) {
|
||||
pm_trace_enabled = !!val;
|
||||
return n;
|
||||
}
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
power_attr(pm_trace);
|
||||
|
||||
/* Attributes exported under /sys/power; pm_trace only exists when
 * CONFIG_PM_TRACE is enabled (the #ifdef opens earlier in the file). */
static struct attribute * g[] = {
	&state_attr.attr,
	&pm_trace_attr.attr,
	NULL,
};
#else
static struct attribute * g[] = {
	&state_attr.attr,
	NULL,
};
#endif /* CONFIG_PM_TRACE */

static struct attribute_group attr_group = {
	.attrs = g,
};
|
||||
|
||||
|
||||
/* Register the 'power' subsystem, then its sysfs attributes. */
static int __init pm_init(void)
{
	int error;

	error = subsystem_register(&power_subsys);
	if (error)
		return error;

	return sysfs_create_group(&power_subsys.kset.kobj, &attr_group);
}
|
||||
|
||||
core_initcall(pm_init);
|
||||
209
kernel/power/pm.c
Normal file
209
kernel/power/pm.c
Normal file
@@ -0,0 +1,209 @@
|
||||
/*
|
||||
* pm.c - Power management interface
|
||||
*
|
||||
* Copyright (C) 2000 Andrew Henroid
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
#include <linux/init.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/pm.h>
|
||||
#include <linux/pm_legacy.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/mutex.h>
|
||||
|
||||
int pm_active;
|
||||
|
||||
/*
|
||||
* Locking notes:
|
||||
* pm_devs_lock can be a semaphore providing pm ops are not called
|
||||
* from an interrupt handler (already a bad idea so no change here). Each
|
||||
* change must be protected so that an unlink of an entry doesn't clash
|
||||
* with a pm send - which is permitted to sleep in the current architecture
|
||||
*
|
||||
* Module unloads clashing with pm events now work out safely, the module
|
||||
* unload path will block until the event has been sent. It may well block
|
||||
* until a resume but that will be fine.
|
||||
*/
|
||||
|
||||
static DEFINE_MUTEX(pm_devs_lock);
|
||||
static LIST_HEAD(pm_devs);
|
||||
|
||||
/**
|
||||
* pm_register - register a device with power management
|
||||
* @type: device type
|
||||
* @id: device ID
|
||||
* @callback: callback function
|
||||
*
|
||||
* Add a device to the list of devices that wish to be notified about
|
||||
* power management events. A &pm_dev structure is returned on success,
|
||||
* on failure the return is %NULL.
|
||||
*
|
||||
* The callback function will be called in process context and
|
||||
* it may sleep.
|
||||
*/
|
||||
|
||||
struct pm_dev *pm_register(pm_dev_t type,
|
||||
unsigned long id,
|
||||
pm_callback callback)
|
||||
{
|
||||
struct pm_dev *dev = kzalloc(sizeof(struct pm_dev), GFP_KERNEL);
|
||||
if (dev) {
|
||||
dev->type = type;
|
||||
dev->id = id;
|
||||
dev->callback = callback;
|
||||
|
||||
mutex_lock(&pm_devs_lock);
|
||||
list_add(&dev->entry, &pm_devs);
|
||||
mutex_unlock(&pm_devs_lock);
|
||||
}
|
||||
return dev;
|
||||
}
|
||||
|
||||
/**
|
||||
* pm_send - send request to a single device
|
||||
* @dev: device to send to
|
||||
* @rqst: power management request
|
||||
* @data: data for the callback
|
||||
*
|
||||
* Issue a power management request to a given device. The
|
||||
* %PM_SUSPEND and %PM_RESUME events are handled specially. The
|
||||
* data field must hold the intended next state. No call is made
|
||||
* if the state matches.
|
||||
*
|
||||
* BUGS: what stops two power management requests occurring in parallel
|
||||
* and conflicting.
|
||||
*
|
||||
* WARNING: Calling pm_send directly is not generally recommended, in
|
||||
* particular there is no locking against the pm_dev going away. The
|
||||
* caller must maintain all needed locking or have 'inside knowledge'
|
||||
* on the safety. Also remember that this function is not locked against
|
||||
* pm_unregister. This means that you must handle SMP races on callback
|
||||
* execution and unload yourself.
|
||||
*/
|
||||
|
||||
static int pm_send(struct pm_dev *dev, pm_request_t rqst, void *data)
{
	int status = 0;
	unsigned long prev_state, next_state;

	/* Callbacks may sleep; calling from interrupt context is a bug.
	 * BUG_ON() is the idiomatic form of "if (cond) BUG();". */
	BUG_ON(in_interrupt());

	switch (rqst) {
	case PM_SUSPEND:
	case PM_RESUME:
		/* For state transitions, 'data' carries the target state;
		 * skip the callback when the device is already there. */
		prev_state = dev->state;
		next_state = (unsigned long) data;
		if (prev_state != next_state) {
			if (dev->callback)
				status = (*dev->callback)(dev, rqst, data);
			if (!status) {
				dev->state = next_state;
				dev->prev_state = prev_state;
			}
		} else {
			dev->prev_state = prev_state;
		}
		break;
	default:
		/* Other requests are forwarded unconditionally. */
		if (dev->callback)
			status = (*dev->callback)(dev, rqst, data);
		break;
	}
	return status;
}
|
||||
|
||||
/*
|
||||
* Undo incomplete request
|
||||
*/
|
||||
/* Walk backwards from @last toward the list head, returning every
 * device that actually changed state to its previous state. */
static void pm_undo_all(struct pm_dev *last)
{
	struct list_head *entry = last->entry.prev;
	while (entry != &pm_devs) {
		struct pm_dev *dev = list_entry(entry, struct pm_dev, entry);
		if (dev->state != dev->prev_state) {
			/* previous state was zero (running) resume or
			 * previous state was non-zero (suspended) suspend
			 */
			pm_request_t undo = (dev->prev_state
					? PM_SUSPEND:PM_RESUME);
			pm_send(dev, undo, (void*) dev->prev_state);
		}
		entry = entry->prev;
	}
}
|
||||
|
||||
/**
|
||||
* pm_send_all - send request to all managed devices
|
||||
* @rqst: power management request
|
||||
* @data: data for the callback
|
||||
*
|
||||
* Issue a power management request to a all devices. The
|
||||
* %PM_SUSPEND events are handled specially. Any device is
|
||||
* permitted to fail a suspend by returning a non zero (error)
|
||||
* value from its callback function. If any device vetoes a
|
||||
* suspend request then all other devices that have suspended
|
||||
* during the processing of this request are restored to their
|
||||
* previous state.
|
||||
*
|
||||
* WARNING: This function takes the pm_devs_lock. The lock is not dropped until
|
||||
* the callbacks have completed. This prevents races against pm locking
|
||||
* functions, races against module unload pm_unregister code. It does
|
||||
* mean however that you must not issue pm_ functions within the callback
|
||||
* or you will deadlock and users will hate you.
|
||||
*
|
||||
* Zero is returned on success. If a suspend fails then the status
|
||||
* from the device that vetoes the suspend is returned.
|
||||
*
|
||||
* BUGS: what stops two power management requests occurring in parallel
|
||||
* and conflicting.
|
||||
*/
|
||||
|
||||
int pm_send_all(pm_request_t rqst, void *data)
{
	struct list_head *entry;

	/* pm_devs_lock is held across the callbacks (see the warning
	 * above): callbacks must not call pm_* functions themselves. */
	mutex_lock(&pm_devs_lock);
	entry = pm_devs.next;
	while (entry != &pm_devs) {
		struct pm_dev *dev = list_entry(entry, struct pm_dev, entry);
		if (dev->callback) {
			int status = pm_send(dev, rqst, data);
			if (status) {
				/* return devices to previous state on
				 * failed suspend request
				 */
				if (rqst == PM_SUSPEND)
					pm_undo_all(dev);
				mutex_unlock(&pm_devs_lock);
				return status;
			}
		}
		entry = entry->next;
	}
	mutex_unlock(&pm_devs_lock);
	return 0;
}
|
||||
|
||||
EXPORT_SYMBOL(pm_register);
|
||||
EXPORT_SYMBOL(pm_send_all);
|
||||
EXPORT_SYMBOL(pm_active);
|
||||
|
||||
|
||||
179
kernel/power/power.h
Normal file
179
kernel/power/power.h
Normal file
@@ -0,0 +1,179 @@
|
||||
#include <linux/suspend.h>
|
||||
#include <linux/utsname.h>
|
||||
|
||||
/* Header of the hibernation image, page-aligned so it occupies its
 * own page on the swap device. */
struct swsusp_info {
	struct new_utsname	uts;		/* kernel identification strings */
	u32			version_code;	/* LINUX_VERSION_CODE of the saving kernel -- TODO confirm */
	unsigned long		num_physpages;
	int			cpus;
	unsigned long		image_pages;
	unsigned long		pages;
	unsigned long		size;
} __attribute__((aligned(PAGE_SIZE)));
|
||||
|
||||
|
||||
|
||||
#ifdef CONFIG_SOFTWARE_SUSPEND
|
||||
extern int pm_suspend_disk(void);
|
||||
|
||||
#else
|
||||
static inline int pm_suspend_disk(void)
|
||||
{
|
||||
return -EPERM;
|
||||
}
|
||||
#endif
|
||||
|
||||
extern struct mutex pm_mutex;
|
||||
|
||||
/*
 * power_attr(_name) - declare a 0644 sysfs attribute named "_name",
 * wired to the _name##_show() / _name##_store() functions, which must
 * already be defined at the point of use.
 */
#define power_attr(_name) \
static struct subsys_attribute _name##_attr = {	\
	.attr	= {				\
		.name = __stringify(_name),	\
		.mode = 0644,			\
	},					\
	.show	= _name##_show,			\
	.store	= _name##_store,		\
}
|
||||
|
||||
extern struct subsystem power_subsys;
|
||||
|
||||
/* References to section boundaries */
|
||||
extern const void __nosave_begin, __nosave_end;
|
||||
|
||||
/* Preferred image size in bytes (default 500 MB) */
|
||||
extern unsigned long image_size;
|
||||
extern int in_suspend;
|
||||
extern dev_t swsusp_resume_device;
|
||||
extern sector_t swsusp_resume_block;
|
||||
|
||||
extern asmlinkage int swsusp_arch_suspend(void);
|
||||
extern asmlinkage int swsusp_arch_resume(void);
|
||||
|
||||
extern unsigned int count_data_pages(void);
|
||||
|
||||
/**
|
||||
* Auxiliary structure used for reading the snapshot image data and
|
||||
* metadata from and writing them to the list of page backup entries
|
||||
* (PBEs) which is the main data structure of swsusp.
|
||||
*
|
||||
* Using struct snapshot_handle we can transfer the image, including its
|
||||
* metadata, as a continuous sequence of bytes with the help of
|
||||
* snapshot_read_next() and snapshot_write_next().
|
||||
*
|
||||
* The code that writes the image to a storage or transfers it to
|
||||
* the user land is required to use snapshot_read_next() for this
|
||||
* purpose and it should not make any assumptions regarding the internal
|
||||
* structure of the image. Similarly, the code that reads the image from
|
||||
* a storage or transfers it from the user land is required to use
|
||||
* snapshot_write_next().
|
||||
*
|
||||
* This may allow us to change the internal structure of the image
|
||||
* in the future with considerably less effort.
|
||||
*/
|
||||
|
||||
/* Cursor state for the byte-stream view of the snapshot image (see the
 * explanatory comment above); block counts are in PAGE_SIZE units. */
struct snapshot_handle {
	loff_t		offset;	/* number of the last byte ready for reading
				 * or writing in the sequence
				 */
	unsigned int	cur;	/* number of the block of PAGE_SIZE bytes the
				 * next operation will refer to (ie. current)
				 */
	unsigned int	cur_offset;	/* offset with respect to the current
					 * block (for the next operation)
					 */
	unsigned int	prev;	/* number of the block of PAGE_SIZE bytes that
				 * was the current one previously
				 */
	void		*buffer;	/* address of the block to read from
					 * or write to
					 */
	unsigned int	buf_offset;	/* location to read from or write to,
					 * given as a displacement from 'buffer'
					 */
	int		sync_read;	/* Set to one to notify the caller of
					 * snapshot_write_next() that it may
					 * need to call wait_on_bio_chain()
					 */
};
|
||||
|
||||
/* This macro returns the address from/to which the caller of
|
||||
* snapshot_read_next()/snapshot_write_next() is allowed to
|
||||
* read/write data after the function returns
|
||||
*/
|
||||
#define data_of(handle) ((handle).buffer + (handle).buf_offset)
|
||||
|
||||
extern unsigned int snapshot_additional_pages(struct zone *zone);
|
||||
extern int snapshot_read_next(struct snapshot_handle *handle, size_t count);
|
||||
extern int snapshot_write_next(struct snapshot_handle *handle, size_t count);
|
||||
extern void snapshot_write_finalize(struct snapshot_handle *handle);
|
||||
extern int snapshot_image_loaded(struct snapshot_handle *handle);
|
||||
|
||||
/*
|
||||
* This structure is used to pass the values needed for the identification
|
||||
* of the resume swap area from a user space to the kernel via the
|
||||
* SNAPSHOT_SET_SWAP_AREA ioctl
|
||||
*/
|
||||
/* Passed from user space via SNAPSHOT_SET_SWAP_AREA to identify the
 * resume swap area; packed because the layout is a user/kernel ABI. */
struct resume_swap_area {
	loff_t offset;		/* offset of the swap header within 'dev' */
	u_int32_t dev;		/* device number of the swap partition */
} __attribute__((packed));
|
||||
|
||||
#define SNAPSHOT_IOC_MAGIC '3'
|
||||
#define SNAPSHOT_FREEZE _IO(SNAPSHOT_IOC_MAGIC, 1)
|
||||
#define SNAPSHOT_UNFREEZE _IO(SNAPSHOT_IOC_MAGIC, 2)
|
||||
#define SNAPSHOT_ATOMIC_SNAPSHOT _IOW(SNAPSHOT_IOC_MAGIC, 3, void *)
|
||||
#define SNAPSHOT_ATOMIC_RESTORE _IO(SNAPSHOT_IOC_MAGIC, 4)
|
||||
#define SNAPSHOT_FREE _IO(SNAPSHOT_IOC_MAGIC, 5)
|
||||
#define SNAPSHOT_SET_IMAGE_SIZE _IOW(SNAPSHOT_IOC_MAGIC, 6, unsigned long)
|
||||
#define SNAPSHOT_AVAIL_SWAP _IOR(SNAPSHOT_IOC_MAGIC, 7, void *)
|
||||
#define SNAPSHOT_GET_SWAP_PAGE _IOR(SNAPSHOT_IOC_MAGIC, 8, void *)
|
||||
#define SNAPSHOT_FREE_SWAP_PAGES _IO(SNAPSHOT_IOC_MAGIC, 9)
|
||||
#define SNAPSHOT_SET_SWAP_FILE _IOW(SNAPSHOT_IOC_MAGIC, 10, unsigned int)
|
||||
#define SNAPSHOT_S2RAM _IO(SNAPSHOT_IOC_MAGIC, 11)
|
||||
#define SNAPSHOT_PMOPS _IOW(SNAPSHOT_IOC_MAGIC, 12, unsigned int)
|
||||
#define SNAPSHOT_SET_SWAP_AREA _IOW(SNAPSHOT_IOC_MAGIC, 13, \
|
||||
struct resume_swap_area)
|
||||
#define SNAPSHOT_IOC_MAXNR 13
|
||||
|
||||
#define PMOPS_PREPARE 1
|
||||
#define PMOPS_ENTER 2
|
||||
#define PMOPS_FINISH 3
|
||||
|
||||
/**
|
||||
* The bitmap is used for tracing allocated swap pages
|
||||
*
|
||||
* The entire bitmap consists of a number of bitmap_page
|
||||
* structures linked with the help of the .next member.
|
||||
* Thus each page can be allocated individually, so we only
|
||||
* need to make 0-order memory allocations to create
|
||||
* the bitmap.
|
||||
*/
|
||||
|
||||
#define BITMAP_PAGE_SIZE (PAGE_SIZE - sizeof(void *))
|
||||
#define BITMAP_PAGE_CHUNKS (BITMAP_PAGE_SIZE / sizeof(long))
|
||||
#define BITS_PER_CHUNK (sizeof(long) * 8)
|
||||
#define BITMAP_PAGE_BITS (BITMAP_PAGE_CHUNKS * BITS_PER_CHUNK)
|
||||
|
||||
/* One page of the allocated-swap-pages bitmap (see comment above);
 * pages are chained through ->next so each is a 0-order allocation. */
struct bitmap_page {
	unsigned long		chunks[BITMAP_PAGE_CHUNKS];	/* the bits */
	struct bitmap_page	*next;	/* next page of the bitmap, or NULL */
};
|
||||
|
||||
extern void free_bitmap(struct bitmap_page *bitmap);
|
||||
extern struct bitmap_page *alloc_bitmap(unsigned int nr_bits);
|
||||
extern sector_t alloc_swapdev_block(int swap, struct bitmap_page *bitmap);
|
||||
extern void free_all_swap_pages(int swap, struct bitmap_page *bitmap);
|
||||
|
||||
extern int swsusp_check(void);
|
||||
extern int swsusp_shrink_memory(void);
|
||||
extern void swsusp_free(void);
|
||||
extern int swsusp_suspend(void);
|
||||
extern int swsusp_resume(void);
|
||||
extern int swsusp_read(void);
|
||||
extern int swsusp_write(void);
|
||||
extern void swsusp_close(void);
|
||||
extern int suspend_enter(suspend_state_t state);
|
||||
|
||||
struct timeval;
|
||||
extern void swsusp_show_speed(struct timeval *, struct timeval *,
|
||||
unsigned int, char *);
|
||||
44
kernel/power/poweroff.c
Normal file
44
kernel/power/poweroff.c
Normal file
@@ -0,0 +1,44 @@
|
||||
/*
|
||||
* poweroff.c - sysrq handler to gracefully power down machine.
|
||||
*
|
||||
* This file is released under the GPL v2
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/sysrq.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/pm.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/reboot.h>
|
||||
|
||||
/*
|
||||
* When the user hits Sys-Rq o to power down the machine this is the
|
||||
* callback we use.
|
||||
*/
|
||||
|
||||
/* Workqueue callback that performs the actual shutdown; runs in
 * process context. */
static void do_poweroff(struct work_struct *dummy)
{
	kernel_power_off();
}
|
||||
|
||||
static DECLARE_WORK(poweroff_work, do_poweroff);
|
||||
|
||||
/* Sys-Rq 'o' handler: defer to a workqueue so the actual power-off
 * runs in process context rather than in the sysrq callback. */
static void handle_poweroff(int key, struct tty_struct *tty)
{
	schedule_work(&poweroff_work);
}
|
||||
|
||||
/* Sys-Rq key operation: enabled even during boot (SYSRQ_ENABLE_BOOT). */
static struct sysrq_key_op	sysrq_poweroff_op = {
	.handler        = handle_poweroff,
	.help_msg       = "powerOff",
	.action_msg     = "Power Off",
	.enable_mask	= SYSRQ_ENABLE_BOOT,
};
|
||||
|
||||
static int pm_sysrq_init(void)
{
	/* Bind Sys-Rq 'o' to the graceful power-off handler. */
	register_sysrq_key('o', &sysrq_poweroff_op);
	return 0;
}
|
||||
|
||||
subsys_initcall(pm_sysrq_init);
|
||||
219
kernel/power/process.c
Normal file
219
kernel/power/process.c
Normal file
@@ -0,0 +1,219 @@
|
||||
/*
|
||||
* drivers/power/process.c - Functions for starting/stopping processes on
|
||||
* suspend transitions.
|
||||
*
|
||||
* Originally from swsusp.
|
||||
*/
|
||||
|
||||
|
||||
#undef DEBUG
|
||||
|
||||
#include <linux/smp_lock.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/suspend.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/freezer.h>
|
||||
|
||||
/*
|
||||
* Timeout for stopping processes
|
||||
*/
|
||||
#define TIMEOUT (20 * HZ)
|
||||
|
||||
#define FREEZER_KERNEL_THREADS 0
|
||||
#define FREEZER_USER_SPACE 1
|
||||
|
||||
static inline int freezeable(struct task_struct * p)
|
||||
{
|
||||
if ((p == current) ||
|
||||
(p->flags & PF_NOFREEZE) ||
|
||||
(p->exit_state == EXIT_ZOMBIE) ||
|
||||
(p->exit_state == EXIT_DEAD))
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Refrigerator is place where frozen processes are stored :-). */
|
||||
void refrigerator(void)
|
||||
{
|
||||
/* Hmm, should we be allowed to suspend when there are realtime
|
||||
processes around? */
|
||||
long save;
|
||||
save = current->state;
|
||||
pr_debug("%s entered refrigerator\n", current->comm);
|
||||
|
||||
frozen_process(current);
|
||||
spin_lock_irq(¤t->sighand->siglock);
|
||||
recalc_sigpending(); /* We sent fake signal, clean it up */
|
||||
spin_unlock_irq(¤t->sighand->siglock);
|
||||
|
||||
while (frozen(current)) {
|
||||
current->state = TASK_UNINTERRUPTIBLE;
|
||||
schedule();
|
||||
}
|
||||
pr_debug("%s left refrigerator\n", current->comm);
|
||||
current->state = save;
|
||||
}
|
||||
|
||||
/* Ask @p to enter the refrigerator: set its freeze flag and wake it
 * with a fake signal so it notices.  No-op if @p is already freezing
 * or frozen. */
static inline void freeze_process(struct task_struct *p)
{
	unsigned long flags;

	if (!freezing(p)) {
		rmb();	/* NOTE(review): presumably orders this check against
			 * the flag updates made by the thawing path -- confirm */
		if (!frozen(p)) {
			/* A stopped task would never run to see the freeze
			 * request; force a SIGSTOP so it wakes to handle it. */
			if (p->state == TASK_STOPPED)
				force_sig_specific(SIGSTOP, p);

			freeze(p);
			spin_lock_irqsave(&p->sighand->siglock, flags);
			signal_wake_up(p, p->state == TASK_STOPPED);
			spin_unlock_irqrestore(&p->sighand->siglock, flags);
		}
	}
}
|
||||
|
||||
/* Withdraw a pending freeze request from @p and clear the fake-signal
 * state it may have left behind. */
static void cancel_freezing(struct task_struct *p)
{
	unsigned long flags;

	if (freezing(p)) {
		pr_debug(" clean up: %s\n", p->comm);
		do_not_freeze(p);
		spin_lock_irqsave(&p->sighand->siglock, flags);
		recalc_sigpending_tsk(p);
		spin_unlock_irqrestore(&p->sighand->siglock, flags);
	}
}
|
||||
|
||||
/* True if @p owns a real user-space mm, i.e. it is not a kernel
 * thread (and not one that merely borrowed an mm). */
static inline int is_user_space(struct task_struct *p)
{
	return p->mm && !(p->flags & PF_BORROWED_MM);
}
|
||||
|
||||
/* Repeatedly sweep the task list, sending freeze requests to either
 * user-space tasks or kernel threads (per @freeze_user_space), until
 * every candidate is frozen or TIMEOUT expires.  Returns the number
 * of tasks that refused to freeze (0 on success). */
static unsigned int try_to_freeze_tasks(int freeze_user_space)
{
	struct task_struct *g, *p;
	unsigned long end_time;
	unsigned int todo;

	end_time = jiffies + TIMEOUT;
	do {
		todo = 0;
		read_lock(&tasklist_lock);
		do_each_thread(g, p) {
			if (!freezeable(p))
				continue;

			if (frozen(p))
				continue;

			/* A traced task whose tracer is already frozen can
			 * never respond; give up on it. */
			if (p->state == TASK_TRACED && frozen(p->parent)) {
				cancel_freezing(p);
				continue;
			}
			if (is_user_space(p)) {
				if (!freeze_user_space)
					continue;

				/* Freeze the task unless there is a vfork
				 * completion pending
				 */
				if (!p->vfork_done)
					freeze_process(p);
			} else {
				if (freeze_user_space)
					continue;

				freeze_process(p);
			}
			/* Count every task we had to send a request to;
			 * the outer loop retries while any remain. */
			todo++;
		} while_each_thread(g, p);
		read_unlock(&tasklist_lock);
		yield();			/* Yield is okay here */
		if (todo && time_after(jiffies, end_time))
			break;
	} while (todo);

	if (todo) {
		/* This does not unfreeze processes that are already frozen
		 * (we have slightly ugly calling convention in that respect,
		 * and caller must call thaw_processes() if something fails),
		 * but it cleans up leftover PF_FREEZE requests.
		 */
		printk("\n");
		printk(KERN_ERR "Stopping %s timed out after %d seconds "
				"(%d tasks refusing to freeze):\n",
				freeze_user_space ? "user space processes" :
					"kernel threads",
				TIMEOUT / HZ, todo);
		read_lock(&tasklist_lock);
		do_each_thread(g, p) {
			if (is_user_space(p) == !freeze_user_space)
				continue;

			if (freezeable(p) && !frozen(p))
				printk(KERN_ERR " %s\n", p->comm);

			cancel_freezing(p);
		} while_each_thread(g, p);
		read_unlock(&tasklist_lock);
	}

	return todo;
}
|
||||
|
||||
/**
 *	freeze_processes - tell processes to enter the refrigerator
 *
 *	Returns 0 on success, or the number of processes that didn't freeze,
 *	although they were told to.
 */
int freeze_processes(void)
{
	unsigned int nr_unfrozen;

	printk("Stopping tasks ... ");
	/* Freeze user space first; kernel threads keep running below so
	 * that sys_sync() can still do filesystem I/O. */
	nr_unfrozen = try_to_freeze_tasks(FREEZER_USER_SPACE);
	if (nr_unfrozen)
		return nr_unfrozen;

	/* Flush dirty data while kernel threads are still unfrozen */
	sys_sync();
	nr_unfrozen = try_to_freeze_tasks(FREEZER_KERNEL_THREADS);
	if (nr_unfrozen)
		return nr_unfrozen;

	printk("done.\n");
	BUG_ON(in_atomic());
	return 0;
}
|
||||
|
||||
/* Thaw every freezeable task of the selected class (user space when
 * @thaw_user_space is non-zero, kernel threads otherwise). */
static void thaw_tasks(int thaw_user_space)
{
	struct task_struct *g, *p;

	read_lock(&tasklist_lock);
	do_each_thread(g, p) {
		if (!freezeable(p))
			continue;

		/* Skip tasks of the other class */
		if (is_user_space(p) == !thaw_user_space)
			continue;

		/* thaw_process() returns 0 if the task was not frozen */
		if (!thaw_process(p))
			printk(KERN_WARNING " Strange, %s not stopped\n",
				p->comm );
	} while_each_thread(g, p);
	read_unlock(&tasklist_lock);
}
|
||||
|
||||
/* Thaw everything frozen by freeze_processes(), in reverse order:
 * kernel threads first, then user space. */
void thaw_processes(void)
{
	printk("Restarting tasks ... ");
	thaw_tasks(FREEZER_KERNEL_THREADS);
	thaw_tasks(FREEZER_USER_SPACE);
	schedule();
	printk("done.\n");
}
|
||||
|
||||
EXPORT_SYMBOL(refrigerator);
|
||||
1739
kernel/power/snapshot.c
Normal file
1739
kernel/power/snapshot.c
Normal file
File diff suppressed because it is too large
Load Diff
634
kernel/power/swap.c
Normal file
634
kernel/power/swap.c
Normal file
@@ -0,0 +1,634 @@
|
||||
/*
|
||||
* linux/kernel/power/swap.c
|
||||
*
|
||||
* This file provides functions for reading the suspend image from
|
||||
* and writing it to a swap partition.
|
||||
*
|
||||
* Copyright (C) 1998,2001-2005 Pavel Machek <pavel@suse.cz>
|
||||
* Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
|
||||
*
|
||||
* This file is released under the GPLv2.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/smp_lock.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/utsname.h>
|
||||
#include <linux/version.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/genhd.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/buffer_head.h>
|
||||
#include <linux/bio.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/swap.h>
|
||||
#include <linux/swapops.h>
|
||||
#include <linux/pm.h>
|
||||
|
||||
#include "power.h"
|
||||
|
||||
extern char resume_file[];
|
||||
|
||||
#define SWSUSP_SIG "S1SUSPEND"
|
||||
|
||||
/* On-disk swsusp signature page.  reserved[] pads the structure so that
 * .image, .orig_sig and .sig occupy the final 20 + sizeof(sector_t)
 * bytes of the page; the whole structure is exactly PAGE_SIZE. */
static struct swsusp_header {
	char reserved[PAGE_SIZE - 20 - sizeof(sector_t)];
	sector_t image;		/* sector of the image's first swap-map page */
	char orig_sig[10];	/* original swap signature, restored on resume */
	char sig[10];		/* SWSUSP_SIG while an image is present */
} __attribute__((packed, aligned(PAGE_SIZE))) swsusp_header;
|
||||
|
||||
/*
|
||||
* General things
|
||||
*/
|
||||
|
||||
/* Swap type (index) of the resume device; initialized to 0xffff until
 * swsusp_swap_check() determines the real value */
static unsigned short root_swap = 0xffff;
/* Block device the image is written to and read from */
static struct block_device *resume_bdev;
|
||||
|
||||
/**
 *	submit - submit BIO request.
 *	@rw:	READ or WRITE.
 *	@page_off:	physical offset of page.
 *	@page:	page we're reading or writing.
 *	@bio_chain:	list of pending biod (for async reading)
 *
 *	Straight from the textbook - allocate and initialize the bio.
 *	If we're reading, make sure the page is marked as dirty.
 *	Then submit it and, if @bio_chain == NULL, wait.
 */
static int submit(int rw, pgoff_t page_off, struct page *page,
			struct bio **bio_chain)
{
	struct bio *bio;

	bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
	if (!bio)
		return -ENOMEM;
	/* Convert page index to a 512-byte sector number */
	bio->bi_sector = page_off * (PAGE_SIZE >> 9);
	bio->bi_bdev = resume_bdev;
	/* NOTE(review): end_swap_bio_read is installed as the completion
	 * handler for WRITE bios too — confirm it is direction-agnostic. */
	bio->bi_end_io = end_swap_bio_read;

	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
		printk("swsusp: ERROR: adding page to bio at %ld\n", page_off);
		bio_put(bio);
		return -EFAULT;
	}

	lock_page(page);
	bio_get(bio);

	if (bio_chain == NULL) {
		/* Synchronous path: the completion handler presumably
		 * unlocks the page, so waiting on the lock waits for I/O */
		submit_bio(rw | (1 << BIO_RW_SYNC), bio);
		wait_on_page_locked(page);
		if (rw == READ)
			bio_set_pages_dirty(bio);
		bio_put(bio);
	} else {
		if (rw == READ)
			get_page(page);	/* These pages are freed later */
		/* Link the bio onto the chain; wait_on_bio_chain() reaps it */
		bio->bi_private = *bio_chain;
		*bio_chain = bio;
		submit_bio(rw | (1 << BIO_RW_SYNC), bio);
	}
	return 0;
}
|
||||
|
||||
/* Read one page at swap offset @page_off into @addr; synchronous when
 * @bio_chain is NULL, otherwise queued on the chain. */
static int bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
{
	return submit(READ, page_off, virt_to_page(addr), bio_chain);
}
|
||||
|
||||
/* Write one page from @addr to swap offset @page_off; synchronous when
 * @bio_chain is NULL, otherwise queued on the chain. */
static int bio_write_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
{
	return submit(WRITE, page_off, virt_to_page(addr), bio_chain);
}
|
||||
|
||||
static int wait_on_bio_chain(struct bio **bio_chain)
|
||||
{
|
||||
struct bio *bio;
|
||||
struct bio *next_bio;
|
||||
int ret = 0;
|
||||
|
||||
if (bio_chain == NULL)
|
||||
return 0;
|
||||
|
||||
bio = *bio_chain;
|
||||
if (bio == NULL)
|
||||
return 0;
|
||||
while (bio) {
|
||||
struct page *page;
|
||||
|
||||
next_bio = bio->bi_private;
|
||||
page = bio->bi_io_vec[0].bv_page;
|
||||
wait_on_page_locked(page);
|
||||
if (!PageUptodate(page) || PageError(page))
|
||||
ret = -EIO;
|
||||
put_page(page);
|
||||
bio_put(bio);
|
||||
bio = next_bio;
|
||||
}
|
||||
*bio_chain = NULL;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Saving part
|
||||
*/
|
||||
|
||||
/* Stamp SWSUSP_SIG into the resume swap's header page, recording @start
 * as the sector of the image's first swap-map page.  The original swap
 * signature is preserved in .orig_sig so resume can restore it. */
static int mark_swapfiles(sector_t start)
{
	int error;

	/* Re-read the on-disk header so only the signature fields change */
	bio_read_page(swsusp_resume_block, &swsusp_header, NULL);
	if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) ||
	    !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) {
		memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10);
		memcpy(swsusp_header.sig,SWSUSP_SIG, 10);
		swsusp_header.image = start;
		error = bio_write_page(swsusp_resume_block,
				&swsusp_header, NULL);
	} else {
		printk(KERN_ERR "swsusp: Swap header not found!\n");
		error = -ENODEV;
	}
	return error;
}
|
||||
|
||||
/**
 *	swsusp_swap_check - check if the resume device is a swap device
 *	and get its index (if so)
 *
 *	On success root_swap holds the swap type and resume_bdev is open
 *	for writing with a PAGE_SIZE block size.  Returns 0 or a negative
 *	error code.
 */

static int swsusp_swap_check(void) /* This is called before saving image */
{
	int res;

	res = swap_type_of(swsusp_resume_device, swsusp_resume_block,
			&resume_bdev);
	if (res < 0)
		return res;

	/* Remember the swap type; later allocations use root_swap */
	root_swap = res;
	res = blkdev_get(resume_bdev, FMODE_WRITE, O_RDWR);
	if (res)
		return res;

	res = set_blocksize(resume_bdev, PAGE_SIZE);
	if (res < 0)
		/* Undo blkdev_get() on failure */
		blkdev_put(resume_bdev);

	return res;
}
|
||||
|
||||
/**
 *	write_page - Write one page to given swap location.
 *	@buf:		Address we're writing.
 *	@offset:	Offset of the swap page we're writing to.
 *	@bio_chain:	Link the next write BIO here
 *
 *	Returns -ENOSPC for a zero @offset (failed swap allocation),
 *	otherwise the result of bio_write_page().
 */

static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
{
	void *src;

	if (!offset)
		return -ENOSPC;

	if (bio_chain) {
		/* Async path: copy the data into a private page so the
		 * caller may reuse @buf before the I/O completes */
		src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
		if (src) {
			memcpy(src, buf, PAGE_SIZE);
		} else {
			WARN_ON_ONCE(1);
			bio_chain = NULL;	/* Go synchronous */
			src = buf;
		}
	} else {
		src = buf;
	}
	return bio_write_page(offset, src, bio_chain);
}
|
||||
|
||||
/*
|
||||
* The swap map is a data structure used for keeping track of each page
|
||||
* written to a swap partition. It consists of many swap_map_page
|
||||
* structures that contain each an array of MAP_PAGE_SIZE swap entries.
|
||||
* These structures are stored on the swap and linked together with the
|
||||
* help of the .next_swap member.
|
||||
*
|
||||
* The swap map is created during suspend. The swap map pages are
|
||||
* allocated and populated one at a time, so we only need one memory
|
||||
* page to set up the entire structure.
|
||||
*
|
||||
* During resume we also only need to use one swap_map_page structure
|
||||
* at a time.
|
||||
*/
|
||||
|
||||
/* Entries per swap-map page; the page's last sector_t slot is reserved
 * for the .next_swap link. */
#define MAP_PAGE_ENTRIES	(PAGE_SIZE / sizeof(sector_t) - 1)

struct swap_map_page {
	sector_t entries[MAP_PAGE_ENTRIES];	/* sectors of the data pages */
	sector_t next_swap;	/* sector of the next map page, 0 = end */
};
|
||||
|
||||
/**
 *	The swap_map_handle structure is used for handling swap in
 *	a file-alike way
 */

struct swap_map_handle {
	struct swap_map_page *cur;	/* in-memory copy of the current map page */
	sector_t cur_swap;		/* sector where *cur is/will be stored */
	struct bitmap_page *bitmap;	/* allocated swap slots (write side only) */
	unsigned int k;			/* next free index into cur->entries */
};
|
||||
|
||||
static void release_swap_writer(struct swap_map_handle *handle)
|
||||
{
|
||||
if (handle->cur)
|
||||
free_page((unsigned long)handle->cur);
|
||||
handle->cur = NULL;
|
||||
if (handle->bitmap)
|
||||
free_bitmap(handle->bitmap);
|
||||
handle->bitmap = NULL;
|
||||
}
|
||||
|
||||
/* Initialize @handle for writing: allocate the first map page, the slot
 * bitmap, and the swap slot where the first map page will be stored.
 * NOTE(review): if the very first allocation fails, ->bitmap is left
 * untouched — callers must zero the handle before calling this. */
static int get_swap_writer(struct swap_map_handle *handle)
{
	handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL);
	if (!handle->cur)
		return -ENOMEM;
	/* One bit per swap page on the resume device */
	handle->bitmap = alloc_bitmap(count_swap_pages(root_swap, 0));
	if (!handle->bitmap) {
		release_swap_writer(handle);
		return -ENOMEM;
	}
	handle->cur_swap = alloc_swapdev_block(root_swap, handle->bitmap);
	if (!handle->cur_swap) {
		release_swap_writer(handle);
		return -ENOSPC;
	}
	handle->k = 0;
	return 0;
}
|
||||
|
||||
/* Write one data page through the swap map: allocate a slot, write @buf
 * there (async when @bio_chain is given), and record the slot in the
 * current map page.  A full map page is flushed synchronously and a new
 * one is started. */
static int swap_write_page(struct swap_map_handle *handle, void *buf,
				struct bio **bio_chain)
{
	int error = 0;
	sector_t offset;

	if (!handle->cur)
		return -EINVAL;
	offset = alloc_swapdev_block(root_swap, handle->bitmap);
	/* write_page() maps a zero offset (failed allocation) to -ENOSPC */
	error = write_page(buf, offset, bio_chain);
	if (error)
		return error;
	handle->cur->entries[handle->k++] = offset;
	if (handle->k >= MAP_PAGE_ENTRIES) {
		/* Map page full: drain outstanding writes, link and flush it */
		error = wait_on_bio_chain(bio_chain);
		if (error)
			goto out;
		offset = alloc_swapdev_block(root_swap, handle->bitmap);
		if (!offset)
			return -ENOSPC;
		handle->cur->next_swap = offset;
		/* Written synchronously so ->cur can be reset and reused */
		error = write_page(handle->cur, handle->cur_swap, NULL);
		if (error)
			goto out;
		memset(handle->cur, 0, PAGE_SIZE);
		handle->cur_swap = offset;
		handle->k = 0;
	}
out:
	return error;
}
|
||||
|
||||
static int flush_swap_writer(struct swap_map_handle *handle)
|
||||
{
|
||||
if (handle->cur && handle->cur_swap)
|
||||
return write_page(handle->cur, handle->cur_swap, NULL);
|
||||
else
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/**
 *	save_image - save the suspend image data
 *	@handle:	swap writer to send the pages through
 *	@snapshot:	source of the image pages
 *	@nr_to_write:	expected number of data pages (for progress/speed)
 */

static int save_image(struct swap_map_handle *handle,
			struct snapshot_handle *snapshot,
			unsigned int nr_to_write)
{
	unsigned int m;
	int ret;
	int error = 0;
	int nr_pages;
	int err2;
	struct bio *bio;
	struct timeval start;
	struct timeval stop;

	printk("Saving image data pages (%u pages) ... ", nr_to_write);
	/* One progress step per percent, at least one page */
	m = nr_to_write / 100;
	if (!m)
		m = 1;
	nr_pages = 0;
	bio = NULL;
	do_gettimeofday(&start);
	do {
		ret = snapshot_read_next(snapshot, PAGE_SIZE);
		if (ret > 0) {
			/* Queue the page for asynchronous writing */
			error = swap_write_page(handle, data_of(*snapshot),
						&bio);
			if (error)
				break;
			if (!(nr_pages % m))
				printk("\b\b\b\b%3d%%", nr_pages / m);
			nr_pages++;
		}
	} while (ret > 0);
	/* Drain all queued writes before reporting success */
	err2 = wait_on_bio_chain(&bio);
	do_gettimeofday(&stop);
	if (!error)
		error = err2;
	if (!error)
		printk("\b\b\b\bdone\n");
	swsusp_show_speed(&start, &stop, nr_to_write, "Wrote");
	return error;
}
|
||||
|
||||
/**
|
||||
* enough_swap - Make sure we have enough swap to save the image.
|
||||
*
|
||||
* Returns TRUE or FALSE after checking the total amount of swap
|
||||
* space avaiable from the resume partition.
|
||||
*/
|
||||
|
||||
static int enough_swap(unsigned int nr_pages)
|
||||
{
|
||||
unsigned int free_swap = count_swap_pages(root_swap, 1);
|
||||
|
||||
pr_debug("swsusp: free swap pages: %u\n", free_swap);
|
||||
return free_swap > nr_pages + PAGES_FOR_IO;
|
||||
}
|
||||
|
||||
/**
|
||||
* swsusp_write - Write entire image and metadata.
|
||||
*
|
||||
* It is important _NOT_ to umount filesystems at this point. We want
|
||||
* them synced (in case something goes wrong) but we DO not want to mark
|
||||
* filesystem clean: it is not. (And it does not matter, if we resume
|
||||
* correctly, we'll mark system clean, anyway.)
|
||||
*/
|
||||
|
||||
int swsusp_write(void)
|
||||
{
|
||||
struct swap_map_handle handle;
|
||||
struct snapshot_handle snapshot;
|
||||
struct swsusp_info *header;
|
||||
int error;
|
||||
|
||||
error = swsusp_swap_check();
|
||||
if (error) {
|
||||
printk(KERN_ERR "swsusp: Cannot find swap device, try "
|
||||
"swapon -a.\n");
|
||||
return error;
|
||||
}
|
||||
memset(&snapshot, 0, sizeof(struct snapshot_handle));
|
||||
error = snapshot_read_next(&snapshot, PAGE_SIZE);
|
||||
if (error < PAGE_SIZE) {
|
||||
if (error >= 0)
|
||||
error = -EFAULT;
|
||||
|
||||
goto out;
|
||||
}
|
||||
header = (struct swsusp_info *)data_of(snapshot);
|
||||
if (!enough_swap(header->pages)) {
|
||||
printk(KERN_ERR "swsusp: Not enough free swap\n");
|
||||
error = -ENOSPC;
|
||||
goto out;
|
||||
}
|
||||
error = get_swap_writer(&handle);
|
||||
if (!error) {
|
||||
sector_t start = handle.cur_swap;
|
||||
|
||||
error = swap_write_page(&handle, header, NULL);
|
||||
if (!error)
|
||||
error = save_image(&handle, &snapshot,
|
||||
header->pages - 1);
|
||||
|
||||
if (!error) {
|
||||
flush_swap_writer(&handle);
|
||||
printk("S");
|
||||
error = mark_swapfiles(start);
|
||||
printk("|\n");
|
||||
}
|
||||
}
|
||||
if (error)
|
||||
free_all_swap_pages(root_swap, handle.bitmap);
|
||||
release_swap_writer(&handle);
|
||||
out:
|
||||
swsusp_close();
|
||||
return error;
|
||||
}
|
||||
|
||||
/**
|
||||
* The following functions allow us to read data using a swap map
|
||||
* in a file-alike way
|
||||
*/
|
||||
|
||||
static void release_swap_reader(struct swap_map_handle *handle)
|
||||
{
|
||||
if (handle->cur)
|
||||
free_page((unsigned long)handle->cur);
|
||||
handle->cur = NULL;
|
||||
}
|
||||
|
||||
/* Initialize @handle for reading: load the first swap-map page from
 * sector @start (taken from the on-disk swsusp header). */
static int get_swap_reader(struct swap_map_handle *handle, sector_t start)
{
	int error;

	if (!start)
		return -EINVAL;

	handle->cur = (struct swap_map_page *)get_zeroed_page(__GFP_WAIT | __GFP_HIGH);
	if (!handle->cur)
		return -ENOMEM;

	/* Synchronously read the first map page */
	error = bio_read_page(start, handle->cur, NULL);
	if (error) {
		release_swap_reader(handle);
		return error;
	}
	handle->k = 0;
	return 0;
}
|
||||
|
||||
/* Read the next data page of the image into @buf (async when @bio_chain
 * is given).  When the current map page is exhausted, drain outstanding
 * reads and synchronously load the next map page; a zero next_swap link
 * marks the end of the map. */
static int swap_read_page(struct swap_map_handle *handle, void *buf,
				struct bio **bio_chain)
{
	sector_t offset;
	int error;

	if (!handle->cur)
		return -EINVAL;
	offset = handle->cur->entries[handle->k];
	if (!offset)
		return -EFAULT;
	error = bio_read_page(offset, buf, bio_chain);
	if (error)
		return error;
	if (++handle->k >= MAP_PAGE_ENTRIES) {
		/* Map page exhausted: finish pending reads, then advance */
		error = wait_on_bio_chain(bio_chain);
		handle->k = 0;
		offset = handle->cur->next_swap;
		if (!offset)
			release_swap_reader(handle);
		else if (!error)
			error = bio_read_page(offset, handle->cur, NULL);
	}
	return error;
}
|
||||
|
||||
/**
 *	load_image - load the image using the swap map handle
 *	@handle and the snapshot handle @snapshot
 *	(assume there are @nr_pages pages to load)
 */

static int load_image(struct swap_map_handle *handle,
			struct snapshot_handle *snapshot,
			unsigned int nr_to_read)
{
	unsigned int m;
	int error = 0;
	struct timeval start;
	struct timeval stop;
	struct bio *bio;
	int err2;
	unsigned nr_pages;

	printk("Loading image data pages (%u pages) ... ", nr_to_read);
	/* One progress step per percent, at least one page */
	m = nr_to_read / 100;
	if (!m)
		m = 1;
	nr_pages = 0;
	bio = NULL;
	do_gettimeofday(&start);
	for ( ; ; ) {
		error = snapshot_write_next(snapshot, PAGE_SIZE);
		if (error <= 0)
			break;
		error = swap_read_page(handle, data_of(*snapshot), &bio);
		if (error)
			break;
		/* The snapshot layer may require this page before the
		 * next one can be placed — drain the chain if so */
		if (snapshot->sync_read)
			error = wait_on_bio_chain(&bio);
		if (error)
			break;
		if (!(nr_pages % m))
			printk("\b\b\b\b%3d%%", nr_pages / m);
		nr_pages++;
	}
	err2 = wait_on_bio_chain(&bio);
	do_gettimeofday(&stop);
	if (!error)
		error = err2;
	if (!error) {
		printk("\b\b\b\bdone\n");
		snapshot_write_finalize(snapshot);
		/* Refuse a truncated image */
		if (!snapshot_image_loaded(snapshot))
			error = -ENODATA;
	}
	swsusp_show_speed(&start, &stop, nr_to_read, "Read");
	return error;
}
|
||||
|
||||
/* Read the entire suspend image from swap into the snapshot layer.
 * Expects resume_bdev to have been opened by swsusp_check(). */
int swsusp_read(void)
{
	int error;
	struct swap_map_handle handle;
	struct snapshot_handle snapshot;
	struct swsusp_info *header;

	if (IS_ERR(resume_bdev)) {
		pr_debug("swsusp: block device not initialised\n");
		return PTR_ERR(resume_bdev);
	}

	memset(&snapshot, 0, sizeof(struct snapshot_handle));
	/* The first snapshot page receives the image header */
	error = snapshot_write_next(&snapshot, PAGE_SIZE);
	if (error < PAGE_SIZE)
		return error < 0 ? error : -EFAULT;
	header = (struct swsusp_info *)data_of(snapshot);
	/* swsusp_header.image was read from disk by swsusp_check() */
	error = get_swap_reader(&handle, swsusp_header.image);
	if (!error)
		error = swap_read_page(&handle, header, NULL);
	if (!error)
		error = load_image(&handle, &snapshot, header->pages - 1);
	release_swap_reader(&handle);

	blkdev_put(resume_bdev);

	if (!error)
		pr_debug("swsusp: Reading resume file was successful\n");
	else
		pr_debug("swsusp: Error %d resuming\n", error);
	return error;
}
|
||||
|
||||
/**
|
||||
* swsusp_check - Check for swsusp signature in the resume device
|
||||
*/
|
||||
|
||||
int swsusp_check(void)
|
||||
{
|
||||
int error;
|
||||
|
||||
resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ);
|
||||
if (!IS_ERR(resume_bdev)) {
|
||||
set_blocksize(resume_bdev, PAGE_SIZE);
|
||||
memset(&swsusp_header, 0, sizeof(swsusp_header));
|
||||
error = bio_read_page(swsusp_resume_block,
|
||||
&swsusp_header, NULL);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) {
|
||||
memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10);
|
||||
/* Reset swap signature now */
|
||||
error = bio_write_page(swsusp_resume_block,
|
||||
&swsusp_header, NULL);
|
||||
} else {
|
||||
return -EINVAL;
|
||||
}
|
||||
if (error)
|
||||
blkdev_put(resume_bdev);
|
||||
else
|
||||
pr_debug("swsusp: Signature found, resuming\n");
|
||||
} else {
|
||||
error = PTR_ERR(resume_bdev);
|
||||
}
|
||||
|
||||
if (error)
|
||||
pr_debug("swsusp: Error %d check for resume file\n", error);
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
/**
|
||||
* swsusp_close - close swap device.
|
||||
*/
|
||||
|
||||
void swsusp_close(void)
|
||||
{
|
||||
if (IS_ERR(resume_bdev)) {
|
||||
pr_debug("swsusp: block device not initialised\n");
|
||||
return;
|
||||
}
|
||||
|
||||
blkdev_put(resume_bdev);
|
||||
}
|
||||
330
kernel/power/swsusp.c
Normal file
330
kernel/power/swsusp.c
Normal file
@@ -0,0 +1,330 @@
|
||||
/*
|
||||
* linux/kernel/power/swsusp.c
|
||||
*
|
||||
* This file provides code to write suspend image to swap and read it back.
|
||||
*
|
||||
* Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
|
||||
* Copyright (C) 1998,2001-2005 Pavel Machek <pavel@suse.cz>
|
||||
*
|
||||
* This file is released under the GPLv2.
|
||||
*
|
||||
* I'd like to thank the following people for their work:
|
||||
*
|
||||
* Pavel Machek <pavel@ucw.cz>:
|
||||
* Modifications, defectiveness pointing, being with me at the very beginning,
|
||||
* suspend to swap space, stop all tasks. Port to 2.4.18-ac and 2.5.17.
|
||||
*
|
||||
* Steve Doddi <dirk@loth.demon.co.uk>:
|
||||
* Support the possibility of hardware state restoring.
|
||||
*
|
||||
* Raph <grey.havens@earthling.net>:
|
||||
* Support for preserving states of network devices and virtual console
|
||||
* (including X and svgatextmode)
|
||||
*
|
||||
* Kurt Garloff <garloff@suse.de>:
|
||||
* Straightened the critical function in order to prevent compilers from
|
||||
* playing tricks with local variables.
|
||||
*
|
||||
* Andreas Mohr <a.mohr@mailto.de>
|
||||
*
|
||||
* Alex Badea <vampire@go.ro>:
|
||||
* Fixed runaway init
|
||||
*
|
||||
* Rafael J. Wysocki <rjw@sisk.pl>
|
||||
* Reworked the freeing of memory and the handling of swap
|
||||
*
|
||||
* More state savers are welcome. Especially for the scsi layer...
|
||||
*
|
||||
* For TODOs,FIXMEs also look in Documentation/power/swsusp.txt
|
||||
*/
|
||||
|
||||
#include <linux/mm.h>
|
||||
#include <linux/suspend.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/major.h>
|
||||
#include <linux/swap.h>
|
||||
#include <linux/pm.h>
|
||||
#include <linux/swapops.h>
|
||||
#include <linux/bootmem.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/time.h>
|
||||
|
||||
#include "power.h"
|
||||
|
||||
/*
|
||||
* Preferred image size in bytes (tunable via /sys/power/image_size).
|
||||
* When it is set to N, swsusp will do its best to ensure the image
|
||||
* size will not exceed N bytes, but if that is impossible, it will
|
||||
* try to create the smallest image possible.
|
||||
*/
|
||||
unsigned long image_size = 500 * 1024 * 1024;
|
||||
|
||||
int in_suspend __nosavedata = 0;
|
||||
|
||||
#ifdef CONFIG_HIGHMEM
/* Provided elsewhere when CONFIG_HIGHMEM is set */
unsigned int count_highmem_pages(void);
int restore_highmem(void);
#else
/* Without highmem both operations are trivial no-ops */
static inline int restore_highmem(void) { return 0; }
static inline unsigned int count_highmem_pages(void) { return 0; }
#endif
|
||||
|
||||
/**
|
||||
* The following functions are used for tracing the allocated
|
||||
* swap pages, so that they can be freed in case of an error.
|
||||
*
|
||||
* The functions operate on a linked bitmap structure defined
|
||||
* in power.h
|
||||
*/
|
||||
|
||||
void free_bitmap(struct bitmap_page *bitmap)
|
||||
{
|
||||
struct bitmap_page *bp;
|
||||
|
||||
while (bitmap) {
|
||||
bp = bitmap->next;
|
||||
free_page((unsigned long)bitmap);
|
||||
bitmap = bp;
|
||||
}
|
||||
}
|
||||
|
||||
struct bitmap_page *alloc_bitmap(unsigned int nr_bits)
|
||||
{
|
||||
struct bitmap_page *bitmap, *bp;
|
||||
unsigned int n;
|
||||
|
||||
if (!nr_bits)
|
||||
return NULL;
|
||||
|
||||
bitmap = (struct bitmap_page *)get_zeroed_page(GFP_KERNEL);
|
||||
bp = bitmap;
|
||||
for (n = BITMAP_PAGE_BITS; n < nr_bits; n += BITMAP_PAGE_BITS) {
|
||||
bp->next = (struct bitmap_page *)get_zeroed_page(GFP_KERNEL);
|
||||
bp = bp->next;
|
||||
if (!bp) {
|
||||
free_bitmap(bitmap);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
return bitmap;
|
||||
}
|
||||
|
||||
static int bitmap_set(struct bitmap_page *bitmap, unsigned long bit)
|
||||
{
|
||||
unsigned int n;
|
||||
|
||||
n = BITMAP_PAGE_BITS;
|
||||
while (bitmap && n <= bit) {
|
||||
n += BITMAP_PAGE_BITS;
|
||||
bitmap = bitmap->next;
|
||||
}
|
||||
if (!bitmap)
|
||||
return -EINVAL;
|
||||
n -= BITMAP_PAGE_BITS;
|
||||
bit -= n;
|
||||
n = 0;
|
||||
while (bit >= BITS_PER_CHUNK) {
|
||||
bit -= BITS_PER_CHUNK;
|
||||
n++;
|
||||
}
|
||||
bitmap->chunks[n] |= (1UL << bit);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Allocate one swap page on device @swap, record it in @bitmap, and
 * return its sector; returns 0 when no page could be allocated or the
 * bitmap cannot track it (the page is released again in that case). */
sector_t alloc_swapdev_block(int swap, struct bitmap_page *bitmap)
{
	unsigned long offset;

	offset = swp_offset(get_swap_page_of_type(swap));
	if (offset) {
		if (bitmap_set(bitmap, offset))
			/* Cannot track the slot — give it back */
			swap_free(swp_entry(swap, offset));
		else
			return swapdev_block(swap, offset);
	}
	return 0;
}
|
||||
|
||||
/* Release every swap page whose bit is set in @bitmap back to device
 * @swap.  @bit tracks the absolute bit position across the whole list. */
void free_all_swap_pages(int swap, struct bitmap_page *bitmap)
{
	unsigned int bit, n;
	unsigned long test;

	bit = 0;
	while (bitmap) {
		for (n = 0; n < BITMAP_PAGE_CHUNKS; n++)
			/* Scan each chunk one bit at a time */
			for (test = 1UL; test; test <<= 1) {
				if (bitmap->chunks[n] & test)
					swap_free(swp_entry(swap, bit));
				bit++;
			}
		bitmap = bitmap->next;
	}
}
|
||||
|
||||
/**
 *	swsusp_show_speed - print the time elapsed between two events represented by
 *	@start and @stop
 *
 *	@nr_pages -	number of pages processed between @start and @stop
 *	@msg -		introductory message to print
 */

void swsusp_show_speed(struct timeval *start, struct timeval *stop,
			unsigned nr_pages, char *msg)
{
	s64 elapsed_centisecs64;
	int centisecs;
	int k;
	int kps;

	elapsed_centisecs64 = timeval_to_ns(stop) - timeval_to_ns(start);
	/* Convert nanoseconds to centiseconds in place */
	do_div(elapsed_centisecs64, NSEC_PER_SEC / 100);
	centisecs = elapsed_centisecs64;
	if (centisecs == 0)
		centisecs = 1;	/* avoid div-by-zero */
	k = nr_pages * (PAGE_SIZE / 1024);	/* kilobytes processed */
	kps = (k * 100) / centisecs;
	printk("%s %d kbytes in %d.%02d seconds (%d.%02d MB/s)\n", msg, k,
			centisecs / 100, centisecs % 100,
			kps / 1000, (kps % 1000) / 10);
}
|
||||
|
||||
/**
|
||||
* swsusp_shrink_memory - Try to free as much memory as needed
|
||||
*
|
||||
* ... but do not OOM-kill anyone
|
||||
*
|
||||
* Notice: all userland should be stopped before it is called, or
|
||||
* livelock is possible.
|
||||
*/
|
||||
|
||||
#define SHRINK_BITE 10000
/* Reclaim at most SHRINK_BITE pages per call */
static inline unsigned long __shrink_memory(long tmp)
{
	return shrink_all_memory(tmp > SHRINK_BITE ? SHRINK_BITE : tmp);
}
|
||||
|
||||
int swsusp_shrink_memory(void)
{
	long tmp;
	struct zone *zone;
	unsigned long pages = 0;
	unsigned int i = 0;
	char *p = "-\\|/";	/* progress spinner characters */
	struct timeval start, stop;

	printk("Shrinking memory... ");
	do_gettimeofday(&start);
	do {
		long size, highmem_size;

		/* Estimate how much memory the image will need */
		highmem_size = count_highmem_pages();
		size = count_data_pages() + PAGES_FOR_IO;
		tmp = size;
		size += highmem_size;
		for_each_zone (zone)
			if (populated_zone(zone)) {
				tmp += snapshot_additional_pages(zone);
				if (is_highmem(zone)) {
					highmem_size -=
					zone_page_state(zone, NR_FREE_PAGES);
				} else {
					tmp -= zone_page_state(zone, NR_FREE_PAGES);
					tmp += zone->lowmem_reserve[ZONE_NORMAL];
				}
			}

		if (highmem_size < 0)
			highmem_size = 0;

		tmp += highmem_size;
		if (tmp > 0) {
			/* Not enough free memory to hold the image */
			tmp = __shrink_memory(tmp);
			if (!tmp)
				return -ENOMEM;
			pages += tmp;
		} else if (size > image_size / PAGE_SIZE) {
			/* Image exceeds the preferred size — trim further */
			tmp = __shrink_memory(size - (image_size / PAGE_SIZE));
			pages += tmp;
		}
		printk("\b%c", p[i++%4]);
	} while (tmp > 0);
	do_gettimeofday(&stop);
	printk("\bdone (%lu pages freed)\n", pages);
	swsusp_show_speed(&start, &stop, pages, "Freed");

	return 0;
}
|
||||
|
||||
/* Power down devices, save CPU state and create the snapshot via
 * swsusp_arch_suspend(); execution resumes here on both the suspend
 * and the resume path.  Called with devices already suspended. */
int swsusp_suspend(void)
{
	int error;

	if ((error = arch_prepare_suspend()))
		return error;

	local_irq_disable();
	/* At this point, device_suspend() has been called, but *not*
	 * device_power_down(). We *must* device_power_down() now.
	 * Otherwise, drivers for some devices (e.g. interrupt controllers)
	 * become desynchronized with the actual state of the hardware
	 * at resume time, and evil weirdness ensues.
	 */
	if ((error = device_power_down(PMSG_FREEZE))) {
		printk(KERN_ERR "Some devices failed to power down, aborting suspend\n");
		goto Enable_irqs;
	}

	save_processor_state();
	if ((error = swsusp_arch_suspend()))
		printk(KERN_ERR "Error %d suspending\n", error);
	/* Restore control flow magically appears here */
	restore_processor_state();
	/* NOTE: device_power_up() is just a resume() for devices
	 * that suspended with irqs off ... no overall powerup.
	 */
	device_power_up();
 Enable_irqs:
	local_irq_enable();
	return error;
}
|
||||
|
||||
/* Switch to the loaded image via swsusp_arch_resume(); on success
 * execution continues where swsusp_arch_suspend() was called, so the
 * code after the call only runs on failure. */
int swsusp_resume(void)
{
	int error;

	local_irq_disable();
	/* NOTE: device_power_down() is just a suspend() with irqs off;
	 * it has no special "power things down" semantics
	 */
	if (device_power_down(PMSG_PRETHAW))
		printk(KERN_ERR "Some devices failed to power down, very bad\n");
	/* We'll ignore saved state, but this gets preempt count (etc) right */
	save_processor_state();
	error = restore_highmem();
	if (!error) {
		error = swsusp_arch_resume();
		/* The code below is only ever reached in case of a failure.
		 * Otherwise execution continues at place where
		 * swsusp_arch_suspend() was called
		 */
		BUG_ON(!error);
		/* This call to restore_highmem() undos the previous one */
		restore_highmem();
	}
	/* The only reason why swsusp_arch_resume() can fail is memory being
	 * very tight, so we have to free it as soon as we can to avoid
	 * subsequent failures
	 */
	swsusp_free();
	restore_processor_state();
	touch_softlockup_watchdog();
	device_power_up();
	local_irq_enable();
	return error;
}
|
||||
481
kernel/power/user.c
Normal file
481
kernel/power/user.c
Normal file
@@ -0,0 +1,481 @@
|
||||
/*
|
||||
* linux/kernel/power/user.c
|
||||
*
|
||||
* This file provides the user space interface for software suspend/resume.
|
||||
*
|
||||
* Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
|
||||
*
|
||||
* This file is released under the GPLv2.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/suspend.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/reboot.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/miscdevice.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/swap.h>
|
||||
#include <linux/swapops.h>
|
||||
#include <linux/pm.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/console.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/freezer.h>
|
||||
|
||||
#include <asm/uaccess.h>
|
||||
|
||||
#include "power.h"
|
||||
|
||||
#define SNAPSHOT_MINOR 231
|
||||
|
||||
/* Per-open state of the snapshot device (single opener only). */
static struct snapshot_data {
	struct snapshot_handle handle;	/* image read/write cursor */
	int swap;			/* swap type for the image, -1 = none */
	struct bitmap_page *bitmap;	/* swap slots allocated for the image */
	int mode;			/* O_RDONLY (resume) or O_WRONLY (suspend) */
	char frozen;			/* non-zero once processes are frozen */
	char ready;	/* presumably set once a snapshot exists — set by
			 * ioctl code outside this chunk, confirm there */
	char platform_suspend;	/* presumably selects the platform suspend
				 * mode — set by ioctl code, confirm there */
} snapshot_state;
|
||||
|
||||
static atomic_t device_available = ATOMIC_INIT(1);
|
||||
|
||||
static int snapshot_open(struct inode *inode, struct file *filp)
|
||||
{
|
||||
struct snapshot_data *data;
|
||||
|
||||
if (!atomic_add_unless(&device_available, -1, 0))
|
||||
return -EBUSY;
|
||||
|
||||
if ((filp->f_flags & O_ACCMODE) == O_RDWR)
|
||||
return -ENOSYS;
|
||||
|
||||
nonseekable_open(inode, filp);
|
||||
data = &snapshot_state;
|
||||
filp->private_data = data;
|
||||
memset(&data->handle, 0, sizeof(struct snapshot_handle));
|
||||
if ((filp->f_flags & O_ACCMODE) == O_RDONLY) {
|
||||
data->swap = swsusp_resume_device ?
|
||||
swap_type_of(swsusp_resume_device, 0, NULL) : -1;
|
||||
data->mode = O_RDONLY;
|
||||
} else {
|
||||
data->swap = -1;
|
||||
data->mode = O_WRONLY;
|
||||
}
|
||||
data->bitmap = NULL;
|
||||
data->frozen = 0;
|
||||
data->ready = 0;
|
||||
data->platform_suspend = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * snapshot_release - last close of /dev/snapshot.
 *
 * Frees the snapshot image and any swap pages handed out through
 * SNAPSHOT_GET_SWAP_PAGE, thaws user space if it is still frozen,
 * and returns the single-opener slot.
 */
static int snapshot_release(struct inode *inode, struct file *filp)
{
	struct snapshot_data *data;

	swsusp_free();
	data = filp->private_data;
	free_all_swap_pages(data->swap, data->bitmap);
	free_bitmap(data->bitmap);
	if (data->frozen) {
		/* Mirror SNAPSHOT_UNFREEZE: thaw under pm_mutex. */
		mutex_lock(&pm_mutex);
		thaw_processes();
		enable_nonboot_cpus();
		mutex_unlock(&pm_mutex);
	}
	atomic_inc(&device_available);
	return 0;
}
|
||||
|
||||
/*
 * snapshot_read - copy the next chunk of the snapshot image to user
 * space.  Returns the number of bytes transferred, 0 at end of image,
 * or a negative error code.
 */
static ssize_t snapshot_read(struct file *filp, char __user *buf,
                             size_t count, loff_t *offp)
{
	struct snapshot_data *data = filp->private_data;
	ssize_t ret = snapshot_read_next(&data->handle, count);

	if (ret <= 0)
		return ret;

	if (copy_to_user(buf, data_of(data->handle), ret))
		return -EFAULT;

	/* Keep the file offset in sync with the image position. */
	*offp = data->handle.offset;
	return ret;
}
|
||||
|
||||
/*
 * snapshot_write - accept the next chunk of a snapshot image from user
 * space.  Returns the number of bytes consumed, 0 when the image is
 * complete, or a negative error code.
 */
static ssize_t snapshot_write(struct file *filp, const char __user *buf,
                              size_t count, loff_t *offp)
{
	struct snapshot_data *data = filp->private_data;
	ssize_t ret = snapshot_write_next(&data->handle, count);

	if (ret <= 0)
		return ret;

	if (copy_from_user(data_of(data->handle), buf, ret))
		return -EFAULT;

	/* Keep the file offset in sync with the image position. */
	*offp = data->handle.offset;
	return ret;
}
|
||||
|
||||
static inline int platform_prepare(void)
|
||||
{
|
||||
int error = 0;
|
||||
|
||||
if (pm_ops && pm_ops->prepare)
|
||||
error = pm_ops->prepare(PM_SUSPEND_DISK);
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
static inline void platform_finish(void)
|
||||
{
|
||||
if (pm_ops && pm_ops->finish)
|
||||
pm_ops->finish(PM_SUSPEND_DISK);
|
||||
}
|
||||
|
||||
/*
 * snapshot_suspend - create the hibernation image.
 *
 * @platform_suspend: nonzero to bracket the operation with the
 *	pm_ops platform prepare/finish hooks (SNAPSHOT_PMOPS mode).
 *
 * Runs the full atomic-snapshot sequence under pm_mutex: shrink
 * memory, suspend devices and consoles, take non-boot CPUs offline,
 * then call swsusp_suspend().  On any failure the steps already taken
 * are unwound in reverse order via the labels below.  Returns 0 on
 * success or a negative error code.
 */
static inline int snapshot_suspend(int platform_suspend)
{
	int error;

	mutex_lock(&pm_mutex);
	/* Free memory before shutting down devices. */
	error = swsusp_shrink_memory();
	if (error)
		goto Finish;

	if (platform_suspend) {
		error = platform_prepare();
		if (error)
			goto Finish;
	}
	suspend_console();
	error = device_suspend(PMSG_FREEZE);
	if (error)
		goto Resume_devices;

	error = disable_nonboot_cpus();
	if (!error) {
		/* swsusp_suspend() returns twice; in_suspend tells which leg. */
		in_suspend = 1;
		error = swsusp_suspend();
	}
	enable_nonboot_cpus();
 Resume_devices:
	if (platform_suspend)
		platform_finish();

	device_resume();
	resume_console();
 Finish:
	mutex_unlock(&pm_mutex);
	return error;
}
|
||||
|
||||
static inline int snapshot_restore(int platform_suspend)
|
||||
{
|
||||
int error;
|
||||
|
||||
mutex_lock(&pm_mutex);
|
||||
pm_prepare_console();
|
||||
if (platform_suspend) {
|
||||
error = platform_prepare();
|
||||
if (error)
|
||||
goto Finish;
|
||||
}
|
||||
suspend_console();
|
||||
error = device_suspend(PMSG_PRETHAW);
|
||||
if (error)
|
||||
goto Resume_devices;
|
||||
|
||||
error = disable_nonboot_cpus();
|
||||
if (!error)
|
||||
error = swsusp_resume();
|
||||
|
||||
enable_nonboot_cpus();
|
||||
Resume_devices:
|
||||
if (platform_suspend)
|
||||
platform_finish();
|
||||
|
||||
device_resume();
|
||||
resume_console();
|
||||
Finish:
|
||||
pm_restore_console();
|
||||
mutex_unlock(&pm_mutex);
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
 * snapshot_ioctl - control interface of /dev/snapshot.
 *
 * Implements the SNAPSHOT_* ioctls used by the user-space suspend
 * tools: freezing/thawing tasks, creating and restoring the atomic
 * snapshot, allocating swap pages for the image, selecting the swap
 * device, entering S3, and driving the platform (ACPI) hibernation
 * hooks.  Requires CAP_SYS_ADMIN.  Returns 0 or a negative errno.
 */
static int snapshot_ioctl(struct inode *inode, struct file *filp,
                          unsigned int cmd, unsigned long arg)
{
	int error = 0;
	struct snapshot_data *data;
	loff_t avail;
	sector_t offset;

	/* Reject ioctls that do not belong to this device. */
	if (_IOC_TYPE(cmd) != SNAPSHOT_IOC_MAGIC)
		return -ENOTTY;
	if (_IOC_NR(cmd) > SNAPSHOT_IOC_MAXNR)
		return -ENOTTY;
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	data = filp->private_data;

	switch (cmd) {

	case SNAPSHOT_FREEZE:
		/* Freeze user space; idempotent if already frozen. */
		if (data->frozen)
			break;
		mutex_lock(&pm_mutex);
		if (freeze_processes()) {
			/* Partial freeze: undo before reporting failure. */
			thaw_processes();
			error = -EBUSY;
		}
		mutex_unlock(&pm_mutex);
		if (!error)
			data->frozen = 1;
		break;

	case SNAPSHOT_UNFREEZE:
		if (!data->frozen)
			break;
		mutex_lock(&pm_mutex);
		thaw_processes();
		mutex_unlock(&pm_mutex);
		data->frozen = 0;
		break;

	case SNAPSHOT_ATOMIC_SNAPSHOT:
		/* Image creation requires a frozen, read-mode, imageless state. */
		if (data->mode != O_RDONLY || !data->frozen || data->ready) {
			error = -EPERM;
			break;
		}
		error = snapshot_suspend(data->platform_suspend);
		if (!error)
			/* Tell user space whether we are suspending or resuming. */
			error = put_user(in_suspend, (unsigned int __user *)arg);
		if (!error)
			data->ready = 1;
		break;

	case SNAPSHOT_ATOMIC_RESTORE:
		snapshot_write_finalize(&data->handle);
		/* Only restore a fully loaded image in write mode while frozen. */
		if (data->mode != O_WRONLY || !data->frozen ||
		    !snapshot_image_loaded(&data->handle)) {
			error = -EPERM;
			break;
		}
		error = snapshot_restore(data->platform_suspend);
		break;

	case SNAPSHOT_FREE:
		/* Drop the image and reset the read/write position. */
		swsusp_free();
		memset(&data->handle, 0, sizeof(struct snapshot_handle));
		data->ready = 0;
		break;

	case SNAPSHOT_SET_IMAGE_SIZE:
		/* Preferred maximum image size, in bytes. */
		image_size = arg;
		break;

	case SNAPSHOT_AVAIL_SWAP:
		/* Report free swap on the selected device, in bytes. */
		avail = count_swap_pages(data->swap, 1);
		avail <<= PAGE_SHIFT;
		error = put_user(avail, (loff_t __user *)arg);
		break;

	case SNAPSHOT_GET_SWAP_PAGE:
		if (data->swap < 0 || data->swap >= MAX_SWAPFILES) {
			error = -ENODEV;
			break;
		}
		/* Lazily allocate the bitmap tracking pages we hand out. */
		if (!data->bitmap) {
			data->bitmap = alloc_bitmap(count_swap_pages(data->swap, 0));
			if (!data->bitmap) {
				error = -ENOMEM;
				break;
			}
		}
		offset = alloc_swapdev_block(data->swap, data->bitmap);
		if (offset) {
			/* Return the byte offset of the allocated swap page. */
			offset <<= PAGE_SHIFT;
			error = put_user(offset, (sector_t __user *)arg);
		} else {
			error = -ENOSPC;
		}
		break;

	case SNAPSHOT_FREE_SWAP_PAGES:
		if (data->swap < 0 || data->swap >= MAX_SWAPFILES) {
			error = -ENODEV;
			break;
		}
		free_all_swap_pages(data->swap, data->bitmap);
		free_bitmap(data->bitmap);
		data->bitmap = NULL;
		break;

	case SNAPSHOT_SET_SWAP_FILE:
		/* Refuse to switch devices once swap pages are outstanding. */
		if (!data->bitmap) {
			/*
			 * User space encodes device types as two-byte values,
			 * so we need to recode them
			 */
			if (old_decode_dev(arg)) {
				data->swap = swap_type_of(old_decode_dev(arg),
							  0, NULL);
				if (data->swap < 0)
					error = -ENODEV;
			} else {
				data->swap = -1;
				error = -EINVAL;
			}
		} else {
			error = -EPERM;
		}
		break;

	case SNAPSHOT_S2RAM:
		/* Suspend-to-RAM while frozen (suspend-to-both support). */
		if (!pm_ops) {
			error = -ENOSYS;
			break;
		}

		if (!data->frozen) {
			error = -EPERM;
			break;
		}

		if (!mutex_trylock(&pm_mutex)) {
			error = -EBUSY;
			break;
		}

		if (pm_ops->prepare) {
			error = pm_ops->prepare(PM_SUSPEND_MEM);
			if (error)
				goto OutS3;
		}

		/* Put devices to sleep */
		suspend_console();
		error = device_suspend(PMSG_SUSPEND);
		if (error) {
			printk(KERN_ERR "Failed to suspend some devices.\n");
		} else {
			error = disable_nonboot_cpus();
			if (!error) {
				/* Enter S3, system is already frozen */
				suspend_enter(PM_SUSPEND_MEM);
				enable_nonboot_cpus();
			}
			/* Wake up devices */
			device_resume();
		}
		resume_console();
		if (pm_ops->finish)
			pm_ops->finish(PM_SUSPEND_MEM);

 OutS3:
		mutex_unlock(&pm_mutex);
		break;

	case SNAPSHOT_PMOPS:
		/* Drive the platform (e.g. ACPI S4) hibernation sequence. */
		error = -EINVAL;

		switch (arg) {

		case PMOPS_PREPARE:
			if (pm_ops && pm_ops->enter) {
				data->platform_suspend = 1;
				error = 0;
			} else {
				error = -ENOSYS;
			}
			break;

		case PMOPS_ENTER:
			if (data->platform_suspend) {
				kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK);
				error = pm_ops->enter(PM_SUSPEND_DISK);
				/*
				 * NOTE(review): the assignment below discards
				 * the return value of pm_ops->enter() and
				 * always reports success — presumably
				 * intentional (->enter only returns on
				 * failure-to-power-off), but worth confirming.
				 */
				error = 0;
			}
			break;

		case PMOPS_FINISH:
			if (data->platform_suspend)
				error = 0;

			break;

		default:
			printk(KERN_ERR "SNAPSHOT_PMOPS: invalid argument %ld\n", arg);

		}
		break;

	case SNAPSHOT_SET_SWAP_AREA:
		if (data->bitmap) {
			/* Too late: swap pages already allocated. */
			error = -EPERM;
		} else {
			struct resume_swap_area swap_area;
			dev_t swdev;

			error = copy_from_user(&swap_area, (void __user *)arg,
					       sizeof(struct resume_swap_area));
			if (error) {
				error = -EFAULT;
				break;
			}

			/*
			 * User space encodes device types as two-byte values,
			 * so we need to recode them
			 */
			swdev = old_decode_dev(swap_area.dev);
			if (swdev) {
				offset = swap_area.offset;
				data->swap = swap_type_of(swdev, offset, NULL);
				if (data->swap < 0)
					error = -ENODEV;
			} else {
				data->swap = -1;
				error = -EINVAL;
			}
		}
		break;

	default:
		error = -ENOTTY;

	}

	return error;
}
|
||||
|
||||
/* File operations backing /dev/snapshot. */
static const struct file_operations snapshot_fops = {
	.open = snapshot_open,
	.release = snapshot_release,
	.read = snapshot_read,
	.write = snapshot_write,
	.llseek = no_llseek,	/* image is a stream; seeking is meaningless */
	.ioctl = snapshot_ioctl,
};
|
||||
|
||||
/* Misc-device descriptor: registers /dev/snapshot (minor 231). */
static struct miscdevice snapshot_device = {
	.minor = SNAPSHOT_MINOR,
	.name = "snapshot",
	.fops = &snapshot_fops,
};
|
||||
|
||||
/*
 * snapshot_device_init - register /dev/snapshot at boot.
 * (Fix: dropped the stray semicolon that followed the function body.)
 */
static int __init snapshot_device_init(void)
{
	return misc_register(&snapshot_device);
}

device_initcall(snapshot_device_init);
|
||||
1134
kernel/printk.c
Normal file
1134
kernel/printk.c
Normal file
File diff suppressed because it is too large
Load Diff
592
kernel/profile.c
Normal file
592
kernel/profile.c
Normal file
@@ -0,0 +1,592 @@
|
||||
/*
|
||||
* linux/kernel/profile.c
|
||||
* Simple profiling. Manages a direct-mapped profile hit count buffer,
|
||||
* with configurable resolution, support for restricting the cpus on
|
||||
* which profiling is done, and switching between cpu time and
|
||||
* schedule() calls via kernel command line parameters passed at boot.
|
||||
*
|
||||
* Scheduler profiling support, Arjan van de Ven and Ingo Molnar,
|
||||
* Red Hat, July 2004
|
||||
* Consolidation of architecture support code for profiling,
|
||||
* William Irwin, Oracle, July 2004
|
||||
* Amortized hit count accounting via per-cpu open-addressed hashtables
|
||||
* to resolve timer interrupt livelocks, William Irwin, Oracle, 2004
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/profile.h>
|
||||
#include <linux/bootmem.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/profile.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <asm/sections.h>
|
||||
#include <asm/semaphore.h>
|
||||
#include <asm/irq_regs.h>
|
||||
|
||||
/*
 * One pending profile hit: a program-counter slot and the number of
 * hits accumulated for it in the per-CPU hash table.
 */
struct profile_hit {
	u32 pc, hits;
};
/* Open-addressing probe group: 2^3 = 8 consecutive entries per probe. */
#define PROFILE_GRPSHIFT 3
#define PROFILE_GRPSZ (1 << PROFILE_GRPSHIFT)
/* Each per-CPU hash table occupies exactly one page. */
#define NR_PROFILE_HIT (PAGE_SIZE/sizeof(struct profile_hit))
#define NR_PROFILE_GRP (NR_PROFILE_HIT/PROFILE_GRPSZ)
|
||||
|
||||
/* Oprofile timer tick hook */
int (*timer_hook)(struct pt_regs *) __read_mostly;

/* Global profile buffer and its geometry (set up in profile_init()). */
static atomic_t *prof_buffer;
static unsigned long prof_len, prof_shift;

/* Profiling mode (CPU_/SCHED_/SLEEP_/KVM_PROFILING); 0 = disabled. */
int prof_on __read_mostly;
EXPORT_SYMBOL_GPL(prof_on);

/* CPUs whose ticks feed the profile; writable via /proc/irq/prof_cpu_mask. */
static cpumask_t prof_cpu_mask = CPU_MASK_ALL;
#ifdef CONFIG_SMP
/* Two hash tables per CPU, alternated by profile_flip_buffers(). */
static DEFINE_PER_CPU(struct profile_hit *[2], cpu_profile_hits);
static DEFINE_PER_CPU(int, cpu_profile_flip);
static DEFINE_MUTEX(profile_flip_mutex);
#endif /* CONFIG_SMP */
|
||||
|
||||
static int __init profile_setup(char * str)
|
||||
{
|
||||
static char __initdata schedstr[] = "schedule";
|
||||
static char __initdata sleepstr[] = "sleep";
|
||||
static char __initdata kvmstr[] = "kvm";
|
||||
int par;
|
||||
|
||||
if (!strncmp(str, sleepstr, strlen(sleepstr))) {
|
||||
prof_on = SLEEP_PROFILING;
|
||||
if (str[strlen(sleepstr)] == ',')
|
||||
str += strlen(sleepstr) + 1;
|
||||
if (get_option(&str, &par))
|
||||
prof_shift = par;
|
||||
printk(KERN_INFO
|
||||
"kernel sleep profiling enabled (shift: %ld)\n",
|
||||
prof_shift);
|
||||
} else if (!strncmp(str, schedstr, strlen(schedstr))) {
|
||||
prof_on = SCHED_PROFILING;
|
||||
if (str[strlen(schedstr)] == ',')
|
||||
str += strlen(schedstr) + 1;
|
||||
if (get_option(&str, &par))
|
||||
prof_shift = par;
|
||||
printk(KERN_INFO
|
||||
"kernel schedule profiling enabled (shift: %ld)\n",
|
||||
prof_shift);
|
||||
} else if (!strncmp(str, kvmstr, strlen(kvmstr))) {
|
||||
prof_on = KVM_PROFILING;
|
||||
if (str[strlen(kvmstr)] == ',')
|
||||
str += strlen(kvmstr) + 1;
|
||||
if (get_option(&str, &par))
|
||||
prof_shift = par;
|
||||
printk(KERN_INFO
|
||||
"kernel KVM profiling enabled (shift: %ld)\n",
|
||||
prof_shift);
|
||||
} else if (get_option(&str, &par)) {
|
||||
prof_shift = par;
|
||||
prof_on = CPU_PROFILING;
|
||||
printk(KERN_INFO "kernel profiling enabled (shift: %ld)\n",
|
||||
prof_shift);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
__setup("profile=", profile_setup);
|
||||
|
||||
|
||||
/*
 * profile_init - allocate the global profile buffer at boot.
 * One atomic_t counter per (1 << prof_shift)-byte slice of kernel text.
 */
void __init profile_init(void)
{
	if (!prof_on)
		return;

	/* only text is profiled */
	prof_len = (_etext - _stext) >> prof_shift;
	prof_buffer = alloc_bootmem(prof_len*sizeof(atomic_t));
}
|
||||
|
||||
/* Profile event notifications */
|
||||
|
||||
#ifdef CONFIG_PROFILING
|
||||
|
||||
/* Notifier chains fired by the profile_* event hooks below.
 * task_free_notifier is atomic because it may fire in atomic context. */
static BLOCKING_NOTIFIER_HEAD(task_exit_notifier);
static ATOMIC_NOTIFIER_HEAD(task_free_notifier);
static BLOCKING_NOTIFIER_HEAD(munmap_notifier);
|
||||
|
||||
/* Notify listeners that @task is exiting. */
void profile_task_exit(struct task_struct * task)
{
	blocking_notifier_call_chain(&task_exit_notifier, 0, task);
}
|
||||
|
||||
int profile_handoff_task(struct task_struct * task)
|
||||
{
|
||||
int ret;
|
||||
ret = atomic_notifier_call_chain(&task_free_notifier, 0, task);
|
||||
return (ret == NOTIFY_OK) ? 1 : 0;
|
||||
}
|
||||
|
||||
/* Notify listeners that the mapping at @addr is being unmapped. */
void profile_munmap(unsigned long addr)
{
	blocking_notifier_call_chain(&munmap_notifier, 0, (void *)addr);
}
|
||||
|
||||
/* Subscribe @n to task-free handoff notifications. */
int task_handoff_register(struct notifier_block * n)
{
	return atomic_notifier_chain_register(&task_free_notifier, n);
}
|
||||
|
||||
/* Unsubscribe @n from task-free handoff notifications. */
int task_handoff_unregister(struct notifier_block * n)
{
	return atomic_notifier_chain_unregister(&task_free_notifier, n);
}
|
||||
|
||||
int profile_event_register(enum profile_type type, struct notifier_block * n)
|
||||
{
|
||||
int err = -EINVAL;
|
||||
|
||||
switch (type) {
|
||||
case PROFILE_TASK_EXIT:
|
||||
err = blocking_notifier_chain_register(
|
||||
&task_exit_notifier, n);
|
||||
break;
|
||||
case PROFILE_MUNMAP:
|
||||
err = blocking_notifier_chain_register(
|
||||
&munmap_notifier, n);
|
||||
break;
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
int profile_event_unregister(enum profile_type type, struct notifier_block * n)
|
||||
{
|
||||
int err = -EINVAL;
|
||||
|
||||
switch (type) {
|
||||
case PROFILE_TASK_EXIT:
|
||||
err = blocking_notifier_chain_unregister(
|
||||
&task_exit_notifier, n);
|
||||
break;
|
||||
case PROFILE_MUNMAP:
|
||||
err = blocking_notifier_chain_unregister(
|
||||
&munmap_notifier, n);
|
||||
break;
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
 * register_timer_hook - install the (single) oprofile timer-tick hook.
 * Returns -EBUSY if a hook is already installed.
 * NOTE(review): the check-then-set is not atomic; presumably callers
 * are serialized externally — confirm before relying on it.
 */
int register_timer_hook(int (*hook)(struct pt_regs *))
{
	if (timer_hook)
		return -EBUSY;
	timer_hook = hook;
	return 0;
}
|
||||
|
||||
/*
 * unregister_timer_hook - remove the timer-tick hook and wait until no
 * CPU can still be executing it.
 */
void unregister_timer_hook(int (*hook)(struct pt_regs *))
{
	WARN_ON(hook != timer_hook);
	timer_hook = NULL;
	/* make sure all CPUs see the NULL hook */
	synchronize_sched();  /* Allow ongoing interrupts to complete. */
}
|
||||
|
||||
EXPORT_SYMBOL_GPL(register_timer_hook);
|
||||
EXPORT_SYMBOL_GPL(unregister_timer_hook);
|
||||
EXPORT_SYMBOL_GPL(task_handoff_register);
|
||||
EXPORT_SYMBOL_GPL(task_handoff_unregister);
|
||||
|
||||
#endif /* CONFIG_PROFILING */
|
||||
|
||||
EXPORT_SYMBOL_GPL(profile_event_register);
|
||||
EXPORT_SYMBOL_GPL(profile_event_unregister);
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/*
|
||||
* Each cpu has a pair of open-addressed hashtables for pending
|
||||
* profile hits. read_profile() IPI's all cpus to request them
|
||||
* to flip buffers and flushes their contents to prof_buffer itself.
|
||||
* Flip requests are serialized by the profile_flip_mutex. The sole
|
||||
* use of having a second hashtable is for avoiding cacheline
|
||||
* contention that would otherwise happen during flushes of pending
|
||||
* profile hits required for the accuracy of reported profile hits
|
||||
* and so resurrect the interrupt livelock issue.
|
||||
*
|
||||
* The open-addressed hashtables are indexed by profile buffer slot
|
||||
* and hold the number of pending hits to that profile buffer slot on
|
||||
* a cpu in an entry. When the hashtable overflows, all pending hits
|
||||
* are accounted to their corresponding profile buffer slots with
|
||||
* atomic_add() and the hashtable emptied. As numerous pending hits
|
||||
* may be accounted to a profile buffer slot in a hashtable entry,
|
||||
* this amortizes a number of atomic profile buffer increments likely
|
||||
* to be far larger than the number of entries in the hashtable,
|
||||
* particularly given that the number of distinct profile buffer
|
||||
* positions to which hits are accounted during short intervals (e.g.
|
||||
* several seconds) is usually very small. Exclusion from buffer
|
||||
* flipping is provided by interrupt disablement (note that for
|
||||
* SCHED_PROFILING or SLEEP_PROFILING profile_hit() may be called from
|
||||
* process context).
|
||||
* The hash function is meant to be lightweight as opposed to strong,
|
||||
* and was vaguely inspired by ppc64 firmware-supported inverted
|
||||
* pagetable hash functions, but uses a full hashtable full of finite
|
||||
* collision chains, not just pairs of them.
|
||||
*
|
||||
* -- wli
|
||||
*/
|
||||
static void __profile_flip_buffers(void *unused)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
|
||||
per_cpu(cpu_profile_flip, cpu) = !per_cpu(cpu_profile_flip, cpu);
|
||||
}
|
||||
|
||||
/*
 * profile_flip_buffers - retire the currently-active per-CPU hash
 * tables and flush their contents into the global prof_buffer.
 *
 * After the cross-CPU flip (IPI), table @j on every CPU is no longer
 * written to, so it can be drained without racing profile_hits().
 * Serialized against concurrent flips by profile_flip_mutex.
 */
static void profile_flip_buffers(void)
{
	int i, j, cpu;

	mutex_lock(&profile_flip_mutex);
	/* Remember which table was active before the flip: that one drains. */
	j = per_cpu(cpu_profile_flip, get_cpu());
	put_cpu();
	on_each_cpu(__profile_flip_buffers, NULL, 0, 1);
	for_each_online_cpu(cpu) {
		struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[j];
		for (i = 0; i < NR_PROFILE_HIT; ++i) {
			if (!hits[i].hits) {
				/* Empty entry; clear any stale pc marker. */
				if (hits[i].pc)
					hits[i].pc = 0;
				continue;
			}
			atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]);
			hits[i].hits = hits[i].pc = 0;
		}
	}
	mutex_unlock(&profile_flip_mutex);
}
|
||||
|
||||
/*
 * profile_discard_flip_buffers - throw away all pending per-CPU hits
 * instead of flushing them (used when the profile is being reset).
 * Flips first so the table being wiped is no longer written to.
 */
static void profile_discard_flip_buffers(void)
{
	int i, cpu;

	mutex_lock(&profile_flip_mutex);
	i = per_cpu(cpu_profile_flip, get_cpu());
	put_cpu();
	on_each_cpu(__profile_flip_buffers, NULL, 0, 1);
	for_each_online_cpu(cpu) {
		struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[i];
		memset(hits, 0, NR_PROFILE_HIT*sizeof(struct profile_hit));
	}
	mutex_unlock(&profile_flip_mutex);
}
|
||||
|
||||
/*
 * profile_hits - record @nr_hits profile hits at program counter @__pc
 * (SMP version).
 *
 * Hits are coalesced in the local CPU's active open-addressed hash
 * table; only on a full-table collision-chain miss are they flushed to
 * the global prof_buffer.  Interrupts are disabled to exclude
 * profile_flip_buffers()' IPI while the table is being modified.
 */
void profile_hits(int type, void *__pc, unsigned int nr_hits)
{
	unsigned long primary, secondary, flags, pc = (unsigned long)__pc;
	int i, j, cpu;
	struct profile_hit *hits;

	if (prof_on != type || !prof_buffer)
		return;
	/* Clamp out-of-range PCs into the last profile slot. */
	pc = min((pc - (unsigned long)_stext) >> prof_shift, prof_len - 1);
	i = primary = (pc & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT;
	secondary = (~(pc << 1) & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT;
	cpu = get_cpu();
	hits = per_cpu(cpu_profile_hits, cpu)[per_cpu(cpu_profile_flip, cpu)];
	if (!hits) {
		put_cpu();
		return;
	}
	/*
	 * We buffer the global profiler buffer into a per-CPU
	 * queue and thus reduce the number of global (and possibly
	 * NUMA-alien) accesses. The write-queue is self-coalescing:
	 */
	local_irq_save(flags);
	do {
		for (j = 0; j < PROFILE_GRPSZ; ++j) {
			if (hits[i + j].pc == pc) {
				/* Existing entry: just bump the count. */
				hits[i + j].hits += nr_hits;
				goto out;
			} else if (!hits[i + j].hits) {
				/* Free entry: claim it for this pc. */
				hits[i + j].pc = pc;
				hits[i + j].hits = nr_hits;
				goto out;
			}
		}
		/* Probe the next group (secondary stride). */
		i = (i + secondary) & (NR_PROFILE_HIT - 1);
	} while (i != primary);

	/*
	 * Add the current hit(s) and flush the write-queue out
	 * to the global buffer:
	 */
	atomic_add(nr_hits, &prof_buffer[pc]);
	for (i = 0; i < NR_PROFILE_HIT; ++i) {
		atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]);
		hits[i].pc = hits[i].hits = 0;
	}
out:
	local_irq_restore(flags);
	put_cpu();
}
|
||||
|
||||
/*
 * profile_cpu_callback - CPU-hotplug notifier.
 *
 * CPU_UP_PREPARE: allocate the two per-CPU hash-table pages on the
 * CPU's home node (both or neither — the second failure path frees
 * the first page via out_free).  CPU_ONLINE: start profiling the CPU.
 * CPU_UP_CANCELED/CPU_DEAD: stop profiling it and free its pages.
 */
static int __devinit profile_cpu_callback(struct notifier_block *info,
					unsigned long action, void *__cpu)
{
	int node, cpu = (unsigned long)__cpu;
	struct page *page;

	switch (action) {
	case CPU_UP_PREPARE:
		node = cpu_to_node(cpu);
		per_cpu(cpu_profile_flip, cpu) = 0;
		if (!per_cpu(cpu_profile_hits, cpu)[1]) {
			page = alloc_pages_node(node,
					GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
					0);
			if (!page)
				return NOTIFY_BAD;
			per_cpu(cpu_profile_hits, cpu)[1] = page_address(page);
		}
		if (!per_cpu(cpu_profile_hits, cpu)[0]) {
			page = alloc_pages_node(node,
					GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
					0);
			if (!page)
				goto out_free;
			per_cpu(cpu_profile_hits, cpu)[0] = page_address(page);
		}
		break;
	out_free:
		/* Second allocation failed: release the first page too. */
		page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[1]);
		per_cpu(cpu_profile_hits, cpu)[1] = NULL;
		__free_page(page);
		return NOTIFY_BAD;
	case CPU_ONLINE:
		cpu_set(cpu, prof_cpu_mask);
		break;
	case CPU_UP_CANCELED:
	case CPU_DEAD:
		cpu_clear(cpu, prof_cpu_mask);
		if (per_cpu(cpu_profile_hits, cpu)[0]) {
			page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[0]);
			per_cpu(cpu_profile_hits, cpu)[0] = NULL;
			__free_page(page);
		}
		if (per_cpu(cpu_profile_hits, cpu)[1]) {
			page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[1]);
			per_cpu(cpu_profile_hits, cpu)[1] = NULL;
			__free_page(page);
		}
		break;
	}
	return NOTIFY_OK;
}
|
||||
#else /* !CONFIG_SMP */
/* On UP there is nothing to flip or discard, and no hotplug callback. */
#define profile_flip_buffers()		do { } while (0)
#define profile_discard_flip_buffers()	do { } while (0)
#define profile_cpu_callback		NULL

/*
 * profile_hits - record @nr_hits at program counter @__pc (UP version):
 * add straight into the global buffer, clamping out-of-range PCs into
 * the last slot.
 */
void profile_hits(int type, void *__pc, unsigned int nr_hits)
{
	unsigned long pc;

	if (prof_on != type || !prof_buffer)
		return;
	pc = ((unsigned long)__pc - (unsigned long)_stext) >> prof_shift;
	atomic_add(nr_hits, &prof_buffer[min(pc, prof_len - 1)]);
}
#endif /* !CONFIG_SMP */
|
||||
|
||||
EXPORT_SYMBOL_GPL(profile_hits);
|
||||
|
||||
void profile_tick(int type)
|
||||
{
|
||||
struct pt_regs *regs = get_irq_regs();
|
||||
|
||||
if (type == CPU_PROFILING && timer_hook)
|
||||
timer_hook(regs);
|
||||
if (!user_mode(regs) && cpu_isset(smp_processor_id(), prof_cpu_mask))
|
||||
profile_hit(type, (void *)profile_pc(regs));
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PROC_FS
|
||||
#include <linux/proc_fs.h>
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/ptrace.h>
|
||||
|
||||
/*
 * prof_cpu_mask_read_proc - /proc read handler: format prof_cpu_mask
 * as a hex cpumask followed by a newline.  Needs two spare bytes in
 * @page for "\n" plus the terminator.
 */
static int prof_cpu_mask_read_proc (char *page, char **start, off_t off,
			int count, int *eof, void *data)
{
	int len = cpumask_scnprintf(page, count, *(cpumask_t *)data);
	if (count - len < 2)
		return -EINVAL;
	len += sprintf(page + len, "\n");
	return len;
}
|
||||
|
||||
/*
 * prof_cpu_mask_write_proc - /proc write handler: parse a cpumask from
 * user space and install it as prof_cpu_mask.  Returns the number of
 * bytes consumed or a negative errno.
 */
static int prof_cpu_mask_write_proc (struct file *file, const char __user *buffer,
					unsigned long count, void *data)
{
	cpumask_t *mask = (cpumask_t *)data;
	unsigned long full_count = count;
	/*
	 * Fix: cpumask_parse_user() returns a (possibly negative) int;
	 * keeping it in an unsigned long only worked by accident of the
	 * truncation back to the int return type.
	 */
	int err;
	cpumask_t new_value;

	err = cpumask_parse_user(buffer, count, new_value);
	if (err)
		return err;

	*mask = new_value;
	return full_count;
}
|
||||
|
||||
void create_prof_cpu_mask(struct proc_dir_entry *root_irq_dir)
|
||||
{
|
||||
struct proc_dir_entry *entry;
|
||||
|
||||
/* create /proc/irq/prof_cpu_mask */
|
||||
if (!(entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir)))
|
||||
return;
|
||||
entry->data = (void *)&prof_cpu_mask;
|
||||
entry->read_proc = prof_cpu_mask_read_proc;
|
||||
entry->write_proc = prof_cpu_mask_write_proc;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function accesses profiling information. The returned data is
|
||||
* binary: the sampling step and the actual contents of the profile
|
||||
* buffer. Use of the program readprofile is recommended in order to
|
||||
* get meaningful info out of these data.
|
||||
*/
|
||||
/*
 * This function accesses profiling information. The returned data is
 * binary: the sampling step and the actual contents of the profile
 * buffer. Use of the program readprofile is recommended in order to
 * get meaningful info out of these data.
 *
 * The virtual file layout is: one unsigned int holding sample_step,
 * followed by prof_len atomic_t counters.  The first loop serves bytes
 * out of sample_step for offsets < sizeof(unsigned int); the rest is a
 * straight copy from prof_buffer (note the assumption that atomic_t
 * and unsigned int have the same size here).
 */
static ssize_t
read_profile(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
	unsigned long p = *ppos;
	ssize_t read;
	char * pnt;
	unsigned int sample_step = 1 << prof_shift;

	/* Flush pending per-CPU hits so the snapshot is current. */
	profile_flip_buffers();
	if (p >= (prof_len+1)*sizeof(unsigned int))
		return 0;
	if (count > (prof_len+1)*sizeof(unsigned int) - p)
		count = (prof_len+1)*sizeof(unsigned int) - p;
	read = 0;

	/* Serve the sample_step header byte by byte. */
	while (p < sizeof(unsigned int) && count > 0) {
		if (put_user(*((char *)(&sample_step)+p),buf))
			return -EFAULT;
		buf++; p++; count--; read++;
	}
	/* Then the counters, offset back by the header size. */
	pnt = (char *)prof_buffer + p - sizeof(atomic_t);
	if (copy_to_user(buf,(void *)pnt,count))
		return -EFAULT;
	read += count;
	*ppos += read;
	return read;
}
|
||||
|
||||
/*
|
||||
* Writing to /proc/profile resets the counters
|
||||
*
|
||||
* Writing a 'profiling multiplier' value into it also re-sets the profiling
|
||||
* interrupt frequency, on architectures that support this.
|
||||
*/
|
||||
/*
 * Writing to /proc/profile resets the counters
 *
 * Writing a 'profiling multiplier' value into it also re-sets the profiling
 * interrupt frequency, on architectures that support this.
 */
static ssize_t write_profile(struct file *file, const char __user *buf,
			     size_t count, loff_t *ppos)
{
#ifdef CONFIG_SMP
	extern int setup_profiling_timer (unsigned int multiplier);

	/* An exactly int-sized write is interpreted as a new multiplier. */
	if (count == sizeof(int)) {
		unsigned int multiplier;

		if (copy_from_user(&multiplier, buf, sizeof(int)))
			return -EFAULT;

		if (setup_profiling_timer(multiplier))
			return -EINVAL;
	}
#endif
	/* Drop pending per-CPU hits, then zero the global buffer. */
	profile_discard_flip_buffers();
	memset(prof_buffer, 0, prof_len * sizeof(atomic_t));
	return count;
}
|
||||
|
||||
/* File operations for /proc/profile. */
static const struct file_operations proc_profile_operations = {
	.read		= read_profile,
	.write		= write_profile,
};
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/* Empty IPI target: used only to drain in-flight profile interrupts. */
static void __init profile_nop(void *unused)
{
}
|
||||
|
||||
/*
 * create_hash_tables - allocate both per-CPU hash-table pages for every
 * online CPU (node-local, zeroed).  On any failure, profiling is turned
 * off, in-flight profile interrupts are drained via an empty IPI, and
 * all pages allocated so far are released.  Returns 0 or -1.
 */
static int __init create_hash_tables(void)
{
	int cpu;

	for_each_online_cpu(cpu) {
		int node = cpu_to_node(cpu);
		struct page *page;

		page = alloc_pages_node(node,
				GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
				0);
		if (!page)
			goto out_cleanup;
		per_cpu(cpu_profile_hits, cpu)[1]
				= (struct profile_hit *)page_address(page);
		page = alloc_pages_node(node,
				GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
				0);
		if (!page)
			goto out_cleanup;
		per_cpu(cpu_profile_hits, cpu)[0]
				= (struct profile_hit *)page_address(page);
	}
	return 0;
out_cleanup:
	/* Disable profiling before freeing, so no CPU touches the pages. */
	prof_on = 0;
	smp_mb();
	on_each_cpu(profile_nop, NULL, 0, 1);
	for_each_online_cpu(cpu) {
		struct page *page;

		if (per_cpu(cpu_profile_hits, cpu)[0]) {
			page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[0]);
			per_cpu(cpu_profile_hits, cpu)[0] = NULL;
			__free_page(page);
		}
		if (per_cpu(cpu_profile_hits, cpu)[1]) {
			page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[1]);
			per_cpu(cpu_profile_hits, cpu)[1] = NULL;
			__free_page(page);
		}
	}
	return -1;
}
|
||||
#else
|
||||
#define create_hash_tables() ({ 0; })
|
||||
#endif
|
||||
|
||||
/*
 * create_proc_profile - boot-time setup of /proc/profile.
 * No-op unless "profile=" was given.  Allocates the per-CPU hash
 * tables, creates the proc entry (header + counters sized), and
 * registers the CPU-hotplug callback.
 */
static int __init create_proc_profile(void)
{
	struct proc_dir_entry *entry;

	if (!prof_on)
		return 0;
	if (create_hash_tables())
		return -1;
	if (!(entry = create_proc_entry("profile", S_IWUSR | S_IRUGO, NULL)))
		return 0;
	entry->proc_fops = &proc_profile_operations;
	/* One header word plus one counter per profiled text slice. */
	entry->size = (1+prof_len) * sizeof(atomic_t);
	hotcpu_notifier(profile_cpu_callback, 0);
	return 0;
}
module_init(create_proc_profile);
|
||||
#endif /* CONFIG_PROC_FS */
|
||||
490
kernel/ptrace.c
Normal file
490
kernel/ptrace.c
Normal file
@@ -0,0 +1,490 @@
|
||||
/*
|
||||
* linux/kernel/ptrace.c
|
||||
*
|
||||
* (C) Copyright 1999 Linus Torvalds
|
||||
*
|
||||
* Common interfaces for "ptrace()" which we do not want
|
||||
* to continually duplicate across every architecture.
|
||||
*/
|
||||
|
||||
#include <linux/capability.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/smp_lock.h>
|
||||
#include <linux/ptrace.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/signal.h>
|
||||
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/uaccess.h>
|
||||
|
||||
/*
|
||||
* ptrace a task: make the debugger its new parent and
|
||||
* move it to the ptrace list.
|
||||
*
|
||||
* Must be called with the tasklist lock write-held.
|
||||
*/
|
||||
/*
 * ptrace a task: make the debugger its new parent and
 * move it to the ptrace list.
 *
 * Must be called with the tasklist lock write-held.
 */
void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
{
	BUG_ON(!list_empty(&child->ptrace_list));
	/* Already reparented to the tracer (it is the real parent). */
	if (child->parent == new_parent)
		return;
	/* Remember the original parent via the ptrace_children list. */
	list_add(&child->ptrace_list, &child->parent->ptrace_children);
	remove_parent(child);
	child->parent = new_parent;
	add_parent(child);
}
|
||||
|
||||
/*
|
||||
* Turn a tracing stop into a normal stop now, since with no tracer there
|
||||
* would be no way to wake it up with SIGCONT or SIGKILL. If there was a
|
||||
* signal sent that would resume the child, but didn't because it was in
|
||||
* TASK_TRACED, resume it now.
|
||||
* Requires that irqs be disabled.
|
||||
*/
|
||||
/*
 * Turn a tracing stop into a normal stop now, since with no tracer there
 * would be no way to wake it up with SIGCONT or SIGKILL. If there was a
 * signal sent that would resume the child, but didn't because it was in
 * TASK_TRACED, resume it now.
 * Requires that irqs be disabled.
 */
void ptrace_untrace(struct task_struct *child)
{
	spin_lock(&child->sighand->siglock);
	if (child->state == TASK_TRACED) {
		if (child->signal->flags & SIGNAL_STOP_STOPPED) {
			/* Group stop still in effect: become ordinarily stopped. */
			child->state = TASK_STOPPED;
		} else {
			/* A resuming signal is pending: let the child run. */
			signal_wake_up(child, 1);
		}
	}
	spin_unlock(&child->sighand->siglock);
}
|
||||
|
||||
/*
|
||||
* unptrace a task: move it back to its original parent and
|
||||
* remove it from the ptrace list.
|
||||
*
|
||||
* Must be called with the tasklist lock write-held.
|
||||
*/
|
||||
void __ptrace_unlink(struct task_struct *child)
|
||||
{
|
||||
BUG_ON(!child->ptrace);
|
||||
|
||||
child->ptrace = 0;
|
||||
if (!list_empty(&child->ptrace_list)) {
|
||||
list_del_init(&child->ptrace_list);
|
||||
remove_parent(child);
|
||||
child->parent = child->real_parent;
|
||||
add_parent(child);
|
||||
}
|
||||
|
||||
if (child->state == TASK_TRACED)
|
||||
ptrace_untrace(child);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check that we have indeed attached to the thing..
|
||||
*/
|
||||
int ptrace_check_attach(struct task_struct *child, int kill)
|
||||
{
|
||||
int ret = -ESRCH;
|
||||
|
||||
/*
|
||||
* We take the read lock around doing both checks to close a
|
||||
* possible race where someone else was tracing our child and
|
||||
* detached between these two checks. After this locked check,
|
||||
* we are sure that this is our traced child and that can only
|
||||
* be changed by us so it's not changing right after this.
|
||||
*/
|
||||
read_lock(&tasklist_lock);
|
||||
if ((child->ptrace & PT_PTRACED) && child->parent == current &&
|
||||
(!(child->ptrace & PT_ATTACHED) || child->real_parent != current)
|
||||
&& child->signal != NULL) {
|
||||
ret = 0;
|
||||
spin_lock_irq(&child->sighand->siglock);
|
||||
if (child->state == TASK_STOPPED) {
|
||||
child->state = TASK_TRACED;
|
||||
} else if (child->state != TASK_TRACED && !kill) {
|
||||
ret = -ESRCH;
|
||||
}
|
||||
spin_unlock_irq(&child->sighand->siglock);
|
||||
}
|
||||
read_unlock(&tasklist_lock);
|
||||
|
||||
if (!ret && !kill) {
|
||||
wait_task_inactive(child);
|
||||
}
|
||||
|
||||
/* All systems go.. */
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int may_attach(struct task_struct *task)
|
||||
{
|
||||
/* May we inspect the given task?
|
||||
* This check is used both for attaching with ptrace
|
||||
* and for allowing access to sensitive information in /proc.
|
||||
*
|
||||
* ptrace_attach denies several cases that /proc allows
|
||||
* because setting up the necessary parent/child relationship
|
||||
* or halting the specified task is impossible.
|
||||
*/
|
||||
int dumpable = 0;
|
||||
/* Don't let security modules deny introspection */
|
||||
if (task == current)
|
||||
return 0;
|
||||
if (((current->uid != task->euid) ||
|
||||
(current->uid != task->suid) ||
|
||||
(current->uid != task->uid) ||
|
||||
(current->gid != task->egid) ||
|
||||
(current->gid != task->sgid) ||
|
||||
(current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
|
||||
return -EPERM;
|
||||
smp_rmb();
|
||||
if (task->mm)
|
||||
dumpable = task->mm->dumpable;
|
||||
if (!dumpable && !capable(CAP_SYS_PTRACE))
|
||||
return -EPERM;
|
||||
|
||||
return security_ptrace(current, task);
|
||||
}
|
||||
|
||||
int ptrace_may_attach(struct task_struct *task)
{
	int err;

	task_lock(task);
	err = may_attach(task);
	task_unlock(task);

	/* may_attach() returns 0 on success: convert to a boolean. */
	return !err;
}
|
||||
|
||||
int ptrace_attach(struct task_struct *task)
|
||||
{
|
||||
int retval;
|
||||
|
||||
retval = -EPERM;
|
||||
if (task->pid <= 1)
|
||||
goto out;
|
||||
if (task->tgid == current->tgid)
|
||||
goto out;
|
||||
|
||||
repeat:
|
||||
/*
|
||||
* Nasty, nasty.
|
||||
*
|
||||
* We want to hold both the task-lock and the
|
||||
* tasklist_lock for writing at the same time.
|
||||
* But that's against the rules (tasklist_lock
|
||||
* is taken for reading by interrupts on other
|
||||
* cpu's that may have task_lock).
|
||||
*/
|
||||
task_lock(task);
|
||||
local_irq_disable();
|
||||
if (!write_trylock(&tasklist_lock)) {
|
||||
local_irq_enable();
|
||||
task_unlock(task);
|
||||
do {
|
||||
cpu_relax();
|
||||
} while (!write_can_lock(&tasklist_lock));
|
||||
goto repeat;
|
||||
}
|
||||
|
||||
if (!task->mm)
|
||||
goto bad;
|
||||
/* the same process cannot be attached many times */
|
||||
if (task->ptrace & PT_PTRACED)
|
||||
goto bad;
|
||||
retval = may_attach(task);
|
||||
if (retval)
|
||||
goto bad;
|
||||
|
||||
/* Go */
|
||||
task->ptrace |= PT_PTRACED | ((task->real_parent != current)
|
||||
? PT_ATTACHED : 0);
|
||||
if (capable(CAP_SYS_PTRACE))
|
||||
task->ptrace |= PT_PTRACE_CAP;
|
||||
|
||||
__ptrace_link(task, current);
|
||||
|
||||
force_sig_specific(SIGSTOP, task);
|
||||
|
||||
bad:
|
||||
write_unlock_irq(&tasklist_lock);
|
||||
task_unlock(task);
|
||||
out:
|
||||
return retval;
|
||||
}
|
||||
|
||||
static inline void __ptrace_detach(struct task_struct *child, unsigned int data)
|
||||
{
|
||||
child->exit_code = data;
|
||||
/* .. re-parent .. */
|
||||
__ptrace_unlink(child);
|
||||
/* .. and wake it up. */
|
||||
if (child->exit_state != EXIT_ZOMBIE)
|
||||
wake_up_process(child);
|
||||
}
|
||||
|
||||
int ptrace_detach(struct task_struct *child, unsigned int data)
|
||||
{
|
||||
if (!valid_signal(data))
|
||||
return -EIO;
|
||||
|
||||
/* Architecture-specific hardware disable .. */
|
||||
ptrace_disable(child);
|
||||
|
||||
write_lock_irq(&tasklist_lock);
|
||||
/* protect against de_thread()->release_task() */
|
||||
if (child->ptrace)
|
||||
__ptrace_detach(child, data);
|
||||
write_unlock_irq(&tasklist_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len)
{
	int copied = 0;

	/* Move the target's memory to userspace in bounded chunks. */
	while (len > 0) {
		char chunk[128];
		int want, got;

		want = (len > sizeof(chunk)) ? sizeof(chunk) : len;
		got = access_process_vm(tsk, src, chunk, want, 0);
		if (!got) {
			/* Report a short read; -EIO only if nothing came. */
			if (copied)
				break;
			return -EIO;
		}
		if (copy_to_user(dst, chunk, got))
			return -EFAULT;
		copied += got;
		src += got;
		dst += got;
		len -= got;
	}
	return copied;
}
|
||||
|
||||
int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len)
{
	int copied = 0;

	/* Move userspace data into the target's memory in bounded chunks. */
	while (len > 0) {
		char chunk[128];
		int want, wrote;

		want = (len > sizeof(chunk)) ? sizeof(chunk) : len;
		if (copy_from_user(chunk, src, want))
			return -EFAULT;
		wrote = access_process_vm(tsk, dst, chunk, want, 1);
		if (!wrote) {
			/* Report a short write; -EIO only if nothing went. */
			if (copied)
				break;
			return -EIO;
		}
		copied += wrote;
		src += wrote;
		dst += wrote;
		len -= wrote;
	}
	return copied;
}
|
||||
|
||||
static int ptrace_setoptions(struct task_struct *child, long data)
|
||||
{
|
||||
child->ptrace &= ~PT_TRACE_MASK;
|
||||
|
||||
if (data & PTRACE_O_TRACESYSGOOD)
|
||||
child->ptrace |= PT_TRACESYSGOOD;
|
||||
|
||||
if (data & PTRACE_O_TRACEFORK)
|
||||
child->ptrace |= PT_TRACE_FORK;
|
||||
|
||||
if (data & PTRACE_O_TRACEVFORK)
|
||||
child->ptrace |= PT_TRACE_VFORK;
|
||||
|
||||
if (data & PTRACE_O_TRACECLONE)
|
||||
child->ptrace |= PT_TRACE_CLONE;
|
||||
|
||||
if (data & PTRACE_O_TRACEEXEC)
|
||||
child->ptrace |= PT_TRACE_EXEC;
|
||||
|
||||
if (data & PTRACE_O_TRACEVFORKDONE)
|
||||
child->ptrace |= PT_TRACE_VFORK_DONE;
|
||||
|
||||
if (data & PTRACE_O_TRACEEXIT)
|
||||
child->ptrace |= PT_TRACE_EXIT;
|
||||
|
||||
return (data & ~PTRACE_O_MASK) ? -EINVAL : 0;
|
||||
}
|
||||
|
||||
static int ptrace_getsiginfo(struct task_struct *child, siginfo_t __user * data)
{
	siginfo_t lastinfo;
	int error = -ESRCH;

	/*
	 * Snapshot child->last_siginfo under the siglock, then do the
	 * (possibly faulting) copy to userspace outside all locks.
	 */
	read_lock(&tasklist_lock);
	if (likely(child->sighand != NULL)) {
		error = -EINVAL;
		spin_lock_irq(&child->sighand->siglock);
		if (likely(child->last_siginfo != NULL)) {
			lastinfo = *child->last_siginfo;
			error = 0;
		}
		spin_unlock_irq(&child->sighand->siglock);
	}
	read_unlock(&tasklist_lock);
	if (!error)
		return copy_siginfo_to_user(data, &lastinfo);
	return error;
}
|
||||
|
||||
static int ptrace_setsiginfo(struct task_struct *child, siginfo_t __user * data)
{
	siginfo_t newinfo;
	int error = -ESRCH;

	/* Pull the new siginfo in before taking any locks. */
	if (copy_from_user(&newinfo, data, sizeof (siginfo_t)))
		return -EFAULT;

	read_lock(&tasklist_lock);
	if (likely(child->sighand != NULL)) {
		error = -EINVAL;
		spin_lock_irq(&child->sighand->siglock);
		if (likely(child->last_siginfo != NULL)) {
			*child->last_siginfo = newinfo;
			error = 0;
		}
		spin_unlock_irq(&child->sighand->siglock);
	}
	read_unlock(&tasklist_lock);
	return error;
}
|
||||
|
||||
int ptrace_request(struct task_struct *child, long request,
|
||||
long addr, long data)
|
||||
{
|
||||
int ret = -EIO;
|
||||
|
||||
switch (request) {
|
||||
#ifdef PTRACE_OLDSETOPTIONS
|
||||
case PTRACE_OLDSETOPTIONS:
|
||||
#endif
|
||||
case PTRACE_SETOPTIONS:
|
||||
ret = ptrace_setoptions(child, data);
|
||||
break;
|
||||
case PTRACE_GETEVENTMSG:
|
||||
ret = put_user(child->ptrace_message, (unsigned long __user *) data);
|
||||
break;
|
||||
case PTRACE_GETSIGINFO:
|
||||
ret = ptrace_getsiginfo(child, (siginfo_t __user *) data);
|
||||
break;
|
||||
case PTRACE_SETSIGINFO:
|
||||
ret = ptrace_setsiginfo(child, (siginfo_t __user *) data);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* ptrace_traceme -- helper for PTRACE_TRACEME
|
||||
*
|
||||
* Performs checks and sets PT_PTRACED.
|
||||
* Should be used by all ptrace implementations for PTRACE_TRACEME.
|
||||
*/
|
||||
int ptrace_traceme(void)
|
||||
{
|
||||
int ret = -EPERM;
|
||||
|
||||
/*
|
||||
* Are we already being traced?
|
||||
*/
|
||||
task_lock(current);
|
||||
if (!(current->ptrace & PT_PTRACED)) {
|
||||
ret = security_ptrace(current->parent, current);
|
||||
/*
|
||||
* Set the ptrace bit in the process ptrace flags.
|
||||
*/
|
||||
if (!ret)
|
||||
current->ptrace |= PT_PTRACED;
|
||||
}
|
||||
task_unlock(current);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* ptrace_get_task_struct -- grab a task struct reference for ptrace
|
||||
* @pid: process id to grab a task_struct reference of
|
||||
*
|
||||
* This function is a helper for ptrace implementations. It checks
|
||||
* permissions and then grabs a task struct for use of the actual
|
||||
* ptrace implementation.
|
||||
*
|
||||
* Returns the task_struct for @pid or an ERR_PTR() on failure.
|
||||
*/
|
||||
struct task_struct *ptrace_get_task_struct(pid_t pid)
{
	struct task_struct *child;

	/*
	 * Tracing init is not allowed.
	 */
	if (pid == 1)
		return ERR_PTR(-EPERM);

	/* Look the task up and take a reference under tasklist_lock. */
	read_lock(&tasklist_lock);
	child = find_task_by_pid(pid);
	if (child)
		get_task_struct(child);
	read_unlock(&tasklist_lock);

	if (!child)
		return ERR_PTR(-ESRCH);
	return child;
}
|
||||
|
||||
#ifndef __ARCH_SYS_PTRACE
|
||||
asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
|
||||
{
|
||||
struct task_struct *child;
|
||||
long ret;
|
||||
|
||||
/*
|
||||
* This lock_kernel fixes a subtle race with suid exec
|
||||
*/
|
||||
lock_kernel();
|
||||
if (request == PTRACE_TRACEME) {
|
||||
ret = ptrace_traceme();
|
||||
goto out;
|
||||
}
|
||||
|
||||
child = ptrace_get_task_struct(pid);
|
||||
if (IS_ERR(child)) {
|
||||
ret = PTR_ERR(child);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (request == PTRACE_ATTACH) {
|
||||
ret = ptrace_attach(child);
|
||||
goto out_put_task_struct;
|
||||
}
|
||||
|
||||
ret = ptrace_check_attach(child, request == PTRACE_KILL);
|
||||
if (ret < 0)
|
||||
goto out_put_task_struct;
|
||||
|
||||
ret = arch_ptrace(child, request, addr, data);
|
||||
if (ret < 0)
|
||||
goto out_put_task_struct;
|
||||
|
||||
out_put_task_struct:
|
||||
put_task_struct(child);
|
||||
out:
|
||||
unlock_kernel();
|
||||
return ret;
|
||||
}
|
||||
#endif /* __ARCH_SYS_PTRACE */
|
||||
635
kernel/rcupdate.c
Normal file
635
kernel/rcupdate.c
Normal file
@@ -0,0 +1,635 @@
|
||||
/*
|
||||
* Read-Copy Update mechanism for mutual exclusion
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*
|
||||
* Copyright (C) IBM Corporation, 2001
|
||||
*
|
||||
* Authors: Dipankar Sarma <dipankar@in.ibm.com>
|
||||
* Manfred Spraul <manfred@colorfullife.com>
|
||||
*
|
||||
* Based on the original work by Paul McKenney <paulmck@us.ibm.com>
|
||||
* and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
|
||||
* Papers:
|
||||
* http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
|
||||
* http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
|
||||
*
|
||||
* For detailed explanation of Read-Copy Update mechanism see -
|
||||
* http://lse.sourceforge.net/locking/rcupdate.html
|
||||
*
|
||||
*/
|
||||
#include <linux/types.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/sched.h>
|
||||
#include <asm/atomic.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/completion.h>
|
||||
#include <linux/moduleparam.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/mutex.h>
|
||||
|
||||
/* Definition for rcupdate control block. */
|
||||
static struct rcu_ctrlblk rcu_ctrlblk = {
|
||||
.cur = -300,
|
||||
.completed = -300,
|
||||
.lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock),
|
||||
.cpumask = CPU_MASK_NONE,
|
||||
};
|
||||
static struct rcu_ctrlblk rcu_bh_ctrlblk = {
|
||||
.cur = -300,
|
||||
.completed = -300,
|
||||
.lock = __SPIN_LOCK_UNLOCKED(&rcu_bh_ctrlblk.lock),
|
||||
.cpumask = CPU_MASK_NONE,
|
||||
};
|
||||
|
||||
DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L };
|
||||
DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
|
||||
|
||||
/* Fake initialization required by compiler */
|
||||
static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL};
|
||||
static int blimit = 10;
|
||||
static int qhimark = 10000;
|
||||
static int qlowmark = 100;
|
||||
|
||||
static atomic_t rcu_barrier_cpu_count;
|
||||
static DEFINE_MUTEX(rcu_barrier_mutex);
|
||||
static struct completion rcu_barrier_completion;
|
||||
|
||||
#ifdef CONFIG_SMP
static void force_quiescent_state(struct rcu_data *rdp,
			struct rcu_ctrlblk *rcp)
{
	int cpu;
	cpumask_t cpumask;

	set_need_resched();
	if (unlikely(!rcp->signaled)) {
		rcp->signaled = 1;
		/*
		 * Kick every cpu still holding up the grace period,
		 * except ourselves: with irqs disabled, rdp->cpu is
		 * the current cpu, and set_need_resched() above has
		 * already dealt with it.
		 */
		cpumask = rcp->cpumask;
		cpu_clear(rdp->cpu, cpumask);
		for_each_cpu_mask(cpu, cpumask)
			smp_send_reschedule(cpu);
	}
}
#else
/* UP: rescheduling this cpu is all that is ever needed. */
static inline void force_quiescent_state(struct rcu_data *rdp,
			struct rcu_ctrlblk *rcp)
{
	set_need_resched();
}
#endif
|
||||
|
||||
/**
|
||||
* call_rcu - Queue an RCU callback for invocation after a grace period.
|
||||
* @head: structure to be used for queueing the RCU updates.
|
||||
* @func: actual update function to be invoked after the grace period
|
||||
*
|
||||
* The update function will be invoked some time after a full grace
|
||||
* period elapses, in other words after all currently executing RCU
|
||||
* read-side critical sections have completed. RCU read-side critical
|
||||
* sections are delimited by rcu_read_lock() and rcu_read_unlock(),
|
||||
* and may be nested.
|
||||
*/
|
||||
void fastcall call_rcu(struct rcu_head *head,
				void (*func)(struct rcu_head *rcu))
{
	unsigned long flags;
	struct rcu_data *rdp;

	head->func = func;
	head->next = NULL;

	/* Append to this cpu's "next" queue with irqs off. */
	local_irq_save(flags);
	rdp = &__get_cpu_var(rcu_data);
	*rdp->nxttail = head;
	rdp->nxttail = &head->next;
	if (unlikely(++rdp->qlen > qhimark)) {
		/* Queue is getting long: lift the batch limit and push
		 * the other cpus through a quiescent state. */
		rdp->blimit = INT_MAX;
		force_quiescent_state(rdp, &rcu_ctrlblk);
	}
	local_irq_restore(flags);
}
|
||||
|
||||
/**
|
||||
* call_rcu_bh - Queue an RCU for invocation after a quicker grace period.
|
||||
* @head: structure to be used for queueing the RCU updates.
|
||||
* @func: actual update function to be invoked after the grace period
|
||||
*
|
||||
* The update function will be invoked some time after a full grace
|
||||
* period elapses, in other words after all currently executing RCU
|
||||
* read-side critical sections have completed. call_rcu_bh() assumes
|
||||
* that the read-side critical sections end on completion of a softirq
|
||||
* handler. This means that read-side critical sections in process
|
||||
* context must not be interrupted by softirqs. This interface is to be
|
||||
* used when most of the read-side critical sections are in softirq context.
|
||||
* RCU read-side critical sections are delimited by rcu_read_lock() and
|
||||
* rcu_read_unlock(), * if in interrupt context or rcu_read_lock_bh()
|
||||
* and rcu_read_unlock_bh(), if in process context. These may be nested.
|
||||
*/
|
||||
void fastcall call_rcu_bh(struct rcu_head *head,
				void (*func)(struct rcu_head *rcu))
{
	unsigned long flags;
	struct rcu_data *rdp;

	head->func = func;
	head->next = NULL;

	/* Same as call_rcu(), but on the softirq-flavoured queue. */
	local_irq_save(flags);
	rdp = &__get_cpu_var(rcu_bh_data);
	*rdp->nxttail = head;
	rdp->nxttail = &head->next;

	if (unlikely(++rdp->qlen > qhimark)) {
		rdp->blimit = INT_MAX;
		force_quiescent_state(rdp, &rcu_bh_ctrlblk);
	}

	local_irq_restore(flags);
}
|
||||
|
||||
/*
|
||||
* Return the number of RCU batches processed thus far. Useful
|
||||
* for debug and statistics.
|
||||
*/
|
||||
long rcu_batches_completed(void)
|
||||
{
|
||||
return rcu_ctrlblk.completed;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the number of RCU batches processed thus far. Useful
|
||||
* for debug and statistics.
|
||||
*/
|
||||
long rcu_batches_completed_bh(void)
|
||||
{
|
||||
return rcu_bh_ctrlblk.completed;
|
||||
}
|
||||
|
||||
static void rcu_barrier_callback(struct rcu_head *notused)
|
||||
{
|
||||
if (atomic_dec_and_test(&rcu_barrier_cpu_count))
|
||||
complete(&rcu_barrier_completion);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called with preemption disabled, and from cross-cpu IRQ context.
|
||||
*/
|
||||
static void rcu_barrier_func(void *notused)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
|
||||
struct rcu_head *head;
|
||||
|
||||
head = &rdp->barrier;
|
||||
atomic_inc(&rcu_barrier_cpu_count);
|
||||
call_rcu(head, rcu_barrier_callback);
|
||||
}
|
||||
|
||||
/**
|
||||
* rcu_barrier - Wait until all the in-flight RCUs are complete.
|
||||
*/
|
||||
void rcu_barrier(void)
|
||||
{
|
||||
BUG_ON(in_interrupt());
|
||||
/* Take cpucontrol mutex to protect against CPU hotplug */
|
||||
mutex_lock(&rcu_barrier_mutex);
|
||||
init_completion(&rcu_barrier_completion);
|
||||
atomic_set(&rcu_barrier_cpu_count, 0);
|
||||
on_each_cpu(rcu_barrier_func, NULL, 0, 1);
|
||||
wait_for_completion(&rcu_barrier_completion);
|
||||
mutex_unlock(&rcu_barrier_mutex);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_barrier);
|
||||
|
||||
/*
|
||||
* Invoke the completed RCU callbacks. They are expected to be in
|
||||
* a per-cpu list.
|
||||
*/
|
||||
static void rcu_do_batch(struct rcu_data *rdp)
|
||||
{
|
||||
struct rcu_head *next, *list;
|
||||
int count = 0;
|
||||
|
||||
list = rdp->donelist;
|
||||
while (list) {
|
||||
next = list->next;
|
||||
prefetch(next);
|
||||
list->func(list);
|
||||
list = next;
|
||||
if (++count >= rdp->blimit)
|
||||
break;
|
||||
}
|
||||
rdp->donelist = list;
|
||||
|
||||
local_irq_disable();
|
||||
rdp->qlen -= count;
|
||||
local_irq_enable();
|
||||
if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark)
|
||||
rdp->blimit = blimit;
|
||||
|
||||
if (!rdp->donelist)
|
||||
rdp->donetail = &rdp->donelist;
|
||||
else
|
||||
tasklet_schedule(&per_cpu(rcu_tasklet, rdp->cpu));
|
||||
}
|
||||
|
||||
/*
|
||||
* Grace period handling:
|
||||
* The grace period handling consists out of two steps:
|
||||
* - A new grace period is started.
|
||||
* This is done by rcu_start_batch. The start is not broadcasted to
|
||||
* all cpus, they must pick this up by comparing rcp->cur with
|
||||
* rdp->quiescbatch. All cpus are recorded in the
|
||||
* rcu_ctrlblk.cpumask bitmap.
|
||||
* - All cpus must go through a quiescent state.
|
||||
* Since the start of the grace period is not broadcasted, at least two
|
||||
* calls to rcu_check_quiescent_state are required:
|
||||
* The first call just notices that a new grace period is running. The
|
||||
* following calls check if there was a quiescent state since the beginning
|
||||
* of the grace period. If so, it updates rcu_ctrlblk.cpumask. If
|
||||
* the bitmap is empty, then the grace period is completed.
|
||||
* rcu_check_quiescent_state calls rcu_start_batch(0) to start the next grace
|
||||
* period (if necessary).
|
||||
*/
|
||||
/*
|
||||
* Register a new batch of callbacks, and start it up if there is currently no
|
||||
* active batch and the batch to be registered has not already occurred.
|
||||
* Caller must hold rcu_ctrlblk.lock.
|
||||
*/
|
||||
static void rcu_start_batch(struct rcu_ctrlblk *rcp)
|
||||
{
|
||||
if (rcp->next_pending &&
|
||||
rcp->completed == rcp->cur) {
|
||||
rcp->next_pending = 0;
|
||||
/*
|
||||
* next_pending == 0 must be visible in
|
||||
* __rcu_process_callbacks() before it can see new value of cur.
|
||||
*/
|
||||
smp_wmb();
|
||||
rcp->cur++;
|
||||
|
||||
/*
|
||||
* Accessing nohz_cpu_mask before incrementing rcp->cur needs a
|
||||
* Barrier Otherwise it can cause tickless idle CPUs to be
|
||||
* included in rcp->cpumask, which will extend graceperiods
|
||||
* unnecessarily.
|
||||
*/
|
||||
smp_mb();
|
||||
cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask);
|
||||
|
||||
rcp->signaled = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* cpu went through a quiescent state since the beginning of the grace period.
|
||||
* Clear it from the cpu mask and complete the grace period if it was the last
|
||||
* cpu. Start another grace period if someone has further entries pending
|
||||
*/
|
||||
static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp)
|
||||
{
|
||||
cpu_clear(cpu, rcp->cpumask);
|
||||
if (cpus_empty(rcp->cpumask)) {
|
||||
/* batch completed ! */
|
||||
rcp->completed = rcp->cur;
|
||||
rcu_start_batch(rcp);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if the cpu has gone through a quiescent state (say context
|
||||
* switch). If so and if it already hasn't done so in this RCU
|
||||
* quiescent cycle, then indicate that it has done so.
|
||||
*/
|
||||
static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
|
||||
struct rcu_data *rdp)
|
||||
{
|
||||
if (rdp->quiescbatch != rcp->cur) {
|
||||
/* start new grace period: */
|
||||
rdp->qs_pending = 1;
|
||||
rdp->passed_quiesc = 0;
|
||||
rdp->quiescbatch = rcp->cur;
|
||||
return;
|
||||
}
|
||||
|
||||
/* Grace period already completed for this cpu?
|
||||
* qs_pending is checked instead of the actual bitmap to avoid
|
||||
* cacheline trashing.
|
||||
*/
|
||||
if (!rdp->qs_pending)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Was there a quiescent state since the beginning of the grace
|
||||
* period? If no, then exit and wait for the next call.
|
||||
*/
|
||||
if (!rdp->passed_quiesc)
|
||||
return;
|
||||
rdp->qs_pending = 0;
|
||||
|
||||
spin_lock(&rcp->lock);
|
||||
/*
|
||||
* rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync
|
||||
* during cpu startup. Ignore the quiescent state.
|
||||
*/
|
||||
if (likely(rdp->quiescbatch == rcp->cur))
|
||||
cpu_quiet(rdp->cpu, rcp);
|
||||
|
||||
spin_unlock(&rcp->lock);
|
||||
}
|
||||
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
|
||||
/* warning! helper for rcu_offline_cpu. do not use elsewhere without reviewing
|
||||
* locking requirements, the list it's pulling from has to belong to a cpu
|
||||
* which is dead and hence not processing interrupts.
|
||||
*/
|
||||
static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list,
|
||||
struct rcu_head **tail)
|
||||
{
|
||||
local_irq_disable();
|
||||
*this_rdp->nxttail = list;
|
||||
if (list)
|
||||
this_rdp->nxttail = tail;
|
||||
local_irq_enable();
|
||||
}
|
||||
|
||||
static void __rcu_offline_cpu(struct rcu_data *this_rdp,
|
||||
struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
|
||||
{
|
||||
/* if the cpu going offline owns the grace period
|
||||
* we can block indefinitely waiting for it, so flush
|
||||
* it here
|
||||
*/
|
||||
spin_lock_bh(&rcp->lock);
|
||||
if (rcp->cur != rcp->completed)
|
||||
cpu_quiet(rdp->cpu, rcp);
|
||||
spin_unlock_bh(&rcp->lock);
|
||||
rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail);
|
||||
rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail);
|
||||
rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail);
|
||||
}
|
||||
|
||||
static void rcu_offline_cpu(int cpu)
|
||||
{
|
||||
struct rcu_data *this_rdp = &get_cpu_var(rcu_data);
|
||||
struct rcu_data *this_bh_rdp = &get_cpu_var(rcu_bh_data);
|
||||
|
||||
__rcu_offline_cpu(this_rdp, &rcu_ctrlblk,
|
||||
&per_cpu(rcu_data, cpu));
|
||||
__rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk,
|
||||
&per_cpu(rcu_bh_data, cpu));
|
||||
put_cpu_var(rcu_data);
|
||||
put_cpu_var(rcu_bh_data);
|
||||
tasklet_kill_immediate(&per_cpu(rcu_tasklet, cpu), cpu);
|
||||
}
|
||||
|
||||
#else

/* Without CPU hotplug a cpu can never go away: nothing to migrate. */
static void rcu_offline_cpu(int cpu)
{
}

#endif
|
||||
|
||||
/*
|
||||
* This does the RCU processing work from tasklet context.
|
||||
*/
|
||||
static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
|
||||
struct rcu_data *rdp)
|
||||
{
|
||||
if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) {
|
||||
*rdp->donetail = rdp->curlist;
|
||||
rdp->donetail = rdp->curtail;
|
||||
rdp->curlist = NULL;
|
||||
rdp->curtail = &rdp->curlist;
|
||||
}
|
||||
|
||||
if (rdp->nxtlist && !rdp->curlist) {
|
||||
local_irq_disable();
|
||||
rdp->curlist = rdp->nxtlist;
|
||||
rdp->curtail = rdp->nxttail;
|
||||
rdp->nxtlist = NULL;
|
||||
rdp->nxttail = &rdp->nxtlist;
|
||||
local_irq_enable();
|
||||
|
||||
/*
|
||||
* start the next batch of callbacks
|
||||
*/
|
||||
|
||||
/* determine batch number */
|
||||
rdp->batch = rcp->cur + 1;
|
||||
/* see the comment and corresponding wmb() in
|
||||
* the rcu_start_batch()
|
||||
*/
|
||||
smp_rmb();
|
||||
|
||||
if (!rcp->next_pending) {
|
||||
/* and start it/schedule start if it's a new batch */
|
||||
spin_lock(&rcp->lock);
|
||||
rcp->next_pending = 1;
|
||||
rcu_start_batch(rcp);
|
||||
spin_unlock(&rcp->lock);
|
||||
}
|
||||
}
|
||||
|
||||
rcu_check_quiescent_state(rcp, rdp);
|
||||
if (rdp->donelist)
|
||||
rcu_do_batch(rdp);
|
||||
}
|
||||
|
||||
static void rcu_process_callbacks(unsigned long unused)
|
||||
{
|
||||
__rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data));
|
||||
__rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
|
||||
}
|
||||
|
||||
static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
|
||||
{
|
||||
/* This cpu has pending rcu entries and the grace period
|
||||
* for them has completed.
|
||||
*/
|
||||
if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch))
|
||||
return 1;
|
||||
|
||||
/* This cpu has no pending entries, but there are new entries */
|
||||
if (!rdp->curlist && rdp->nxtlist)
|
||||
return 1;
|
||||
|
||||
/* This cpu has finished callbacks to invoke */
|
||||
if (rdp->donelist)
|
||||
return 1;
|
||||
|
||||
/* The rcu core waits for a quiescent state from the cpu */
|
||||
if (rdp->quiescbatch != rcp->cur || rdp->qs_pending)
|
||||
return 1;
|
||||
|
||||
/* nothing to do */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check to see if there is any immediate RCU-related work to be done
|
||||
* by the current CPU, returning 1 if so. This function is part of the
|
||||
* RCU implementation; it is -not- an exported member of the RCU API.
|
||||
*/
|
||||
int rcu_pending(int cpu)
|
||||
{
|
||||
return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)) ||
|
||||
__rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu));
|
||||
}
|
||||
|
||||
/*
|
||||
* Check to see if any future RCU-related work will need to be done
|
||||
* by the current CPU, even if none need be done immediately, returning
|
||||
* 1 if so. This function is part of the RCU implementation; it is -not-
|
||||
* an exported member of the RCU API.
|
||||
*/
|
||||
int rcu_needs_cpu(int cpu)
|
||||
{
|
||||
struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
|
||||
struct rcu_data *rdp_bh = &per_cpu(rcu_bh_data, cpu);
|
||||
|
||||
return (!!rdp->curlist || !!rdp_bh->curlist || rcu_pending(cpu));
|
||||
}
|
||||
|
||||
void rcu_check_callbacks(int cpu, int user)
|
||||
{
|
||||
if (user ||
|
||||
(idle_cpu(cpu) && !in_softirq() &&
|
||||
hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
|
||||
rcu_qsctr_inc(cpu);
|
||||
rcu_bh_qsctr_inc(cpu);
|
||||
} else if (!in_softirq())
|
||||
rcu_bh_qsctr_inc(cpu);
|
||||
tasklet_schedule(&per_cpu(rcu_tasklet, cpu));
|
||||
}
|
||||
|
||||
static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
|
||||
struct rcu_data *rdp)
|
||||
{
|
||||
memset(rdp, 0, sizeof(*rdp));
|
||||
rdp->curtail = &rdp->curlist;
|
||||
rdp->nxttail = &rdp->nxtlist;
|
||||
rdp->donetail = &rdp->donelist;
|
||||
rdp->quiescbatch = rcp->completed;
|
||||
rdp->qs_pending = 0;
|
||||
rdp->cpu = cpu;
|
||||
rdp->blimit = blimit;
|
||||
}
|
||||
|
||||
static void __devinit rcu_online_cpu(int cpu)
{
	struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
	struct rcu_data *bh_rdp = &per_cpu(rcu_bh_data, cpu);

	/* Initialise both flavours' per-cpu state and the tasklet
	 * that will process callbacks on this cpu. */
	rcu_init_percpu_data(cpu, &rcu_ctrlblk, rdp);
	rcu_init_percpu_data(cpu, &rcu_bh_ctrlblk, bh_rdp);
	tasklet_init(&per_cpu(rcu_tasklet, cpu), rcu_process_callbacks, 0UL);
}
|
||||
|
||||
static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
				unsigned long action, void *hcpu)
{
	long cpu = (long)hcpu;

	switch (action) {
	case CPU_UP_PREPARE:
		rcu_online_cpu(cpu);
		break;
	case CPU_DEAD:
		rcu_offline_cpu(cpu);
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}
|
||||
|
||||
static struct notifier_block __cpuinitdata rcu_nb = {
|
||||
.notifier_call = rcu_cpu_notify,
|
||||
};
|
||||
|
||||
/*
|
||||
* Initializes rcu mechanism. Assumed to be called early.
|
||||
* That is before local timer(SMP) or jiffie timer (uniproc) is setup.
|
||||
* Note that rcu_qsctr and friends are implicitly
|
||||
* initialized due to the choice of ``0'' for RCU_CTR_INVALID.
|
||||
*/
|
||||
void __init rcu_init(void)
{
	/* Bring up RCU on the boot cpu directly... */
	rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE,
			(void *)(long)smp_processor_id());
	/* ...and register the notifier for the non-boot cpus. */
	register_cpu_notifier(&rcu_nb);
}
|
||||
|
||||
/*
 * On-stack helper pairing an RCU callback head with a completion so a
 * caller can sleep until a grace period has elapsed.
 */
struct rcu_synchronize {
	struct rcu_head head;		/* queued via call_rcu() */
	struct completion completion;	/* signalled by wakeme_after_rcu() */
};
|
||||
|
||||
/* Because of FASTCALL declaration of complete, we use this wrapper */
|
||||
static void wakeme_after_rcu(struct rcu_head *head)
|
||||
{
|
||||
struct rcu_synchronize *rcu;
|
||||
|
||||
rcu = container_of(head, struct rcu_synchronize, head);
|
||||
complete(&rcu->completion);
|
||||
}
|
||||
|
||||
/**
 * synchronize_rcu - wait until a grace period has elapsed.
 *
 * Control will return to the caller some time after a full grace
 * period has elapsed, in other words after all currently executing RCU
 * read-side critical sections have completed. RCU read-side critical
 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
 * and may be nested.
 *
 * If your read-side code is not protected by rcu_read_lock(), do -not-
 * use synchronize_rcu().
 */
void synchronize_rcu(void)
{
	struct rcu_synchronize rcu;	/* on-stack: we block until done */

	init_completion(&rcu.completion);
	/* Will wake me after RCU finished */
	call_rcu(&rcu.head, wakeme_after_rcu);

	/* Wait for it */
	wait_for_completion(&rcu.completion);
}
|
||||
|
||||
/*
 * Module parameters tuning RCU callback batching (blimit/qhimark/qlowmark
 * are defined earlier in this file), plus the public RCU entry points.
 */
module_param(blimit, int, 0);
module_param(qhimark, int, 0);
module_param(qlowmark, int, 0);
EXPORT_SYMBOL_GPL(rcu_batches_completed);
EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
EXPORT_SYMBOL_GPL(call_rcu);
EXPORT_SYMBOL_GPL(call_rcu_bh);
EXPORT_SYMBOL_GPL(synchronize_rcu);
|
||||
1004
kernel/rcutorture.c
Normal file
1004
kernel/rcutorture.c
Normal file
File diff suppressed because it is too large
Load Diff
1097
kernel/relay.c
Normal file
1097
kernel/relay.c
Normal file
File diff suppressed because it is too large
Load Diff
712
kernel/resource.c
Normal file
712
kernel/resource.c
Normal file
@@ -0,0 +1,712 @@
|
||||
/*
|
||||
* linux/kernel/resource.c
|
||||
*
|
||||
* Copyright (C) 1999 Linus Torvalds
|
||||
* Copyright (C) 1999 Martin Mares <mj@ucw.cz>
|
||||
*
|
||||
* Arbitrary resource management.
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/ioport.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/device.h>
|
||||
#include <asm/io.h>
|
||||
|
||||
|
||||
/* Root of the I/O-port resource tree. */
struct resource ioport_resource = {
	.name	= "PCI IO",
	.start	= 0,
	.end	= IO_SPACE_LIMIT,
	.flags	= IORESOURCE_IO,
};
EXPORT_SYMBOL(ioport_resource);

/* Root of the memory-mapped I/O resource tree (.end = -1 wraps to max). */
struct resource iomem_resource = {
	.name	= "PCI mem",
	.start	= 0,
	.end	= -1,
	.flags	= IORESOURCE_MEM,
};
EXPORT_SYMBOL(iomem_resource);

/* Protects both resource trees: readers for iteration, writer for edits. */
static DEFINE_RWLOCK(resource_lock);
|
||||
|
||||
#ifdef CONFIG_PROC_FS
|
||||
|
||||
enum { MAX_IORES_LEVEL = 5 };
|
||||
|
||||
static void *r_next(struct seq_file *m, void *v, loff_t *pos)
|
||||
{
|
||||
struct resource *p = v;
|
||||
(*pos)++;
|
||||
if (p->child)
|
||||
return p->child;
|
||||
while (!p->sibling && p->parent)
|
||||
p = p->parent;
|
||||
return p->sibling;
|
||||
}
|
||||
|
||||
static void *r_start(struct seq_file *m, loff_t *pos)
|
||||
__acquires(resource_lock)
|
||||
{
|
||||
struct resource *p = m->private;
|
||||
loff_t l = 0;
|
||||
read_lock(&resource_lock);
|
||||
for (p = p->child; p && l < *pos; p = r_next(m, p, &l))
|
||||
;
|
||||
return p;
|
||||
}
|
||||
|
||||
static void r_stop(struct seq_file *m, void *v)
|
||||
__releases(resource_lock)
|
||||
{
|
||||
read_unlock(&resource_lock);
|
||||
}
|
||||
|
||||
static int r_show(struct seq_file *m, void *v)
|
||||
{
|
||||
struct resource *root = m->private;
|
||||
struct resource *r = v, *p;
|
||||
int width = root->end < 0x10000 ? 4 : 8;
|
||||
int depth;
|
||||
|
||||
for (depth = 0, p = r; depth < MAX_IORES_LEVEL; depth++, p = p->parent)
|
||||
if (p->parent == root)
|
||||
break;
|
||||
seq_printf(m, "%*s%0*llx-%0*llx : %s\n",
|
||||
depth * 2, "",
|
||||
width, (unsigned long long) r->start,
|
||||
width, (unsigned long long) r->end,
|
||||
r->name ? r->name : "<BAD>");
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct seq_operations resource_op = {
|
||||
.start = r_start,
|
||||
.next = r_next,
|
||||
.stop = r_stop,
|
||||
.show = r_show,
|
||||
};
|
||||
|
||||
static int ioports_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
int res = seq_open(file, &resource_op);
|
||||
if (!res) {
|
||||
struct seq_file *m = file->private_data;
|
||||
m->private = &ioport_resource;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
static int iomem_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
int res = seq_open(file, &resource_op);
|
||||
if (!res) {
|
||||
struct seq_file *m = file->private_data;
|
||||
m->private = &iomem_resource;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
static const struct file_operations proc_ioports_operations = {
|
||||
.open = ioports_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = seq_release,
|
||||
};
|
||||
|
||||
static const struct file_operations proc_iomem_operations = {
|
||||
.open = iomem_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = seq_release,
|
||||
};
|
||||
|
||||
static int __init ioresources_init(void)
|
||||
{
|
||||
struct proc_dir_entry *entry;
|
||||
|
||||
entry = create_proc_entry("ioports", 0, NULL);
|
||||
if (entry)
|
||||
entry->proc_fops = &proc_ioports_operations;
|
||||
entry = create_proc_entry("iomem", 0, NULL);
|
||||
if (entry)
|
||||
entry->proc_fops = &proc_iomem_operations;
|
||||
return 0;
|
||||
}
|
||||
__initcall(ioresources_init);
|
||||
|
||||
#endif /* CONFIG_PROC_FS */
|
||||
|
||||
/* Return the conflict entry if you can't request it */
static struct resource * __request_resource(struct resource *root, struct resource *new)
{
	resource_size_t start = new->start;
	resource_size_t end = new->end;
	struct resource *tmp, **p;

	/* Reject inverted ranges and ranges outside the root; returning
	 * root itself signals "can never fit" to callers. */
	if (end < start)
		return root;
	if (start < root->start)
		return root;
	if (end > root->end)
		return root;
	/* Walk the sorted child list; insert before the first child that
	 * starts past our end, or report the overlapping child. */
	p = &root->child;
	for (;;) {
		tmp = *p;
		if (!tmp || tmp->start > end) {
			new->sibling = tmp;
			*p = new;
			new->parent = root;
			return NULL;	/* success */
		}
		p = &tmp->sibling;
		if (tmp->end < start)
			continue;	/* entirely before us: keep walking */
		return tmp;		/* overlap: conflict */
	}
}
|
||||
|
||||
static int __release_resource(struct resource *old)
|
||||
{
|
||||
struct resource *tmp, **p;
|
||||
|
||||
p = &old->parent->child;
|
||||
for (;;) {
|
||||
tmp = *p;
|
||||
if (!tmp)
|
||||
break;
|
||||
if (tmp == old) {
|
||||
*p = tmp->sibling;
|
||||
old->parent = NULL;
|
||||
return 0;
|
||||
}
|
||||
p = &tmp->sibling;
|
||||
}
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/**
 * request_resource - request and reserve an I/O or memory resource
 * @root: root resource descriptor
 * @new: resource descriptor desired by caller
 *
 * Returns 0 for success, negative error code on error.
 */
int request_resource(struct resource *root, struct resource *new)
{
	struct resource *conflict;

	write_lock(&resource_lock);
	conflict = __request_resource(root, new);
	write_unlock(&resource_lock);
	/* The conflicting entry itself is discarded; use
	 * ____request_resource() if the caller needs it. */
	return conflict ? -EBUSY : 0;
}

EXPORT_SYMBOL(request_resource);
|
||||
|
||||
/**
 * ____request_resource - reserve a resource, with resource conflict returned
 * @root: root resource descriptor
 * @new: resource descriptor desired by caller
 *
 * Returns:
 * On success, NULL is returned.
 * On error, a pointer to the conflicting resource is returned.
 */
struct resource *____request_resource(struct resource *root, struct resource *new)
{
	struct resource *conflict;

	write_lock(&resource_lock);
	conflict = __request_resource(root, new);
	write_unlock(&resource_lock);
	return conflict;
}

EXPORT_SYMBOL(____request_resource);
|
||||
|
||||
/**
 * release_resource - release a previously reserved resource
 * @old: resource pointer
 *
 * Returns 0 on success, -EINVAL if @old was not found in its
 * parent's child list.
 */
int release_resource(struct resource *old)
{
	int retval;

	write_lock(&resource_lock);
	retval = __release_resource(old);
	write_unlock(&resource_lock);
	return retval;
}

EXPORT_SYMBOL(release_resource);
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTPLUG
|
||||
/*
|
||||
* Finds the lowest memory reosurce exists within [res->start.res->end)
|
||||
* the caller must specify res->start, res->end, res->flags.
|
||||
* If found, returns 0, res is overwritten, if not found, returns -1.
|
||||
*/
|
||||
int find_next_system_ram(struct resource *res)
|
||||
{
|
||||
resource_size_t start, end;
|
||||
struct resource *p;
|
||||
|
||||
BUG_ON(!res);
|
||||
|
||||
start = res->start;
|
||||
end = res->end;
|
||||
BUG_ON(start >= end);
|
||||
|
||||
read_lock(&resource_lock);
|
||||
for (p = iomem_resource.child; p ; p = p->sibling) {
|
||||
/* system ram is just marked as IORESOURCE_MEM */
|
||||
if (p->flags != res->flags)
|
||||
continue;
|
||||
if (p->start > end) {
|
||||
p = NULL;
|
||||
break;
|
||||
}
|
||||
if ((p->end >= start) && (p->start < end))
|
||||
break;
|
||||
}
|
||||
read_unlock(&resource_lock);
|
||||
if (!p)
|
||||
return -1;
|
||||
/* copy data */
|
||||
if (res->start < p->start)
|
||||
res->start = p->start;
|
||||
if (res->end > p->end)
|
||||
res->end = p->end;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
 * Find empty slot in the resource tree given range and alignment.
 * On success *new is filled in with the chosen [start,end] and 0 is
 * returned; -EBUSY if no gap fits.  Caller holds resource_lock.
 */
static int find_resource(struct resource *root, struct resource *new,
			 resource_size_t size, resource_size_t min,
			 resource_size_t max, resource_size_t align,
			 void (*alignf)(void *, struct resource *,
					resource_size_t, resource_size_t),
			 void *alignf_data)
{
	struct resource *this = root->child;

	new->start = root->start;
	/*
	 * Skip past an allocated resource that starts at 0, since the assignment
	 * of this->start - 1 to new->end below would cause an underflow.
	 */
	if (this && this->start == 0) {
		new->start = this->end + 1;
		this = this->sibling;
	}
	/* Try each gap between consecutive children (and after the last). */
	for(;;) {
		if (this)
			new->end = this->start - 1;
		else
			new->end = root->end;
		/* Clamp the candidate gap to [min,max] and align it. */
		if (new->start < min)
			new->start = min;
		if (new->end > max)
			new->end = max;
		new->start = ALIGN(new->start, align);
		if (alignf)
			alignf(alignf_data, new, size, align);
		if (new->start < new->end && new->end - new->start >= size - 1) {
			new->end = new->start + size - 1;
			return 0;
		}
		if (!this)
			break;
		/* Gap too small: move past this child and try the next gap. */
		new->start = this->end + 1;
		this = this->sibling;
	}
	return -EBUSY;
}
|
||||
|
||||
/**
 * allocate_resource - allocate empty slot in the resource tree given range & alignment
 * @root: root resource descriptor
 * @new: resource descriptor desired by caller
 * @size: requested resource region size
 * @min: minimum boundary to allocate within (address, not a size)
 * @max: maximum boundary to allocate within (address, not a size)
 * @align: alignment requested, in bytes
 * @alignf: alignment function, optional, called if not NULL
 * @alignf_data: arbitrary data to pass to the @alignf function
 *
 * Returns 0 on success with @new filled in and inserted under @root,
 * -EBUSY if no suitable gap exists.
 */
int allocate_resource(struct resource *root, struct resource *new,
		      resource_size_t size, resource_size_t min,
		      resource_size_t max, resource_size_t align,
		      void (*alignf)(void *, struct resource *,
				     resource_size_t, resource_size_t),
		      void *alignf_data)
{
	int err;

	write_lock(&resource_lock);
	err = find_resource(root, new, size, min, max, align, alignf, alignf_data);
	if (err >= 0 && __request_resource(root, new))
		err = -EBUSY;
	write_unlock(&resource_lock);
	return err;
}

EXPORT_SYMBOL(allocate_resource);
|
||||
|
||||
/**
 * insert_resource - Inserts a resource in the resource tree
 * @parent: parent of the new resource
 * @new: new resource to insert
 *
 * Returns 0 on success, -EBUSY if the resource can't be inserted.
 *
 * This function is equivalent to request_resource when no conflict
 * happens. If a conflict happens, and the conflicting resources
 * entirely fit within the range of the new resource, then the new
 * resource is inserted and the conflicting resources become children of
 * the new resource.
 */
int insert_resource(struct resource *parent, struct resource *new)
{
	int result;
	struct resource *first, *next;

	write_lock(&resource_lock);

	/* Descend until we either insert cleanly or find the conflict
	 * that new must absorb as children. */
	for (;; parent = first) {
		result = 0;
		first = __request_resource(parent, new);
		if (!first)
			goto out;	/* no conflict: inserted */

		result = -EBUSY;
		if (first == parent)
			goto out;	/* new doesn't fit inside parent */

		if ((first->start > new->start) || (first->end < new->end))
			break;
		if ((first->start == new->start) && (first->end == new->end))
			break;
	}

	/* Collect the run of siblings that new will swallow; any partial
	 * overlap makes the insertion impossible. */
	for (next = first; ; next = next->sibling) {
		/* Partial overlap? Bad, and unfixable */
		if (next->start < new->start || next->end > new->end)
			goto out;
		if (!next->sibling)
			break;
		if (next->sibling->start > new->end)
			break;
	}

	result = 0;

	/* Splice new into parent's child list, re-parenting first..next. */
	new->parent = parent;
	new->sibling = next->sibling;
	new->child = first;

	next->sibling = NULL;
	for (next = first; next; next = next->sibling)
		next->parent = new;

	if (parent->child == first) {
		parent->child = new;
	} else {
		next = parent->child;
		while (next->sibling != first)
			next = next->sibling;
		next->sibling = new;
	}

 out:
	write_unlock(&resource_lock);
	return result;
}
|
||||
|
||||
/**
 * adjust_resource - modify a resource's start and size
 * @res: resource to modify
 * @start: new start value
 * @size: new size
 *
 * Given an existing resource, change its start and size to match the
 * arguments.  Returns 0 on success, -EBUSY if it can't fit.
 * Existing children of the resource are assumed to be immutable.
 */
int adjust_resource(struct resource *res, resource_size_t start, resource_size_t size)
{
	struct resource *tmp, *parent = res->parent;
	resource_size_t end = start + size - 1;
	int result = -EBUSY;

	write_lock(&resource_lock);

	/* New range must stay within the parent ... */
	if ((start < parent->start) || (end > parent->end))
		goto out;

	/* ... still cover every existing child ... */
	for (tmp = res->child; tmp; tmp = tmp->sibling) {
		if ((tmp->start < start) || (tmp->end > end))
			goto out;
	}

	/* ... and not run into the following sibling ... */
	if (res->sibling && (res->sibling->start <= end))
		goto out;

	/* ... nor back into the preceding sibling. */
	tmp = parent->child;
	if (tmp != res) {
		while (tmp->sibling != res)
			tmp = tmp->sibling;
		if (start <= tmp->end)
			goto out;
	}

	res->start = start;
	res->end = end;
	result = 0;

 out:
	write_unlock(&resource_lock);
	return result;
}

EXPORT_SYMBOL(adjust_resource);
|
||||
|
||||
/*
 * This is compatibility stuff for IO resources.
 *
 * Note how this, unlike the above, knows about
 * the IO flag meanings (busy etc).
 *
 * request_region creates a new busy region.
 *
 * check_region returns non-zero if the area is already busy.
 *
 * release_region releases a matching busy region.
 */

/**
 * __request_region - create a new busy resource region
 * @parent: parent resource descriptor
 * @start: resource start address
 * @n: resource region size
 * @name: reserving caller's ID string
 *
 * Returns the new resource on success, NULL on allocation failure or
 * if a busy conflict exists.  @name is referenced, not copied; the
 * caller must keep it alive as long as the region is held.
 */
struct resource * __request_region(struct resource *parent,
				   resource_size_t start, resource_size_t n,
				   const char *name)
{
	struct resource *res = kzalloc(sizeof(*res), GFP_KERNEL);

	if (res) {
		res->name = name;
		res->start = start;
		res->end = start + n - 1;
		res->flags = IORESOURCE_BUSY;

		write_lock(&resource_lock);

		for (;;) {
			struct resource *conflict;

			conflict = __request_resource(parent, res);
			if (!conflict)
				break;
			if (conflict != parent) {
				parent = conflict;
				/* Non-busy conflicts are containers:
				 * retry inside them. */
				if (!(conflict->flags & IORESOURCE_BUSY))
					continue;
			}

			/* Uhhuh, that didn't work out.. */
			kfree(res);
			res = NULL;
			break;
		}
		write_unlock(&resource_lock);
	}
	return res;
}
EXPORT_SYMBOL(__request_region);
|
||||
|
||||
/**
 * __check_region - check if a resource region is busy or free
 * @parent: parent resource descriptor
 * @start: resource start address
 * @n: resource region size
 *
 * Returns 0 if the region is free at the moment it is checked,
 * returns %-EBUSY if the region is busy.
 *
 * NOTE:
 * This function is deprecated because its use is racy.
 * Even if it returns 0, a subsequent call to request_region()
 * may fail because another driver etc. just allocated the region.
 * Do NOT use it.  It will be removed from the kernel.
 */
int __check_region(struct resource *parent, resource_size_t start,
			resource_size_t n)
{
	struct resource * res;

	/* Probe by briefly requesting and immediately releasing. */
	res = __request_region(parent, start, n, "check-region");
	if (!res)
		return -EBUSY;

	release_resource(res);
	kfree(res);
	return 0;
}
EXPORT_SYMBOL(__check_region);
|
||||
|
||||
/**
 * __release_region - release a previously reserved resource region
 * @parent: parent resource descriptor
 * @start: resource start address
 * @n: resource region size
 *
 * The described resource region must match a currently busy region
 * exactly; the matching resource is unlinked and freed.  A warning is
 * printed if no such region exists.
 */
void __release_region(struct resource *parent, resource_size_t start,
			resource_size_t n)
{
	struct resource **p;
	resource_size_t end;

	p = &parent->child;
	end = start + n - 1;

	write_lock(&resource_lock);

	for (;;) {
		struct resource *res = *p;

		if (!res)
			break;
		if (res->start <= start && res->end >= end) {
			/* Non-busy resources are containers: descend. */
			if (!(res->flags & IORESOURCE_BUSY)) {
				p = &res->child;
				continue;
			}
			if (res->start != start || res->end != end)
				break;	/* busy but not an exact match */
			*p = res->sibling;
			write_unlock(&resource_lock);
			kfree(res);
			return;
		}
		p = &res->sibling;
	}

	write_unlock(&resource_lock);

	printk(KERN_WARNING "Trying to free nonexistent resource "
		"<%016llx-%016llx>\n", (unsigned long long)start,
		(unsigned long long)end);
}
EXPORT_SYMBOL(__release_region);
|
||||
|
||||
/*
 * Managed region resource: devres-backed request_region variants whose
 * regions are released automatically when the owning device is unbound.
 */
struct region_devres {
	struct resource *parent;	/* tree the region was taken from */
	resource_size_t start;
	resource_size_t n;
};

/* devres release hook: undo the __request_region(). */
static void devm_region_release(struct device *dev, void *res)
{
	struct region_devres *this = res;

	__release_region(this->parent, this->start, this->n);
}

/* devres match hook: identify a region by (parent, start, n). */
static int devm_region_match(struct device *dev, void *res, void *match_data)
{
	struct region_devres *this = res, *match = match_data;

	return this->parent == match->parent &&
		this->start == match->start && this->n == match->n;
}

/*
 * Managed __request_region(): on success the region is registered with
 * @dev's devres list so it is released automatically on driver detach.
 */
struct resource * __devm_request_region(struct device *dev,
				struct resource *parent, resource_size_t start,
				resource_size_t n, const char *name)
{
	struct region_devres *dr = NULL;
	struct resource *res;

	dr = devres_alloc(devm_region_release, sizeof(struct region_devres),
			  GFP_KERNEL);
	if (!dr)
		return NULL;

	dr->parent = parent;
	dr->start = start;
	dr->n = n;

	res = __request_region(parent, start, n, name);
	if (res)
		devres_add(dev, dr);
	else
		devres_free(dr);

	return res;
}
EXPORT_SYMBOL(__devm_request_region);

/*
 * Managed counterpart of __release_region(): release the region and
 * drop the matching devres entry (warn if none was registered).
 */
void __devm_release_region(struct device *dev, struct resource *parent,
			   resource_size_t start, resource_size_t n)
{
	struct region_devres match_data = { parent, start, n };

	__release_region(parent, start, n);
	WARN_ON(devres_destroy(dev, devm_region_release, devm_region_match,
			       &match_data));
}
EXPORT_SYMBOL(__devm_release_region);
|
||||
|
||||
/*
 * Called from init/main.c to reserve IO ports.
 * Parses "reserve=start,count[,start,count...]" from the command line
 * and marks up to MAXRESERVE regions busy.
 */
#define MAXRESERVE 4
static int __init reserve_setup(char *str)
{
	static int reserved;
	static struct resource reserve[MAXRESERVE];

	for (;;) {
		int io_start, io_num;
		int x = reserved;

		/* get_option() returns 2 when a comma follows, i.e. a
		 * second value (the count) is present. */
		if (get_option (&str, &io_start) != 2)
			break;
		if (get_option (&str, &io_num)   == 0)
			break;
		if (x < MAXRESERVE) {
			struct resource *res = reserve + x;
			res->name = "reserved";
			res->start = io_start;
			res->end = io_start + io_num - 1;
			res->flags = IORESOURCE_BUSY;
			res->child = NULL;
			/* Addresses >= 64K can't be I/O ports: treat as
			 * memory. */
			if (request_resource(res->start >= 0x10000 ? &iomem_resource : &ioport_resource, res) == 0)
				reserved = x+1;
		}
	}
	return 1;
}

__setup("reserve=", reserve_setup);
|
||||
241
kernel/rtmutex-debug.c
Normal file
241
kernel/rtmutex-debug.c
Normal file
@@ -0,0 +1,241 @@
|
||||
/*
|
||||
* RT-Mutexes: blocking mutual exclusion locks with PI support
|
||||
*
|
||||
* started by Ingo Molnar and Thomas Gleixner:
|
||||
*
|
||||
* Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
|
||||
* Copyright (C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
|
||||
*
|
||||
* This code is based on the rt.c implementation in the preempt-rt tree.
|
||||
* Portions of said code are
|
||||
*
|
||||
* Copyright (C) 2004 LynuxWorks, Inc., Igor Manyilov, Bill Huey
|
||||
* Copyright (C) 2006 Esben Nielsen
|
||||
* Copyright (C) 2006 Kihon Technologies Inc.,
|
||||
* Steven Rostedt <rostedt@goodmis.org>
|
||||
*
|
||||
* See rt.c in preempt-rt for proper credits and further information
|
||||
*/
|
||||
#include <linux/sched.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/kallsyms.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/plist.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/debug_locks.h>
|
||||
|
||||
#include "rtmutex_common.h"
|
||||
|
||||
#ifdef CONFIG_DEBUG_RT_MUTEXES
|
||||
# include "rtmutex-debug.h"
|
||||
#else
|
||||
# include "rtmutex.h"
|
||||
#endif
|
||||
|
||||
/*
 * Debug trace helpers.  TRACE_OFF() disables further rt-mutex tracing
 * after the first detected problem, makes the console verbose, and
 * drops current's pi_lock if held so the error path can report safely.
 *
 * Note: the `&current` references below were restored from HTML-entity
 * corruption ("&curren" had been decoded to the currency sign).
 */
# define TRACE_WARN_ON(x)			WARN_ON(x)
# define TRACE_BUG_ON(x)			BUG_ON(x)

# define TRACE_OFF()						\
do {								\
	if (rt_trace_on) {					\
		rt_trace_on = 0;				\
		console_verbose();				\
		if (spin_is_locked(&current->pi_lock))		\
			spin_unlock(&current->pi_lock);		\
	}							\
} while (0)

/* Variant for contexts where pi_lock is known not to be held. */
# define TRACE_OFF_NOLOCK()					\
do {								\
	if (rt_trace_on) {					\
		rt_trace_on = 0;				\
		console_verbose();				\
	}							\
} while (0)

# define TRACE_BUG_LOCKED()			\
do {						\
	TRACE_OFF();				\
	BUG();					\
} while (0)

# define TRACE_WARN_ON_LOCKED(c)		\
do {						\
	if (unlikely(c)) {			\
		TRACE_OFF();			\
		WARN_ON(1);			\
	}					\
} while (0)

# define TRACE_BUG_ON_LOCKED(c)			\
do {						\
	if (unlikely(c))			\
		TRACE_BUG_LOCKED();		\
} while (0)

#ifdef CONFIG_SMP
# define SMP_TRACE_BUG_ON_LOCKED(c)	TRACE_BUG_ON_LOCKED(c)
#else
# define SMP_TRACE_BUG_ON_LOCKED(c)	do { } while (0)
#endif
|
||||
|
||||
/*
 * deadlock detection flag. We turn it off when we detect
 * the first problem because we dont want to recurse back
 * into the tracing code when doing error printk or
 * executing a BUG():
 */
int rt_trace_on = 1;

/* Externally-callable switch to disable deadlock tracing. */
void deadlock_trace_off(void)
{
	rt_trace_on = 0;
}
|
||||
|
||||
static void printk_task(struct task_struct *p)
|
||||
{
|
||||
if (p)
|
||||
printk("%16s:%5d [%p, %3d]", p->comm, p->pid, p, p->prio);
|
||||
else
|
||||
printk("<none>");
|
||||
}
|
||||
|
||||
/*
 * Print an identification of @lock: its name if one was given at init
 * time, otherwise the file:line where it was defined; optionally also
 * the current owner task.
 */
static void printk_lock(struct rt_mutex *lock, int print_owner)
{
	if (lock->name)
		printk(" [%p] {%s}\n",
			lock, lock->name);
	else
		printk(" [%p] {%s:%d}\n",
			lock, lock->file, lock->line);

	if (print_owner && rt_mutex_owner(lock)) {
		printk(".. ->owner: %p\n", lock->owner);
		printk(".. held by:  ");
		printk_task(rt_mutex_owner(lock));
		printk("\n");
	}
}
|
||||
|
||||
/*
 * Sanity check on task teardown: a dying task must not still have PI
 * waiters queued on it nor be blocked on an rt-mutex.
 */
void rt_mutex_debug_task_free(struct task_struct *task)
{
	WARN_ON(!plist_head_empty(&task->pi_waiters));
	WARN_ON(task->pi_blocked_on);
}
|
||||
|
||||
/*
 * We fill out the fields in the waiter to store the information about
 * the deadlock. We print when we return. act_waiter can be NULL in
 * case of a remove waiter operation.
 */
void debug_rt_mutex_deadlock(int detect, struct rt_mutex_waiter *act_waiter,
			     struct rt_mutex *lock)
{
	struct task_struct *task;

	/* Only record when tracing is on, detection is off and we have
	 * an active waiter to annotate. */
	if (!rt_trace_on || detect || !act_waiter)
		return;

	task = rt_mutex_owner(act_waiter->lock);
	if (task && task != current) {
		act_waiter->deadlock_task_pid = task->pid;
		act_waiter->deadlock_lock = lock;
	}
}
|
||||
|
||||
/*
 * Report a deadlock previously recorded by debug_rt_mutex_deadlock():
 * dump both locks, both tasks' held locks and stacks, then disable
 * tracing and local interrupts to freeze the scene.
 */
void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter)
{
	struct task_struct *task;

	if (!waiter->deadlock_lock || !rt_trace_on)
		return;

	task = find_task_by_pid(waiter->deadlock_task_pid);
	if (!task)
		return;

	/* pi_lock is not held here, so use the NOLOCK variant. */
	TRACE_OFF_NOLOCK();

	printk("\n============================================\n");
	printk(  "[ BUG: circular locking deadlock detected! ]\n");
	printk(  "--------------------------------------------\n");
	printk("%s/%d is deadlocking current task %s/%d\n\n",
	       task->comm, task->pid, current->comm, current->pid);

	printk("\n1) %s/%d is trying to acquire this lock:\n",
	       current->comm, current->pid);
	printk_lock(waiter->lock, 1);

	printk("\n2) %s/%d is blocked on this lock:\n", task->comm, task->pid);
	printk_lock(waiter->deadlock_lock, 1);

	debug_show_held_locks(current);
	debug_show_held_locks(task);

	printk("\n%s/%d's [blocked] stackdump:\n\n", task->comm, task->pid);
	show_stack(task, NULL);
	printk("\n%s/%d's [current] stackdump:\n\n",
	       current->comm, current->pid);
	dump_stack();
	debug_show_all_locks();

	printk("[ turning off deadlock detection."
	       "Please report this trace. ]\n\n");
	local_irq_disable();
}
|
||||
|
||||
/* Debug hook on lock acquisition: nothing to check here. */
void debug_rt_mutex_lock(struct rt_mutex *lock)
{
}

/* Debug hook on unlock: only the owner may release the lock. */
void debug_rt_mutex_unlock(struct rt_mutex *lock)
{
	TRACE_WARN_ON_LOCKED(rt_mutex_owner(lock) != current);
}

/* Debug hook on proxy-lock (locking on behalf of @powner): no checks. */
void
debug_rt_mutex_proxy_lock(struct rt_mutex *lock, struct task_struct *powner)
{
}

/* Debug hook on proxy-unlock: the lock must currently have an owner. */
void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock)
{
	TRACE_WARN_ON_LOCKED(!rt_mutex_owner(lock));
}

/*
 * Poison a fresh waiter with 0x11 so stale reads are recognizable,
 * then initialize its plist nodes at lowest priority.
 */
void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
{
	memset(waiter, 0x11, sizeof(*waiter));
	plist_node_init(&waiter->list_entry, MAX_PRIO);
	plist_node_init(&waiter->pi_list_entry, MAX_PRIO);
}

/*
 * Check the waiter is fully detached, then poison it with 0x22 so
 * use-after-free is distinguishable from use-before-init (0x11).
 */
void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter)
{
	TRACE_WARN_ON(!plist_node_empty(&waiter->list_entry));
	TRACE_WARN_ON(!plist_node_empty(&waiter->pi_list_entry));
	TRACE_WARN_ON(waiter->task);
	memset(waiter, 0x22, sizeof(*waiter));
}

void debug_rt_mutex_init(struct rt_mutex *lock, const char *name)
{
	/*
	 * Make sure we are not reinitializing a held lock:
	 */
	debug_check_no_locks_freed((void *)lock, sizeof(*lock));
	lock->name = name;
}

/* Deadlock accounting hooks: no-ops in this configuration. */
void
rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task)
{
}

void rt_mutex_deadlock_account_unlock(struct task_struct *task)
{
}
|
||||
|
||||
33
kernel/rtmutex-debug.h
Normal file
33
kernel/rtmutex-debug.h
Normal file
@@ -0,0 +1,33 @@
|
||||
/*
 * RT-Mutexes: blocking mutual exclusion locks with PI support
 *
 * started by Ingo Molnar and Thomas Gleixner:
 *
 *  Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 *  Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
 *
 * This file contains macros used solely by rtmutex.c. Debug version.
 */

/* Debug hook declarations implemented in rtmutex-debug.c. */
extern void
rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task);
extern void rt_mutex_deadlock_account_unlock(struct task_struct *task);
extern void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter);
extern void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter);
extern void debug_rt_mutex_init(struct rt_mutex *lock, const char *name);
extern void debug_rt_mutex_lock(struct rt_mutex *lock);
extern void debug_rt_mutex_unlock(struct rt_mutex *lock);
extern void debug_rt_mutex_proxy_lock(struct rt_mutex *lock,
				      struct task_struct *powner);
extern void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock);
extern void debug_rt_mutex_deadlock(int detect, struct rt_mutex_waiter *waiter,
				    struct rt_mutex *lock);
extern void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter);
/* Clear recorded deadlock info when a waiter is reused. */
# define debug_rt_mutex_reset_waiter(w)			\
	do { (w)->deadlock_lock = NULL; } while (0)

/* Debug build: always run full deadlock detection when there is a waiter. */
static inline int debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *waiter,
						 int detect)
{
	return (waiter != NULL);
}
|
||||
441
kernel/rtmutex-tester.c
Normal file
441
kernel/rtmutex-tester.c
Normal file
@@ -0,0 +1,441 @@
|
||||
/*
|
||||
* RT-Mutex-tester: scriptable tester for rt mutexes
|
||||
*
|
||||
* started by Thomas Gleixner:
|
||||
*
|
||||
* Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
|
||||
*
|
||||
*/
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/smp_lock.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/sysdev.h>
|
||||
#include <linux/timer.h>
|
||||
#include <linux/freezer.h>
|
||||
|
||||
#include "rtmutex.h"
|
||||
|
||||
/* Size limits of the scriptable tester: up to 8 threads and 8 mutexes. */
#define MAX_RT_TEST_THREADS 8
#define MAX_RT_TEST_MUTEXES 8

/* Serializes the status read-out in sysfs_test_status(). */
static spinlock_t rttest_lock;
/* Global event sequence counter, bumped around each lock/unlock step. */
static atomic_t rttest_event;

/* Per test-thread state, exposed through one sysdev node per thread. */
struct test_thread_data {
	int opcode;			/* command to run (enum test_opcodes); <= 0 means idle/last result */
	int opdata;			/* command argument: lock index, priority, thread id, ... */
	int mutexes[MAX_RT_TEST_MUTEXES]; /* per-mutex progress state (0 = free ... 4 = held, see handle_op) */
	int bkl;			/* BKL progress state (0 = free, 1 = acquiring, 4 = held) */
	int event;			/* last value sampled from rttest_event */
	struct sys_device sysdev;	/* sysfs handle; sysdev.id is the thread index */
};

static struct test_thread_data thread_data[MAX_RT_TEST_THREADS];
static struct task_struct *threads[MAX_RT_TEST_THREADS];
static struct rt_mutex mutexes[MAX_RT_TEST_MUTEXES];
|
||||
|
||||
/*
 * Commands accepted by the tester, written via sysfs as "opcode:data".
 */
enum test_opcodes {
	RTTEST_NOP = 0,
	RTTEST_SCHEDOT,		/* 1 Sched other, data = nice */
	RTTEST_SCHEDRT,		/* 2 Sched fifo, data = prio */
	RTTEST_LOCK,		/* 3 Lock uninterruptible, data = lockindex */
	RTTEST_LOCKNOWAIT,	/* 4 Lock uninterruptible no wait in wakeup, data = lockindex */
	RTTEST_LOCKINT,		/* 5 Lock interruptible, data = lockindex */
	RTTEST_LOCKINTNOWAIT,	/* 6 Lock interruptible no wait in wakeup, data = lockindex */
	RTTEST_LOCKCONT,	/* 7 Continue locking after the wakeup delay */
	RTTEST_UNLOCK,		/* 8 Unlock, data = lockindex */
	RTTEST_LOCKBKL,		/* 9 Lock BKL */
	RTTEST_UNLOCKBKL,	/* 10 Unlock BKL */
	RTTEST_SIGNAL,		/* 11 Signal other test thread, data = thread id */
	RTTEST_RESETEVENT = 98,	/* 98 Reset event counter */
	RTTEST_RESET = 99,	/* 99 Reset all pending operations */
};
|
||||
|
||||
/*
 * Execute one tester command for thread @td.
 *
 * @lockwakeup != 0 means we are called from the wakeup path inside
 * schedule_rt_mutex_test(); only the commands of the first switch are
 * allowed in that context.  Returns 0 on success, -EINVAL for unknown
 * or invalid commands, -EINTR when an interruptible lock attempt was
 * interrupted.
 */
static int handle_op(struct test_thread_data *td, int lockwakeup)
{
	int i, id, ret = -EINVAL;

	/* Commands that are legal in any context: */
	switch(td->opcode) {

	case RTTEST_NOP:
		return 0;

	case RTTEST_LOCKCONT:
		/* Re-arm the lock state so the blocked thread proceeds. */
		td->mutexes[td->opdata] = 1;
		td->event = atomic_add_return(1, &rttest_event);
		return 0;

	case RTTEST_RESET:
		/* Drop every mutex this thread still holds (state 4). */
		for (i = 0; i < MAX_RT_TEST_MUTEXES; i++) {
			if (td->mutexes[i] == 4) {
				rt_mutex_unlock(&mutexes[i]);
				td->mutexes[i] = 0;
			}
		}

		/* Release a held BKL, but not from the wakeup path. */
		if (!lockwakeup && td->bkl == 4) {
			unlock_kernel();
			td->bkl = 0;
		}
		return 0;

	case RTTEST_RESETEVENT:
		atomic_set(&rttest_event, 0);
		return 0;

	default:
		/* Everything below is invalid from the wakeup path. */
		if (lockwakeup)
			return ret;
	}

	/* Commands that may block: */
	switch(td->opcode) {

	case RTTEST_LOCK:
	case RTTEST_LOCKNOWAIT:
		id = td->opdata;
		if (id < 0 || id >= MAX_RT_TEST_MUTEXES)
			return ret;

		/* 1 = acquiring, 4 = held; events bracket the lock call. */
		td->mutexes[id] = 1;
		td->event = atomic_add_return(1, &rttest_event);
		rt_mutex_lock(&mutexes[id]);
		td->event = atomic_add_return(1, &rttest_event);
		td->mutexes[id] = 4;
		return 0;

	case RTTEST_LOCKINT:
	case RTTEST_LOCKINTNOWAIT:
		id = td->opdata;
		if (id < 0 || id >= MAX_RT_TEST_MUTEXES)
			return ret;

		td->mutexes[id] = 1;
		td->event = atomic_add_return(1, &rttest_event);
		ret = rt_mutex_lock_interruptible(&mutexes[id], 0);
		td->event = atomic_add_return(1, &rttest_event);
		/* Interrupted attempts fall back to the free state. */
		td->mutexes[id] = ret ? 0 : 4;
		return ret ? -EINTR : 0;

	case RTTEST_UNLOCK:
		id = td->opdata;
		/* Only a mutex we actually hold (state 4) may be unlocked. */
		if (id < 0 || id >= MAX_RT_TEST_MUTEXES || td->mutexes[id] != 4)
			return ret;

		td->event = atomic_add_return(1, &rttest_event);
		rt_mutex_unlock(&mutexes[id]);
		td->event = atomic_add_return(1, &rttest_event);
		td->mutexes[id] = 0;
		return 0;

	case RTTEST_LOCKBKL:
		if (td->bkl)
			return 0;
		td->bkl = 1;
		lock_kernel();
		td->bkl = 4;
		return 0;

	case RTTEST_UNLOCKBKL:
		if (td->bkl != 4)
			break;
		unlock_kernel();
		td->bkl = 0;
		return 0;

	default:
		break;
	}
	return ret;
}
|
||||
|
||||
/*
 * Schedule replacement for rtsem_down(). Only called for threads with
 * PF_MUTEX_TESTER set.
 *
 * This allows us to have finegrained control over the event flow.
 *
 * The mutex state codes used below: 1 = acquiring, 2 = blocked in
 * schedule, 3 = blocked and parked waiting for RTTEST_LOCKCONT.
 */
void schedule_rt_mutex_test(struct rt_mutex *mutex)
{
	int tid, op, dat;
	struct test_thread_data *td;

	/* We have to lookup the task */
	for (tid = 0; tid < MAX_RT_TEST_THREADS; tid++) {
		if (threads[tid] == current)
			break;
	}

	BUG_ON(tid == MAX_RT_TEST_THREADS);

	td = &thread_data[tid];

	op = td->opcode;
	dat = td->opdata;

	/* Before blocking: advance state 1 -> 2 for the lock we block on. */
	switch (op) {
	case RTTEST_LOCK:
	case RTTEST_LOCKINT:
	case RTTEST_LOCKNOWAIT:
	case RTTEST_LOCKINTNOWAIT:
		if (mutex != &mutexes[dat])
			break;

		if (td->mutexes[dat] != 1)
			break;

		td->mutexes[dat] = 2;
		td->event = atomic_add_return(1, &rttest_event);
		break;

	case RTTEST_LOCKBKL:
	default:
		break;
	}

	schedule();

	/* After wakeup: decide whether to park (3) or resume (1). */
	switch (op) {
	case RTTEST_LOCK:
	case RTTEST_LOCKINT:
		if (mutex != &mutexes[dat])
			return;

		if (td->mutexes[dat] != 2)
			return;

		td->mutexes[dat] = 3;
		td->event = atomic_add_return(1, &rttest_event);
		break;

	case RTTEST_LOCKNOWAIT:
	case RTTEST_LOCKINTNOWAIT:
		if (mutex != &mutexes[dat])
			return;

		if (td->mutexes[dat] != 2)
			return;

		/* No wait in wakeup: go straight back to acquiring. */
		td->mutexes[dat] = 1;
		td->event = atomic_add_return(1, &rttest_event);
		return;

	case RTTEST_LOCKBKL:
		return;
	default:
		return;
	}

	td->opcode = 0;

	/*
	 * Parked: keep servicing wakeup-safe commands until
	 * RTTEST_LOCKCONT releases us.
	 */
	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);

		if (td->opcode > 0) {
			int ret;

			set_current_state(TASK_RUNNING);
			ret = handle_op(td, 1);
			set_current_state(TASK_INTERRUPTIBLE);
			if (td->opcode == RTTEST_LOCKCONT)
				break;
			td->opcode = ret;
		}

		/* Wait for the next command to be executed */
		schedule();
	}

	/* Restore previous command and data */
	td->opcode = op;
	td->opdata = dat;
}
|
||||
|
||||
/*
 * Main loop of a tester kthread: sleep until a command is posted via
 * sysfs, execute it with handle_op(), publish the result in td->opcode
 * and go back to sleep.  SIGHUP (sent via RTTEST_SIGNAL) is allowed so
 * interruptible lock attempts can be interrupted.
 */
static int test_func(void *data)
{
	struct test_thread_data *td = data;
	int ret;

	/* Route this thread's schedule() through schedule_rt_mutex_test. */
	current->flags |= PF_MUTEX_TESTER;
	allow_signal(SIGHUP);

	for(;;) {

		set_current_state(TASK_INTERRUPTIBLE);

		if (td->opcode > 0) {
			set_current_state(TASK_RUNNING);
			ret = handle_op(td, 0);
			set_current_state(TASK_INTERRUPTIBLE);
			/* Publish the result (<= 0) so sysfs can poll it. */
			td->opcode = ret;
		}

		/* Wait for the next command to be executed */
		schedule();
		try_to_freeze();

		/* Discard the SIGHUPs used to interrupt lock waits. */
		if (signal_pending(current))
			flush_signals(current);

		if(kthread_should_stop())
			break;
	}
	return 0;
}
|
||||
|
||||
/**
|
||||
* sysfs_test_command - interface for test commands
|
||||
* @dev: thread reference
|
||||
* @buf: command for actual step
|
||||
* @count: length of buffer
|
||||
*
|
||||
* command syntax:
|
||||
*
|
||||
* opcode:data
|
||||
*/
|
||||
static ssize_t sysfs_test_command(struct sys_device *dev, const char *buf,
|
||||
size_t count)
|
||||
{
|
||||
struct sched_param schedpar;
|
||||
struct test_thread_data *td;
|
||||
char cmdbuf[32];
|
||||
int op, dat, tid, ret;
|
||||
|
||||
td = container_of(dev, struct test_thread_data, sysdev);
|
||||
tid = td->sysdev.id;
|
||||
|
||||
/* strings from sysfs write are not 0 terminated! */
|
||||
if (count >= sizeof(cmdbuf))
|
||||
return -EINVAL;
|
||||
|
||||
/* strip of \n: */
|
||||
if (buf[count-1] == '\n')
|
||||
count--;
|
||||
if (count < 1)
|
||||
return -EINVAL;
|
||||
|
||||
memcpy(cmdbuf, buf, count);
|
||||
cmdbuf[count] = 0;
|
||||
|
||||
if (sscanf(cmdbuf, "%d:%d", &op, &dat) != 2)
|
||||
return -EINVAL;
|
||||
|
||||
switch (op) {
|
||||
case RTTEST_SCHEDOT:
|
||||
schedpar.sched_priority = 0;
|
||||
ret = sched_setscheduler(threads[tid], SCHED_NORMAL, &schedpar);
|
||||
if (ret)
|
||||
return ret;
|
||||
set_user_nice(current, 0);
|
||||
break;
|
||||
|
||||
case RTTEST_SCHEDRT:
|
||||
schedpar.sched_priority = dat;
|
||||
ret = sched_setscheduler(threads[tid], SCHED_FIFO, &schedpar);
|
||||
if (ret)
|
||||
return ret;
|
||||
break;
|
||||
|
||||
case RTTEST_SIGNAL:
|
||||
send_sig(SIGHUP, threads[tid], 0);
|
||||
break;
|
||||
|
||||
default:
|
||||
if (td->opcode > 0)
|
||||
return -EBUSY;
|
||||
td->opdata = dat;
|
||||
td->opcode = op;
|
||||
wake_up_process(threads[tid]);
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
/**
 * sysfs_test_status - sysfs interface for rt tester
 * @dev:	thread to query
 * @buf:	char buffer to be filled with thread status info
 *
 * Prints opcode, event count, task state, (reversed) prio and
 * normal_prio, pi_blocked_on, BKL state and all mutex states, followed
 * by the task pointer and the owner of this thread's same-index mutex.
 * Returns the number of bytes written to @buf.
 */
static ssize_t sysfs_test_status(struct sys_device *dev, char *buf)
{
	struct test_thread_data *td;
	struct task_struct *tsk;
	char *curr = buf;
	int i;

	td = container_of(dev, struct test_thread_data, sysdev);
	tsk = threads[td->sysdev.id];

	/* Keep the snapshot of the thread state consistent. */
	spin_lock(&rttest_lock);

	curr += sprintf(curr,
		"O: %4d, E:%8d, S: 0x%08lx, P: %4d, N: %4d, B: %p, K: %d, M:",
		td->opcode, td->event, tsk->state,
			(MAX_RT_PRIO - 1) - tsk->prio,
			(MAX_RT_PRIO - 1) - tsk->normal_prio,
		tsk->pi_blocked_on, td->bkl);

	/* Mutex states, highest index first. */
	for (i = MAX_RT_TEST_MUTEXES - 1; i >=0 ; i--)
		curr += sprintf(curr, "%d", td->mutexes[i]);

	spin_unlock(&rttest_lock);

	curr += sprintf(curr, ", T: %p, R: %p\n", tsk,
			mutexes[td->sysdev.id].owner);

	return curr - buf;
}
|
||||
|
||||
/* Per-thread sysfs attributes: "status" is read-only, "command" write-only. */
static SYSDEV_ATTR(status, 0600, sysfs_test_status, NULL);
static SYSDEV_ATTR(command, 0600, NULL, sysfs_test_command);

/* All tester devices appear under the "rttest" sysdev class. */
static struct sysdev_class rttest_sysclass = {
	set_kset_name("rttest"),
};
|
||||
|
||||
static int init_test_thread(int id)
|
||||
{
|
||||
thread_data[id].sysdev.cls = &rttest_sysclass;
|
||||
thread_data[id].sysdev.id = id;
|
||||
|
||||
threads[id] = kthread_run(test_func, &thread_data[id], "rt-test-%d", id);
|
||||
if (IS_ERR(threads[id]))
|
||||
return PTR_ERR(threads[id]);
|
||||
|
||||
return sysdev_register(&thread_data[id].sysdev);
|
||||
}
|
||||
|
||||
static int init_rttest(void)
|
||||
{
|
||||
int ret, i;
|
||||
|
||||
spin_lock_init(&rttest_lock);
|
||||
|
||||
for (i = 0; i < MAX_RT_TEST_MUTEXES; i++)
|
||||
rt_mutex_init(&mutexes[i]);
|
||||
|
||||
ret = sysdev_class_register(&rttest_sysclass);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
for (i = 0; i < MAX_RT_TEST_THREADS; i++) {
|
||||
ret = init_test_thread(i);
|
||||
if (ret)
|
||||
break;
|
||||
ret = sysdev_create_file(&thread_data[i].sysdev, &attr_status);
|
||||
if (ret)
|
||||
break;
|
||||
ret = sysdev_create_file(&thread_data[i].sysdev, &attr_command);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
|
||||
printk("Initializing RT-Tester: %s\n", ret ? "Failed" : "OK" );
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
device_initcall(init_rttest);
|
||||
990
kernel/rtmutex.c
Normal file
990
kernel/rtmutex.c
Normal file
@@ -0,0 +1,990 @@
|
||||
/*
|
||||
* RT-Mutexes: simple blocking mutual exclusion locks with PI support
|
||||
*
|
||||
* started by Ingo Molnar and Thomas Gleixner.
|
||||
*
|
||||
* Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
|
||||
* Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
|
||||
* Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
|
||||
* Copyright (C) 2006 Esben Nielsen
|
||||
*
|
||||
* See Documentation/rt-mutex-design.txt for details.
|
||||
*/
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/timer.h>
|
||||
|
||||
#include "rtmutex_common.h"
|
||||
|
||||
#ifdef CONFIG_DEBUG_RT_MUTEXES
|
||||
# include "rtmutex-debug.h"
|
||||
#else
|
||||
# include "rtmutex.h"
|
||||
#endif
|
||||
|
||||
/*
|
||||
* lock->owner state tracking:
|
||||
*
|
||||
* lock->owner holds the task_struct pointer of the owner. Bit 0 and 1
|
||||
* are used to keep track of the "owner is pending" and "lock has
|
||||
* waiters" state.
|
||||
*
|
||||
* owner bit1 bit0
|
||||
* NULL 0 0 lock is free (fast acquire possible)
|
||||
* NULL 0 1 invalid state
|
||||
* NULL 1 0 Transitional State*
|
||||
* NULL 1 1 invalid state
|
||||
* taskpointer 0 0 lock is held (fast release possible)
|
||||
* taskpointer 0 1 task is pending owner
|
||||
* taskpointer 1 0 lock is held and has waiters
|
||||
* taskpointer 1 1 task is pending owner and lock has more waiters
|
||||
*
|
||||
* Pending ownership is assigned to the top (highest priority)
|
||||
* waiter of the lock, when the lock is released. The thread is woken
|
||||
* up and can now take the lock. Until the lock is taken (bit 0
|
||||
* cleared) a competing higher priority thread can steal the lock
|
||||
* which puts the woken up thread back on the waiters list.
|
||||
*
|
||||
* The fast atomic compare exchange based acquire and release is only
|
||||
* possible when bit 0 and 1 of lock->owner are 0.
|
||||
*
|
||||
* (*) There's a small time where the owner can be NULL and the
|
||||
* "lock has waiters" bit is set. This can happen when grabbing the lock.
|
||||
* To prevent a cmpxchg of the owner releasing the lock, we need to set this
|
||||
* bit before looking at the lock, hence the reason this is a transitional
|
||||
* state.
|
||||
*/
|
||||
|
||||
static void
|
||||
rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner,
|
||||
unsigned long mask)
|
||||
{
|
||||
unsigned long val = (unsigned long)owner | mask;
|
||||
|
||||
if (rt_mutex_has_waiters(lock))
|
||||
val |= RT_MUTEX_HAS_WAITERS;
|
||||
|
||||
lock->owner = (struct task_struct *)val;
|
||||
}
|
||||
|
||||
static inline void clear_rt_mutex_waiters(struct rt_mutex *lock)
|
||||
{
|
||||
lock->owner = (struct task_struct *)
|
||||
((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
|
||||
}
|
||||
|
||||
/* Drop a stale "has waiters" bit once the wait list is empty again. */
static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
{
	if (rt_mutex_has_waiters(lock))
		return;

	clear_rt_mutex_waiters(lock);
}
|
||||
|
||||
/*
 * We can speed up the acquire/release, if the architecture
 * supports cmpxchg and if there's no debugging state to be set up
 */
#if defined(__HAVE_ARCH_CMPXCHG) && !defined(CONFIG_DEBUG_RT_MUTEXES)
# define rt_mutex_cmpxchg(l,c,n)	(cmpxchg(&l->owner, c, n) == c)
/* Atomically set the "has waiters" bit without disturbing the owner. */
static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
{
	unsigned long owner, *p = (unsigned long *) &lock->owner;

	/* Retry until the cmpxchg lands on an unchanged owner word. */
	do {
		owner = *p;
	} while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner);
}
#else
/* No cmpxchg: the fast path is disabled and wait_lock serializes us. */
# define rt_mutex_cmpxchg(l,c,n)	(0)
static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
{
	lock->owner = (struct task_struct *)
			((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
}
#endif
|
||||
|
||||
/*
|
||||
* Calculate task priority from the waiter list priority
|
||||
*
|
||||
* Return task->normal_prio when the waiter list is empty or when
|
||||
* the waiter is not allowed to do priority boosting
|
||||
*/
|
||||
int rt_mutex_getprio(struct task_struct *task)
|
||||
{
|
||||
if (likely(!task_has_pi_waiters(task)))
|
||||
return task->normal_prio;
|
||||
|
||||
return min(task_top_pi_waiter(task)->pi_list_entry.prio,
|
||||
task->normal_prio);
|
||||
}
|
||||
|
||||
/*
|
||||
* Adjust the priority of a task, after its pi_waiters got modified.
|
||||
*
|
||||
* This can be both boosting and unboosting. task->pi_lock must be held.
|
||||
*/
|
||||
static void __rt_mutex_adjust_prio(struct task_struct *task)
|
||||
{
|
||||
int prio = rt_mutex_getprio(task);
|
||||
|
||||
if (task->prio != prio)
|
||||
rt_mutex_setprio(task, prio);
|
||||
}
|
||||
|
||||
/*
|
||||
* Adjust task priority (undo boosting). Called from the exit path of
|
||||
* rt_mutex_slowunlock() and rt_mutex_slowlock().
|
||||
*
|
||||
* (Note: We do this outside of the protection of lock->wait_lock to
|
||||
* allow the lock to be taken while or before we readjust the priority
|
||||
* of task. We do not use the spin_xx_mutex() variants here as we are
|
||||
* outside of the debug path.)
|
||||
*/
|
||||
static void rt_mutex_adjust_prio(struct task_struct *task)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&task->pi_lock, flags);
|
||||
__rt_mutex_adjust_prio(task);
|
||||
spin_unlock_irqrestore(&task->pi_lock, flags);
|
||||
}
|
||||
|
||||
/*
 * Max number of times we'll walk the boosting chain:
 * (bounds the again-loop in rt_mutex_adjust_prio_chain)
 */
int max_lock_depth = 1024;
|
||||
|
||||
/*
 * Adjust the priority chain. Also used for deadlock detection.
 * Decreases task's usage by one - may thus free the task.
 * Returns 0 or -EDEADLK.
 *
 * @task:		owner to start walking from (reference already held)
 * @deadlock_detect:	caller requested deadlock detection
 * @orig_lock:		lock the walk started from (deadlock reference)
 * @orig_waiter:	waiter of top_task on orig_lock
 * @top_task:		task that triggered the walk
 */
static int rt_mutex_adjust_prio_chain(struct task_struct *task,
				      int deadlock_detect,
				      struct rt_mutex *orig_lock,
				      struct rt_mutex_waiter *orig_waiter,
				      struct task_struct *top_task)
{
	struct rt_mutex *lock;
	struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
	int detect_deadlock, ret = 0, depth = 0;
	unsigned long flags;

	detect_deadlock = debug_rt_mutex_detect_deadlock(orig_waiter,
							 deadlock_detect);

	/*
	 * The (de)boosting is a step by step approach with a lot of
	 * pitfalls. We want this to be preemptible and we want hold a
	 * maximum of two locks per step. So we have to check
	 * carefully whether things change under us.
	 */
 again:
	if (++depth > max_lock_depth) {
		static int prev_max;

		/*
		 * Print this only once. If the admin changes the limit,
		 * print a new message when reaching the limit again.
		 */
		if (prev_max != max_lock_depth) {
			prev_max = max_lock_depth;
			printk(KERN_WARNING "Maximum lock depth %d reached "
			       "task: %s (%d)\n", max_lock_depth,
			       top_task->comm, top_task->pid);
		}
		put_task_struct(task);

		return deadlock_detect ? -EDEADLK : 0;
	}
 retry:
	/*
	 * Task can not go away as we did a get_task() before !
	 */
	spin_lock_irqsave(&task->pi_lock, flags);

	waiter = task->pi_blocked_on;
	/*
	 * Check whether the end of the boosting chain has been
	 * reached or the state of the chain has changed while we
	 * dropped the locks.
	 */
	if (!waiter || !waiter->task)
		goto out_unlock_pi;

	/* The recorded top waiter is no longer the task's top PI waiter. */
	if (top_waiter && (!task_has_pi_waiters(task) ||
			   top_waiter != task_top_pi_waiter(task)))
		goto out_unlock_pi;

	/*
	 * When deadlock detection is off then we check, if further
	 * priority adjustment is necessary.
	 */
	if (!detect_deadlock && waiter->list_entry.prio == task->prio)
		goto out_unlock_pi;

	/*
	 * Lock ordering: wait_lock nests inside pi_lock here, so only a
	 * trylock is legal; on contention drop pi_lock and start over.
	 */
	lock = waiter->lock;
	if (!spin_trylock(&lock->wait_lock)) {
		spin_unlock_irqrestore(&task->pi_lock, flags);
		cpu_relax();
		goto retry;
	}

	/* Deadlock detection */
	if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
		debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock);
		spin_unlock(&lock->wait_lock);
		ret = deadlock_detect ? -EDEADLK : 0;
		goto out_unlock_pi;
	}

	top_waiter = rt_mutex_top_waiter(lock);

	/* Requeue the waiter */
	plist_del(&waiter->list_entry, &lock->wait_list);
	waiter->list_entry.prio = task->prio;
	plist_add(&waiter->list_entry, &lock->wait_list);

	/* Release the task */
	spin_unlock_irqrestore(&task->pi_lock, flags);
	put_task_struct(task);

	/* Grab the next task */
	task = rt_mutex_owner(lock);
	get_task_struct(task);
	spin_lock_irqsave(&task->pi_lock, flags);

	if (waiter == rt_mutex_top_waiter(lock)) {
		/* Boost the owner */
		plist_del(&top_waiter->pi_list_entry, &task->pi_waiters);
		waiter->pi_list_entry.prio = waiter->list_entry.prio;
		plist_add(&waiter->pi_list_entry, &task->pi_waiters);
		__rt_mutex_adjust_prio(task);

	} else if (top_waiter == waiter) {
		/* Deboost the owner */
		plist_del(&waiter->pi_list_entry, &task->pi_waiters);
		waiter = rt_mutex_top_waiter(lock);
		waiter->pi_list_entry.prio = waiter->list_entry.prio;
		plist_add(&waiter->pi_list_entry, &task->pi_waiters);
		__rt_mutex_adjust_prio(task);
	}

	spin_unlock_irqrestore(&task->pi_lock, flags);

	top_waiter = rt_mutex_top_waiter(lock);
	spin_unlock(&lock->wait_lock);

	/* Without deadlock detection, stop once the chain stops changing. */
	if (!detect_deadlock && waiter != top_waiter)
		goto out_put_task;

	goto again;

 out_unlock_pi:
	spin_unlock_irqrestore(&task->pi_lock, flags);
 out_put_task:
	put_task_struct(task);

	return ret;
}
|
||||
|
||||
/*
 * Optimization: check if we can steal the lock from the
 * assigned pending owner [which might not have taken the
 * lock yet]:
 *
 * Returns 1 when current may take the lock (it is the pending owner
 * itself, or it outranks the pending owner), 0 otherwise.
 * Called with lock->wait_lock held.
 */
static inline int try_to_steal_lock(struct rt_mutex *lock)
{
	struct task_struct *pendowner = rt_mutex_owner(lock);
	struct rt_mutex_waiter *next;
	unsigned long flags;

	/* A real (non-pending) owner can never be robbed. */
	if (!rt_mutex_owner_pending(lock))
		return 0;

	if (pendowner == current)
		return 1;

	/* Only a strictly higher priority task may steal. */
	spin_lock_irqsave(&pendowner->pi_lock, flags);
	if (current->prio >= pendowner->prio) {
		spin_unlock_irqrestore(&pendowner->pi_lock, flags);
		return 0;
	}

	/*
	 * Check if a waiter is enqueued on the pending owners
	 * pi_waiters list. Remove it and readjust pending owners
	 * priority.
	 */
	if (likely(!rt_mutex_has_waiters(lock))) {
		spin_unlock_irqrestore(&pendowner->pi_lock, flags);
		return 1;
	}

	/* No chain handling, pending owner is not blocked on anything: */
	next = rt_mutex_top_waiter(lock);
	plist_del(&next->pi_list_entry, &pendowner->pi_waiters);
	__rt_mutex_adjust_prio(pendowner);
	spin_unlock_irqrestore(&pendowner->pi_lock, flags);

	/*
	 * We are going to steal the lock and a waiter was
	 * enqueued on the pending owners pi_waiters queue. So
	 * we have to enqueue this waiter into
	 * current->pi_waiters list. This covers the case,
	 * where current is boosted because it holds another
	 * lock and gets unboosted because the booster is
	 * interrupted, so we would delay a waiter with higher
	 * priority as current->normal_prio.
	 *
	 * Note: in the rare case of a SCHED_OTHER task changing
	 * its priority and thus stealing the lock, next->task
	 * might be current:
	 */
	if (likely(next->task != current)) {
		spin_lock_irqsave(&current->pi_lock, flags);
		plist_add(&next->pi_list_entry, &current->pi_waiters);
		__rt_mutex_adjust_prio(current);
		spin_unlock_irqrestore(&current->pi_lock, flags);
	}
	return 1;
}
|
||||
|
||||
/*
 * Try to take an rt-mutex
 *
 * This fails
 * - when the lock has a real owner
 * - when a different pending owner exists and has higher priority than current
 *
 * Must be called with lock->wait_lock held.
 * Returns 1 when the lock was acquired by current, 0 otherwise.
 */
static int try_to_take_rt_mutex(struct rt_mutex *lock)
{
	/*
	 * We have to be careful here if the atomic speedups are
	 * enabled, such that, when
	 *  - no other waiter is on the lock
	 *  - the lock has been released since we did the cmpxchg
	 * the lock can be released or taken while we are doing the
	 * checks and marking the lock with RT_MUTEX_HAS_WAITERS.
	 *
	 * The atomic acquire/release aware variant of
	 * mark_rt_mutex_waiters uses a cmpxchg loop. After setting
	 * the WAITERS bit, the atomic release / acquire can not
	 * happen anymore and lock->wait_lock protects us from the
	 * non-atomic case.
	 *
	 * Note, that this might set lock->owner =
	 * RT_MUTEX_HAS_WAITERS in the case the lock is not contended
	 * any more. This is fixed up when we take the ownership.
	 * This is the transitional state explained at the top of this file.
	 */
	mark_rt_mutex_waiters(lock);

	/* Held by somebody else and not stealable: give up. */
	if (rt_mutex_owner(lock) && !try_to_steal_lock(lock))
		return 0;

	/* We got the lock. */
	debug_rt_mutex_lock(lock);

	/* mask == 0: real (non-pending) ownership. */
	rt_mutex_set_owner(lock, current, 0);

	rt_mutex_deadlock_account_lock(lock, current);

	return 1;
}
|
||||
|
||||
/*
 * Task blocks on lock.
 *
 * Prepare waiter and propagate pi chain
 *
 * This must be called with lock->wait_lock held.
 *
 * Returns 0, or the result of rt_mutex_adjust_prio_chain() when a
 * chain walk was required.  Note: wait_lock is dropped and re-taken
 * around the chain walk.
 */
static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
				   struct rt_mutex_waiter *waiter,
				   int detect_deadlock)
{
	struct task_struct *owner = rt_mutex_owner(lock);
	struct rt_mutex_waiter *top_waiter = waiter;
	unsigned long flags;
	int chain_walk = 0, res;

	/* Enqueue current as a waiter at its current priority. */
	spin_lock_irqsave(&current->pi_lock, flags);
	__rt_mutex_adjust_prio(current);
	waiter->task = current;
	waiter->lock = lock;
	plist_node_init(&waiter->list_entry, current->prio);
	plist_node_init(&waiter->pi_list_entry, current->prio);

	/* Get the top priority waiter on the lock */
	if (rt_mutex_has_waiters(lock))
		top_waiter = rt_mutex_top_waiter(lock);
	plist_add(&waiter->list_entry, &lock->wait_list);

	current->pi_blocked_on = waiter;

	spin_unlock_irqrestore(&current->pi_lock, flags);

	/*
	 * If we became the top waiter, replace the previous top waiter
	 * in the owner's pi_waiters and boost the owner.
	 */
	if (waiter == rt_mutex_top_waiter(lock)) {
		spin_lock_irqsave(&owner->pi_lock, flags);
		plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters);
		plist_add(&waiter->pi_list_entry, &owner->pi_waiters);

		__rt_mutex_adjust_prio(owner);
		/* Owner itself is blocked: the boost must be propagated. */
		if (owner->pi_blocked_on)
			chain_walk = 1;
		spin_unlock_irqrestore(&owner->pi_lock, flags);
	}
	else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock))
		chain_walk = 1;

	if (!chain_walk)
		return 0;

	/*
	 * The owner can't disappear while holding a lock,
	 * so the owner struct is protected by wait_lock.
	 * Gets dropped in rt_mutex_adjust_prio_chain()!
	 */
	get_task_struct(owner);

	spin_unlock(&lock->wait_lock);

	res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter,
					 current);

	spin_lock(&lock->wait_lock);

	return res;
}
|
||||
|
||||
/*
 * Wake up the next waiter on the lock.
 *
 * Remove the top waiter from the current tasks waiter list and from
 * the lock waiter list. Set it as pending owner. Then wake it up.
 *
 * Called with lock->wait_lock held.
 */
static void wakeup_next_waiter(struct rt_mutex *lock)
{
	struct rt_mutex_waiter *waiter;
	struct task_struct *pendowner;
	unsigned long flags;

	spin_lock_irqsave(&current->pi_lock, flags);

	waiter = rt_mutex_top_waiter(lock);
	plist_del(&waiter->list_entry, &lock->wait_list);

	/*
	 * Remove it from current->pi_waiters. We do not adjust a
	 * possible priority boost right now. We execute wakeup in the
	 * boosted mode and go back to normal after releasing
	 * lock->wait_lock.
	 */
	plist_del(&waiter->pi_list_entry, &current->pi_waiters);
	pendowner = waiter->task;
	/* waiter->task == NULL signals "woken/ownership handed over". */
	waiter->task = NULL;

	rt_mutex_set_owner(lock, pendowner, RT_MUTEX_OWNER_PENDING);

	spin_unlock_irqrestore(&current->pi_lock, flags);

	/*
	 * Clear the pi_blocked_on variable and enqueue a possible
	 * waiter into the pi_waiters list of the pending owner. This
	 * prevents that in case the pending owner gets unboosted a
	 * waiter with higher priority than pending-owner->normal_prio
	 * is blocked on the unboosted (pending) owner.
	 */
	spin_lock_irqsave(&pendowner->pi_lock, flags);

	WARN_ON(!pendowner->pi_blocked_on);
	WARN_ON(pendowner->pi_blocked_on != waiter);
	WARN_ON(pendowner->pi_blocked_on->lock != lock);

	pendowner->pi_blocked_on = NULL;

	if (rt_mutex_has_waiters(lock)) {
		struct rt_mutex_waiter *next;

		next = rt_mutex_top_waiter(lock);
		plist_add(&next->pi_list_entry, &pendowner->pi_waiters);
	}
	spin_unlock_irqrestore(&pendowner->pi_lock, flags);

	wake_up_process(pendowner);
}
|
||||
|
||||
/*
 * Remove a waiter from a lock
 *
 * Must be called with lock->wait_lock held
 * (dropped and re-taken around the chain walk, if one is needed).
 */
static void remove_waiter(struct rt_mutex *lock,
			  struct rt_mutex_waiter *waiter)
{
	int first = (waiter == rt_mutex_top_waiter(lock));
	struct task_struct *owner = rt_mutex_owner(lock);
	unsigned long flags;
	int chain_walk = 0;

	spin_lock_irqsave(&current->pi_lock, flags);
	plist_del(&waiter->list_entry, &lock->wait_list);
	waiter->task = NULL;
	current->pi_blocked_on = NULL;
	spin_unlock_irqrestore(&current->pi_lock, flags);

	/*
	 * We were the top waiter of a foreign owner: take ourselves out
	 * of its pi_waiters, promote the new top waiter and deboost.
	 */
	if (first && owner != current) {

		spin_lock_irqsave(&owner->pi_lock, flags);

		plist_del(&waiter->pi_list_entry, &owner->pi_waiters);

		if (rt_mutex_has_waiters(lock)) {
			struct rt_mutex_waiter *next;

			next = rt_mutex_top_waiter(lock);
			plist_add(&next->pi_list_entry, &owner->pi_waiters);
		}
		__rt_mutex_adjust_prio(owner);

		/* Owner is itself blocked: propagate the deboost. */
		if (owner->pi_blocked_on)
			chain_walk = 1;

		spin_unlock_irqrestore(&owner->pi_lock, flags);
	}

	WARN_ON(!plist_node_empty(&waiter->pi_list_entry));

	if (!chain_walk)
		return;

	/* gets dropped in rt_mutex_adjust_prio_chain()! */
	get_task_struct(owner);

	spin_unlock(&lock->wait_lock);

	rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current);

	spin_lock(&lock->wait_lock);
}
|
||||
|
||||
/*
 * Recheck the pi chain, in case we got a priority setting
 *
 * Called from sched_setscheduler
 */
void rt_mutex_adjust_pi(struct task_struct *task)
{
	struct rt_mutex_waiter *waiter;
	unsigned long flags;

	spin_lock_irqsave(&task->pi_lock, flags);

	/* Nothing to do if not blocked, or the queued prio is current. */
	waiter = task->pi_blocked_on;
	if (!waiter || waiter->list_entry.prio == task->prio) {
		spin_unlock_irqrestore(&task->pi_lock, flags);
		return;
	}

	spin_unlock_irqrestore(&task->pi_lock, flags);

	/* gets dropped in rt_mutex_adjust_prio_chain()! */
	get_task_struct(task);
	rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task);
}
|
||||
|
||||
/*
|
||||
* Slow path lock function:
|
||||
*/
|
||||
static int __sched
|
||||
rt_mutex_slowlock(struct rt_mutex *lock, int state,
|
||||
struct hrtimer_sleeper *timeout,
|
||||
int detect_deadlock)
|
||||
{
|
||||
struct rt_mutex_waiter waiter;
|
||||
int ret = 0;
|
||||
|
||||
debug_rt_mutex_init_waiter(&waiter);
|
||||
waiter.task = NULL;
|
||||
|
||||
spin_lock(&lock->wait_lock);
|
||||
|
||||
/* Try to acquire the lock again: */
|
||||
if (try_to_take_rt_mutex(lock)) {
|
||||
spin_unlock(&lock->wait_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
set_current_state(state);
|
||||
|
||||
/* Setup the timer, when timeout != NULL */
|
||||
if (unlikely(timeout))
|
||||
hrtimer_start(&timeout->timer, timeout->timer.expires,
|
||||
HRTIMER_MODE_ABS);
|
||||
|
||||
for (;;) {
|
||||
/* Try to acquire the lock: */
|
||||
if (try_to_take_rt_mutex(lock))
|
||||
break;
|
||||
|
||||
/*
|
||||
* TASK_INTERRUPTIBLE checks for signals and
|
||||
* timeout. Ignored otherwise.
|
||||
*/
|
||||
if (unlikely(state == TASK_INTERRUPTIBLE)) {
|
||||
/* Signal pending? */
|
||||
if (signal_pending(current))
|
||||
ret = -EINTR;
|
||||
if (timeout && !timeout->task)
|
||||
ret = -ETIMEDOUT;
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* waiter.task is NULL the first time we come here and
|
||||
* when we have been woken up by the previous owner
|
||||
* but the lock got stolen by a higher prio task.
|
||||
*/
|
||||
if (!waiter.task) {
|
||||
ret = task_blocks_on_rt_mutex(lock, &waiter,
|
||||
detect_deadlock);
|
||||
/*
|
||||
* If we got woken up by the owner then start loop
|
||||
* all over without going into schedule to try
|
||||
* to get the lock now:
|
||||
*/
|
||||
if (unlikely(!waiter.task))
|
||||
continue;
|
||||
|
||||
if (unlikely(ret))
|
||||
break;
|
||||
}
|
||||
|
||||
spin_unlock(&lock->wait_lock);
|
||||
|
||||
debug_rt_mutex_print_deadlock(&waiter);
|
||||
|
||||
if (waiter.task)
|
||||
schedule_rt_mutex(lock);
|
||||
|
||||
spin_lock(&lock->wait_lock);
|
||||
set_current_state(state);
|
||||
}
|
||||
|
||||
set_current_state(TASK_RUNNING);
|
||||
|
||||
if (unlikely(waiter.task))
|
||||
remove_waiter(lock, &waiter);
|
||||
|
||||
/*
|
||||
* try_to_take_rt_mutex() sets the waiter bit
|
||||
* unconditionally. We might have to fix that up.
|
||||
*/
|
||||
fixup_rt_mutex_waiters(lock);
|
||||
|
||||
spin_unlock(&lock->wait_lock);
|
||||
|
||||
/* Remove pending timer: */
|
||||
if (unlikely(timeout))
|
||||
hrtimer_cancel(&timeout->timer);
|
||||
|
||||
/*
|
||||
* Readjust priority, when we did not get the lock. We might
|
||||
* have been the pending owner and boosted. Since we did not
|
||||
* take the lock, the PI boost has to go.
|
||||
*/
|
||||
if (unlikely(ret))
|
||||
rt_mutex_adjust_prio(current);
|
||||
|
||||
debug_rt_mutex_free_waiter(&waiter);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Slow path try-lock function:
|
||||
*/
|
||||
static inline int
|
||||
rt_mutex_slowtrylock(struct rt_mutex *lock)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
spin_lock(&lock->wait_lock);
|
||||
|
||||
if (likely(rt_mutex_owner(lock) != current)) {
|
||||
|
||||
ret = try_to_take_rt_mutex(lock);
|
||||
/*
|
||||
* try_to_take_rt_mutex() sets the lock waiters
|
||||
* bit unconditionally. Clean this up.
|
||||
*/
|
||||
fixup_rt_mutex_waiters(lock);
|
||||
}
|
||||
|
||||
spin_unlock(&lock->wait_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Slow path to release a rt-mutex:
|
||||
*/
|
||||
static void __sched
|
||||
rt_mutex_slowunlock(struct rt_mutex *lock)
|
||||
{
|
||||
spin_lock(&lock->wait_lock);
|
||||
|
||||
debug_rt_mutex_unlock(lock);
|
||||
|
||||
rt_mutex_deadlock_account_unlock(current);
|
||||
|
||||
if (!rt_mutex_has_waiters(lock)) {
|
||||
lock->owner = NULL;
|
||||
spin_unlock(&lock->wait_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
wakeup_next_waiter(lock);
|
||||
|
||||
spin_unlock(&lock->wait_lock);
|
||||
|
||||
/* Undo pi boosting if necessary: */
|
||||
rt_mutex_adjust_prio(current);
|
||||
}
|
||||
|
||||
/*
|
||||
* debug aware fast / slowpath lock,trylock,unlock
|
||||
*
|
||||
* The atomic acquire/release ops are compiled away, when either the
|
||||
* architecture does not support cmpxchg or when debugging is enabled.
|
||||
*/
|
||||
static inline int
|
||||
rt_mutex_fastlock(struct rt_mutex *lock, int state,
|
||||
int detect_deadlock,
|
||||
int (*slowfn)(struct rt_mutex *lock, int state,
|
||||
struct hrtimer_sleeper *timeout,
|
||||
int detect_deadlock))
|
||||
{
|
||||
if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {
|
||||
rt_mutex_deadlock_account_lock(lock, current);
|
||||
return 0;
|
||||
} else
|
||||
return slowfn(lock, state, NULL, detect_deadlock);
|
||||
}
|
||||
|
||||
static inline int
|
||||
rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
|
||||
struct hrtimer_sleeper *timeout, int detect_deadlock,
|
||||
int (*slowfn)(struct rt_mutex *lock, int state,
|
||||
struct hrtimer_sleeper *timeout,
|
||||
int detect_deadlock))
|
||||
{
|
||||
if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {
|
||||
rt_mutex_deadlock_account_lock(lock, current);
|
||||
return 0;
|
||||
} else
|
||||
return slowfn(lock, state, timeout, detect_deadlock);
|
||||
}
|
||||
|
||||
static inline int
|
||||
rt_mutex_fasttrylock(struct rt_mutex *lock,
|
||||
int (*slowfn)(struct rt_mutex *lock))
|
||||
{
|
||||
if (likely(rt_mutex_cmpxchg(lock, NULL, current))) {
|
||||
rt_mutex_deadlock_account_lock(lock, current);
|
||||
return 1;
|
||||
}
|
||||
return slowfn(lock);
|
||||
}
|
||||
|
||||
static inline void
|
||||
rt_mutex_fastunlock(struct rt_mutex *lock,
|
||||
void (*slowfn)(struct rt_mutex *lock))
|
||||
{
|
||||
if (likely(rt_mutex_cmpxchg(lock, current, NULL)))
|
||||
rt_mutex_deadlock_account_unlock(current);
|
||||
else
|
||||
slowfn(lock);
|
||||
}
|
||||
|
||||
/**
|
||||
* rt_mutex_lock - lock a rt_mutex
|
||||
*
|
||||
* @lock: the rt_mutex to be locked
|
||||
*/
|
||||
void __sched rt_mutex_lock(struct rt_mutex *lock)
|
||||
{
|
||||
might_sleep();
|
||||
|
||||
rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, 0, rt_mutex_slowlock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rt_mutex_lock);
|
||||
|
||||
/**
|
||||
* rt_mutex_lock_interruptible - lock a rt_mutex interruptible
|
||||
*
|
||||
* @lock: the rt_mutex to be locked
|
||||
* @detect_deadlock: deadlock detection on/off
|
||||
*
|
||||
* Returns:
|
||||
* 0 on success
|
||||
* -EINTR when interrupted by a signal
|
||||
* -EDEADLK when the lock would deadlock (when deadlock detection is on)
|
||||
*/
|
||||
int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock,
|
||||
int detect_deadlock)
|
||||
{
|
||||
might_sleep();
|
||||
|
||||
return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE,
|
||||
detect_deadlock, rt_mutex_slowlock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
|
||||
|
||||
/**
|
||||
* rt_mutex_lock_interruptible_ktime - lock a rt_mutex interruptible
|
||||
* the timeout structure is provided
|
||||
* by the caller
|
||||
*
|
||||
* @lock: the rt_mutex to be locked
|
||||
* @timeout: timeout structure or NULL (no timeout)
|
||||
* @detect_deadlock: deadlock detection on/off
|
||||
*
|
||||
* Returns:
|
||||
* 0 on success
|
||||
* -EINTR when interrupted by a signal
|
||||
* -ETIMEOUT when the timeout expired
|
||||
* -EDEADLK when the lock would deadlock (when deadlock detection is on)
|
||||
*/
|
||||
int
|
||||
rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout,
|
||||
int detect_deadlock)
|
||||
{
|
||||
might_sleep();
|
||||
|
||||
return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
|
||||
detect_deadlock, rt_mutex_slowlock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
|
||||
|
||||
/**
|
||||
* rt_mutex_trylock - try to lock a rt_mutex
|
||||
*
|
||||
* @lock: the rt_mutex to be locked
|
||||
*
|
||||
* Returns 1 on success and 0 on contention
|
||||
*/
|
||||
int __sched rt_mutex_trylock(struct rt_mutex *lock)
|
||||
{
|
||||
return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rt_mutex_trylock);
|
||||
|
||||
/**
|
||||
* rt_mutex_unlock - unlock a rt_mutex
|
||||
*
|
||||
* @lock: the rt_mutex to be unlocked
|
||||
*/
|
||||
void __sched rt_mutex_unlock(struct rt_mutex *lock)
|
||||
{
|
||||
rt_mutex_fastunlock(lock, rt_mutex_slowunlock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rt_mutex_unlock);
|
||||
|
||||
/***
|
||||
* rt_mutex_destroy - mark a mutex unusable
|
||||
* @lock: the mutex to be destroyed
|
||||
*
|
||||
* This function marks the mutex uninitialized, and any subsequent
|
||||
* use of the mutex is forbidden. The mutex must not be locked when
|
||||
* this function is called.
|
||||
*/
|
||||
void rt_mutex_destroy(struct rt_mutex *lock)
|
||||
{
|
||||
WARN_ON(rt_mutex_is_locked(lock));
|
||||
#ifdef CONFIG_DEBUG_RT_MUTEXES
|
||||
lock->magic = NULL;
|
||||
#endif
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL_GPL(rt_mutex_destroy);
|
||||
|
||||
/**
|
||||
* __rt_mutex_init - initialize the rt lock
|
||||
*
|
||||
* @lock: the rt lock to be initialized
|
||||
*
|
||||
* Initialize the rt lock to unlocked state.
|
||||
*
|
||||
* Initializing of a locked rt lock is not allowed
|
||||
*/
|
||||
void __rt_mutex_init(struct rt_mutex *lock, const char *name)
|
||||
{
|
||||
lock->owner = NULL;
|
||||
spin_lock_init(&lock->wait_lock);
|
||||
plist_head_init(&lock->wait_list, &lock->wait_lock);
|
||||
|
||||
debug_rt_mutex_init(lock, name);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__rt_mutex_init);
|
||||
|
||||
/**
|
||||
* rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
|
||||
* proxy owner
|
||||
*
|
||||
* @lock: the rt_mutex to be locked
|
||||
* @proxy_owner:the task to set as owner
|
||||
*
|
||||
* No locking. Caller has to do serializing itself
|
||||
* Special API call for PI-futex support
|
||||
*/
|
||||
void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
|
||||
struct task_struct *proxy_owner)
|
||||
{
|
||||
__rt_mutex_init(lock, NULL);
|
||||
debug_rt_mutex_proxy_lock(lock, proxy_owner);
|
||||
rt_mutex_set_owner(lock, proxy_owner, 0);
|
||||
rt_mutex_deadlock_account_lock(lock, proxy_owner);
|
||||
}
|
||||
|
||||
/**
|
||||
* rt_mutex_proxy_unlock - release a lock on behalf of owner
|
||||
*
|
||||
* @lock: the rt_mutex to be locked
|
||||
*
|
||||
* No locking. Caller has to do serializing itself
|
||||
* Special API call for PI-futex support
|
||||
*/
|
||||
void rt_mutex_proxy_unlock(struct rt_mutex *lock,
|
||||
struct task_struct *proxy_owner)
|
||||
{
|
||||
debug_rt_mutex_proxy_unlock(lock);
|
||||
rt_mutex_set_owner(lock, NULL, 0);
|
||||
rt_mutex_deadlock_account_unlock(proxy_owner);
|
||||
}
|
||||
|
||||
/**
|
||||
* rt_mutex_next_owner - return the next owner of the lock
|
||||
*
|
||||
* @lock: the rt lock query
|
||||
*
|
||||
* Returns the next owner of the lock or NULL
|
||||
*
|
||||
* Caller has to serialize against other accessors to the lock
|
||||
* itself.
|
||||
*
|
||||
* Special API call for PI-futex support
|
||||
*/
|
||||
struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock)
|
||||
{
|
||||
if (!rt_mutex_has_waiters(lock))
|
||||
return NULL;
|
||||
|
||||
return rt_mutex_top_waiter(lock)->task;
|
||||
}
|
||||
26
kernel/rtmutex.h
Normal file
26
kernel/rtmutex.h
Normal file
@@ -0,0 +1,26 @@
|
||||
/*
|
||||
* RT-Mutexes: blocking mutual exclusion locks with PI support
|
||||
*
|
||||
* started by Ingo Molnar and Thomas Gleixner:
|
||||
*
|
||||
* Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
|
||||
* Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
|
||||
*
|
||||
* This file contains macros used solely by rtmutex.c.
|
||||
* Non-debug version.
|
||||
*/
|
||||
|
||||
#define rt_mutex_deadlock_check(l) (0)
|
||||
#define rt_mutex_deadlock_account_lock(m, t) do { } while (0)
|
||||
#define rt_mutex_deadlock_account_unlock(l) do { } while (0)
|
||||
#define debug_rt_mutex_init_waiter(w) do { } while (0)
|
||||
#define debug_rt_mutex_free_waiter(w) do { } while (0)
|
||||
#define debug_rt_mutex_lock(l) do { } while (0)
|
||||
#define debug_rt_mutex_proxy_lock(l,p) do { } while (0)
|
||||
#define debug_rt_mutex_proxy_unlock(l) do { } while (0)
|
||||
#define debug_rt_mutex_unlock(l) do { } while (0)
|
||||
#define debug_rt_mutex_init(m, n) do { } while (0)
|
||||
#define debug_rt_mutex_deadlock(d, a ,l) do { } while (0)
|
||||
#define debug_rt_mutex_print_deadlock(w) do { } while (0)
|
||||
#define debug_rt_mutex_detect_deadlock(w,d) (d)
|
||||
#define debug_rt_mutex_reset_waiter(w) do { } while (0)
|
||||
123
kernel/rtmutex_common.h
Normal file
123
kernel/rtmutex_common.h
Normal file
@@ -0,0 +1,123 @@
|
||||
/*
|
||||
* RT Mutexes: blocking mutual exclusion locks with PI support
|
||||
*
|
||||
* started by Ingo Molnar and Thomas Gleixner:
|
||||
*
|
||||
* Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
|
||||
* Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
|
||||
*
|
||||
* This file contains the private data structure and API definitions.
|
||||
*/
|
||||
|
||||
#ifndef __KERNEL_RTMUTEX_COMMON_H
|
||||
#define __KERNEL_RTMUTEX_COMMON_H
|
||||
|
||||
#include <linux/rtmutex.h>
|
||||
|
||||
/*
|
||||
* The rtmutex in kernel tester is independent of rtmutex debugging. We
|
||||
* call schedule_rt_mutex_test() instead of schedule() for the tasks which
|
||||
* belong to the tester. That way we can delay the wakeup path of those
|
||||
* threads to provoke lock stealing and testing of complex boosting scenarios.
|
||||
*/
|
||||
#ifdef CONFIG_RT_MUTEX_TESTER
|
||||
|
||||
extern void schedule_rt_mutex_test(struct rt_mutex *lock);
|
||||
|
||||
#define schedule_rt_mutex(_lock) \
|
||||
do { \
|
||||
if (!(current->flags & PF_MUTEX_TESTER)) \
|
||||
schedule(); \
|
||||
else \
|
||||
schedule_rt_mutex_test(_lock); \
|
||||
} while (0)
|
||||
|
||||
#else
|
||||
# define schedule_rt_mutex(_lock) schedule()
|
||||
#endif
|
||||
|
||||
/*
|
||||
* This is the control structure for tasks blocked on a rt_mutex,
|
||||
* which is allocated on the kernel stack on of the blocked task.
|
||||
*
|
||||
* @list_entry: pi node to enqueue into the mutex waiters list
|
||||
* @pi_list_entry: pi node to enqueue into the mutex owner waiters list
|
||||
* @task: task reference to the blocked task
|
||||
*/
|
||||
struct rt_mutex_waiter {
|
||||
struct plist_node list_entry;
|
||||
struct plist_node pi_list_entry;
|
||||
struct task_struct *task;
|
||||
struct rt_mutex *lock;
|
||||
#ifdef CONFIG_DEBUG_RT_MUTEXES
|
||||
unsigned long ip;
|
||||
pid_t deadlock_task_pid;
|
||||
struct rt_mutex *deadlock_lock;
|
||||
#endif
|
||||
};
|
||||
|
||||
/*
|
||||
* Various helpers to access the waiters-plist:
|
||||
*/
|
||||
static inline int rt_mutex_has_waiters(struct rt_mutex *lock)
|
||||
{
|
||||
return !plist_head_empty(&lock->wait_list);
|
||||
}
|
||||
|
||||
static inline struct rt_mutex_waiter *
|
||||
rt_mutex_top_waiter(struct rt_mutex *lock)
|
||||
{
|
||||
struct rt_mutex_waiter *w;
|
||||
|
||||
w = plist_first_entry(&lock->wait_list, struct rt_mutex_waiter,
|
||||
list_entry);
|
||||
BUG_ON(w->lock != lock);
|
||||
|
||||
return w;
|
||||
}
|
||||
|
||||
static inline int task_has_pi_waiters(struct task_struct *p)
|
||||
{
|
||||
return !plist_head_empty(&p->pi_waiters);
|
||||
}
|
||||
|
||||
static inline struct rt_mutex_waiter *
|
||||
task_top_pi_waiter(struct task_struct *p)
|
||||
{
|
||||
return plist_first_entry(&p->pi_waiters, struct rt_mutex_waiter,
|
||||
pi_list_entry);
|
||||
}
|
||||
|
||||
/*
|
||||
* lock->owner state tracking:
|
||||
*/
|
||||
#define RT_MUTEX_OWNER_PENDING 1UL
|
||||
#define RT_MUTEX_HAS_WAITERS 2UL
|
||||
#define RT_MUTEX_OWNER_MASKALL 3UL
|
||||
|
||||
static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock)
|
||||
{
|
||||
return (struct task_struct *)
|
||||
((unsigned long)lock->owner & ~RT_MUTEX_OWNER_MASKALL);
|
||||
}
|
||||
|
||||
static inline struct task_struct *rt_mutex_real_owner(struct rt_mutex *lock)
|
||||
{
|
||||
return (struct task_struct *)
|
||||
((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
|
||||
}
|
||||
|
||||
static inline unsigned long rt_mutex_owner_pending(struct rt_mutex *lock)
|
||||
{
|
||||
return (unsigned long)lock->owner & RT_MUTEX_OWNER_PENDING;
|
||||
}
|
||||
|
||||
/*
|
||||
* PI-futex support (proxy locking functions, etc.):
|
||||
*/
|
||||
extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock);
|
||||
extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
|
||||
struct task_struct *proxy_owner);
|
||||
extern void rt_mutex_proxy_unlock(struct rt_mutex *lock,
|
||||
struct task_struct *proxy_owner);
|
||||
#endif
|
||||
147
kernel/rwsem.c
Normal file
147
kernel/rwsem.c
Normal file
@@ -0,0 +1,147 @@
|
||||
/* kernel/rwsem.c: R/W semaphores, public implementation
|
||||
*
|
||||
* Written by David Howells (dhowells@redhat.com).
|
||||
* Derived from asm-i386/semaphore.h
|
||||
*/
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/rwsem.h>
|
||||
|
||||
#include <asm/system.h>
|
||||
#include <asm/atomic.h>
|
||||
|
||||
/*
|
||||
* lock for reading
|
||||
*/
|
||||
void down_read(struct rw_semaphore *sem)
|
||||
{
|
||||
might_sleep();
|
||||
rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
|
||||
|
||||
__down_read(sem);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(down_read);
|
||||
|
||||
/*
|
||||
* trylock for reading -- returns 1 if successful, 0 if contention
|
||||
*/
|
||||
int down_read_trylock(struct rw_semaphore *sem)
|
||||
{
|
||||
int ret = __down_read_trylock(sem);
|
||||
|
||||
if (ret == 1)
|
||||
rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_);
|
||||
return ret;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(down_read_trylock);
|
||||
|
||||
/*
|
||||
* lock for writing
|
||||
*/
|
||||
void down_write(struct rw_semaphore *sem)
|
||||
{
|
||||
might_sleep();
|
||||
rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
|
||||
|
||||
__down_write(sem);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(down_write);
|
||||
|
||||
/*
|
||||
* trylock for writing -- returns 1 if successful, 0 if contention
|
||||
*/
|
||||
int down_write_trylock(struct rw_semaphore *sem)
|
||||
{
|
||||
int ret = __down_write_trylock(sem);
|
||||
|
||||
if (ret == 1)
|
||||
rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
|
||||
return ret;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(down_write_trylock);
|
||||
|
||||
/*
|
||||
* release a read lock
|
||||
*/
|
||||
void up_read(struct rw_semaphore *sem)
|
||||
{
|
||||
rwsem_release(&sem->dep_map, 1, _RET_IP_);
|
||||
|
||||
__up_read(sem);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(up_read);
|
||||
|
||||
/*
|
||||
* release a write lock
|
||||
*/
|
||||
void up_write(struct rw_semaphore *sem)
|
||||
{
|
||||
rwsem_release(&sem->dep_map, 1, _RET_IP_);
|
||||
|
||||
__up_write(sem);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(up_write);
|
||||
|
||||
/*
|
||||
* downgrade write lock to read lock
|
||||
*/
|
||||
void downgrade_write(struct rw_semaphore *sem)
|
||||
{
|
||||
/*
|
||||
* lockdep: a downgraded write will live on as a write
|
||||
* dependency.
|
||||
*/
|
||||
__downgrade_write(sem);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(downgrade_write);
|
||||
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
|
||||
void down_read_nested(struct rw_semaphore *sem, int subclass)
|
||||
{
|
||||
might_sleep();
|
||||
rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
|
||||
|
||||
__down_read(sem);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(down_read_nested);
|
||||
|
||||
void down_read_non_owner(struct rw_semaphore *sem)
|
||||
{
|
||||
might_sleep();
|
||||
|
||||
__down_read(sem);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(down_read_non_owner);
|
||||
|
||||
void down_write_nested(struct rw_semaphore *sem, int subclass)
|
||||
{
|
||||
might_sleep();
|
||||
rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
|
||||
|
||||
__down_write_nested(sem, subclass);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(down_write_nested);
|
||||
|
||||
void up_read_non_owner(struct rw_semaphore *sem)
|
||||
{
|
||||
__up_read(sem);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(up_read_non_owner);
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
6462
kernel/sched.c
Normal file
6462
kernel/sched.c
Normal file
File diff suppressed because it is too large
Load Diff
276
kernel/sched_debug.c
Normal file
276
kernel/sched_debug.c
Normal file
@@ -0,0 +1,276 @@
|
||||
/*
|
||||
* kernel/time/sched_debug.c
|
||||
*
|
||||
* Print the CFS rbtree
|
||||
*
|
||||
* Copyright(C) 2007, Red Hat, Inc., Ingo Molnar
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/kallsyms.h>
|
||||
#include <linux/utsname.h>
|
||||
|
||||
typedef void (*print_fn_t)(struct seq_file *m, unsigned int *classes);
|
||||
|
||||
/*
|
||||
* This allows printing both to /proc/sched_debug and
|
||||
* to the console
|
||||
*/
|
||||
#define SEQ_printf(m, x...) \
|
||||
do { \
|
||||
if (m) \
|
||||
seq_printf(m, x); \
|
||||
else \
|
||||
printk(x); \
|
||||
} while (0)
|
||||
|
||||
static void
|
||||
print_task(struct seq_file *m, struct rq *rq, struct task_struct *p, u64 now)
|
||||
{
|
||||
if (rq->curr == p)
|
||||
SEQ_printf(m, "R");
|
||||
else
|
||||
SEQ_printf(m, " ");
|
||||
|
||||
SEQ_printf(m, "%15s %5d %15Ld %13Ld %13Ld %9Ld %5d "
|
||||
"%15Ld %15Ld %15Ld %15Ld %15Ld\n",
|
||||
p->comm, p->pid,
|
||||
(long long)p->se.fair_key,
|
||||
(long long)(p->se.fair_key - rq->cfs.fair_clock),
|
||||
(long long)p->se.wait_runtime,
|
||||
(long long)(p->nvcsw + p->nivcsw),
|
||||
p->prio,
|
||||
(long long)p->se.sum_exec_runtime,
|
||||
(long long)p->se.sum_wait_runtime,
|
||||
(long long)p->se.sum_sleep_runtime,
|
||||
(long long)p->se.wait_runtime_overruns,
|
||||
(long long)p->se.wait_runtime_underruns);
|
||||
}
|
||||
|
||||
static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu, u64 now)
|
||||
{
|
||||
struct task_struct *g, *p;
|
||||
|
||||
SEQ_printf(m,
|
||||
"\nrunnable tasks:\n"
|
||||
" task PID tree-key delta waiting"
|
||||
" switches prio"
|
||||
" sum-exec sum-wait sum-sleep"
|
||||
" wait-overrun wait-underrun\n"
|
||||
"------------------------------------------------------------------"
|
||||
"----------------"
|
||||
"------------------------------------------------"
|
||||
"--------------------------------\n");
|
||||
|
||||
read_lock_irq(&tasklist_lock);
|
||||
|
||||
do_each_thread(g, p) {
|
||||
if (!p->se.on_rq || task_cpu(p) != rq_cpu)
|
||||
continue;
|
||||
|
||||
print_task(m, rq, p, now);
|
||||
} while_each_thread(g, p);
|
||||
|
||||
read_unlock_irq(&tasklist_lock);
|
||||
}
|
||||
|
||||
static void
|
||||
print_cfs_rq_runtime_sum(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
|
||||
{
|
||||
s64 wait_runtime_rq_sum = 0;
|
||||
struct task_struct *p;
|
||||
struct rb_node *curr;
|
||||
unsigned long flags;
|
||||
struct rq *rq = &per_cpu(runqueues, cpu);
|
||||
|
||||
spin_lock_irqsave(&rq->lock, flags);
|
||||
curr = first_fair(cfs_rq);
|
||||
while (curr) {
|
||||
p = rb_entry(curr, struct task_struct, se.run_node);
|
||||
wait_runtime_rq_sum += p->se.wait_runtime;
|
||||
|
||||
curr = rb_next(curr);
|
||||
}
|
||||
spin_unlock_irqrestore(&rq->lock, flags);
|
||||
|
||||
SEQ_printf(m, " .%-30s: %Ld\n", "wait_runtime_rq_sum",
|
||||
(long long)wait_runtime_rq_sum);
|
||||
}
|
||||
|
||||
void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq, u64 now)
|
||||
{
|
||||
SEQ_printf(m, "\ncfs_rq %p\n", cfs_rq);
|
||||
|
||||
#define P(x) \
|
||||
SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(cfs_rq->x))
|
||||
|
||||
P(fair_clock);
|
||||
P(exec_clock);
|
||||
P(wait_runtime);
|
||||
P(wait_runtime_overruns);
|
||||
P(wait_runtime_underruns);
|
||||
P(sleeper_bonus);
|
||||
#undef P
|
||||
|
||||
print_cfs_rq_runtime_sum(m, cpu, cfs_rq);
|
||||
}
|
||||
|
||||
static void print_cpu(struct seq_file *m, int cpu, u64 now)
|
||||
{
|
||||
struct rq *rq = &per_cpu(runqueues, cpu);
|
||||
|
||||
#ifdef CONFIG_X86
|
||||
{
|
||||
unsigned int freq = cpu_khz ? : 1;
|
||||
|
||||
SEQ_printf(m, "\ncpu#%d, %u.%03u MHz\n",
|
||||
cpu, freq / 1000, (freq % 1000));
|
||||
}
|
||||
#else
|
||||
SEQ_printf(m, "\ncpu#%d\n", cpu);
|
||||
#endif
|
||||
|
||||
#define P(x) \
|
||||
SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rq->x))
|
||||
|
||||
P(nr_running);
|
||||
SEQ_printf(m, " .%-30s: %lu\n", "load",
|
||||
rq->ls.load.weight);
|
||||
P(ls.delta_fair);
|
||||
P(ls.delta_exec);
|
||||
P(nr_switches);
|
||||
P(nr_load_updates);
|
||||
P(nr_uninterruptible);
|
||||
SEQ_printf(m, " .%-30s: %lu\n", "jiffies", jiffies);
|
||||
P(next_balance);
|
||||
P(curr->pid);
|
||||
P(clock);
|
||||
P(prev_clock_raw);
|
||||
P(clock_warps);
|
||||
P(clock_overflows);
|
||||
P(clock_unstable_events);
|
||||
P(clock_max_delta);
|
||||
P(cpu_load[0]);
|
||||
P(cpu_load[1]);
|
||||
P(cpu_load[2]);
|
||||
P(cpu_load[3]);
|
||||
P(cpu_load[4]);
|
||||
#undef P
|
||||
|
||||
print_cfs_stats(m, cpu, now);
|
||||
|
||||
print_rq(m, rq, cpu, now);
|
||||
}
|
||||
|
||||
static int sched_debug_show(struct seq_file *m, void *v)
|
||||
{
|
||||
u64 now = ktime_to_ns(ktime_get());
|
||||
int cpu;
|
||||
|
||||
SEQ_printf(m, "Sched Debug Version: v0.04, cfs-v19, %s %.*s\n",
|
||||
init_utsname()->release,
|
||||
(int)strcspn(init_utsname()->version, " "),
|
||||
init_utsname()->version);
|
||||
|
||||
SEQ_printf(m, "now at %Lu nsecs\n", (unsigned long long)now);
|
||||
|
||||
for_each_online_cpu(cpu)
|
||||
print_cpu(m, cpu, now);
|
||||
|
||||
SEQ_printf(m, "\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void sysrq_sched_debug_show(void)
|
||||
{
|
||||
sched_debug_show(NULL, NULL);
|
||||
}
|
||||
|
||||
static int sched_debug_open(struct inode *inode, struct file *filp)
|
||||
{
|
||||
return single_open(filp, sched_debug_show, NULL);
|
||||
}
|
||||
|
||||
static struct file_operations sched_debug_fops = {
|
||||
.open = sched_debug_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = seq_release,
|
||||
};
|
||||
|
||||
static int __init init_sched_debug_procfs(void)
|
||||
{
|
||||
struct proc_dir_entry *pe;
|
||||
|
||||
pe = create_proc_entry("sched_debug", 0644, NULL);
|
||||
if (!pe)
|
||||
return -ENOMEM;
|
||||
|
||||
pe->proc_fops = &sched_debug_fops;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
__initcall(init_sched_debug_procfs);
|
||||
|
||||
void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
|
||||
{
|
||||
unsigned long flags;
|
||||
int num_threads = 1;
|
||||
|
||||
rcu_read_lock();
|
||||
if (lock_task_sighand(p, &flags)) {
|
||||
num_threads = atomic_read(&p->signal->count);
|
||||
unlock_task_sighand(p, &flags);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, p->pid, num_threads);
|
||||
SEQ_printf(m, "----------------------------------------------\n");
|
||||
#define P(F) \
|
||||
SEQ_printf(m, "%-25s:%20Ld\n", #F, (long long)p->F)
|
||||
|
||||
P(se.wait_start);
|
||||
P(se.wait_start_fair);
|
||||
P(se.exec_start);
|
||||
P(se.sleep_start);
|
||||
P(se.sleep_start_fair);
|
||||
P(se.block_start);
|
||||
P(se.sleep_max);
|
||||
P(se.block_max);
|
||||
P(se.exec_max);
|
||||
P(se.wait_max);
|
||||
P(se.wait_runtime);
|
||||
P(se.wait_runtime_overruns);
|
||||
P(se.wait_runtime_underruns);
|
||||
P(se.sum_wait_runtime);
|
||||
P(se.sum_exec_runtime);
|
||||
SEQ_printf(m, "%-25s:%20Ld\n",
|
||||
"nr_switches", (long long)(p->nvcsw + p->nivcsw));
|
||||
P(se.load.weight);
|
||||
P(policy);
|
||||
P(prio);
|
||||
#undef P
|
||||
|
||||
{
|
||||
u64 t0, t1;
|
||||
|
||||
t0 = sched_clock();
|
||||
t1 = sched_clock();
|
||||
SEQ_printf(m, "%-25s:%20Ld\n", "clock-delta", (long long)(t1-t0));
|
||||
}
|
||||
}
|
||||
|
||||
void proc_sched_set_task(struct task_struct *p)
|
||||
{
|
||||
p->se.sleep_max = p->se.block_max = p->se.exec_max = p->se.wait_max = 0;
|
||||
p->se.wait_runtime_overruns = p->se.wait_runtime_underruns = 0;
|
||||
p->se.sum_exec_runtime = 0;
|
||||
}
|
||||
1107
kernel/sched_fair.c
Normal file
1107
kernel/sched_fair.c
Normal file
File diff suppressed because it is too large
Load Diff
71
kernel/sched_idletask.c
Normal file
71
kernel/sched_idletask.c
Normal file
@@ -0,0 +1,71 @@
|
||||
/*
|
||||
* idle-task scheduling class.
|
||||
*
|
||||
* (NOTE: these are not related to SCHED_IDLE tasks which are
|
||||
* handled in sched_fair.c)
|
||||
*/
|
||||
|
||||
/*
|
||||
* Idle tasks are unconditionally rescheduled:
|
||||
*/
|
||||
static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p)
|
||||
{
|
||||
resched_task(rq->idle);
|
||||
}
|
||||
|
||||
static struct task_struct *pick_next_task_idle(struct rq *rq, u64 now)
|
||||
{
|
||||
schedstat_inc(rq, sched_goidle);
|
||||
|
||||
return rq->idle;
|
||||
}
|
||||
|
||||
/*
|
||||
* It is not legal to sleep in the idle task - print a warning
|
||||
* message if some code attempts to do it:
|
||||
*/
|
||||
static void
|
||||
dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep, u64 now)
|
||||
{
|
||||
spin_unlock_irq(&rq->lock);
|
||||
printk(KERN_ERR "bad: scheduling from the idle thread!\n");
|
||||
dump_stack();
|
||||
spin_lock_irq(&rq->lock);
|
||||
}
|
||||
|
||||
static void put_prev_task_idle(struct rq *rq, struct task_struct *prev, u64 now)
|
||||
{
|
||||
}
|
||||
|
||||
static int
|
||||
load_balance_idle(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
unsigned long max_nr_move, unsigned long max_load_move,
|
||||
struct sched_domain *sd, enum cpu_idle_type idle,
|
||||
int *all_pinned, unsigned long *total_load_moved)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void task_tick_idle(struct rq *rq, struct task_struct *curr)
|
||||
{
|
||||
}
|
||||
|
||||
/*
|
||||
* Simple, special scheduling class for the per-CPU idle tasks:
|
||||
*/
|
||||
static struct sched_class idle_sched_class __read_mostly = {
|
||||
/* no enqueue/yield_task for idle tasks */
|
||||
|
||||
/* dequeue is not valid, we print a debug message there: */
|
||||
.dequeue_task = dequeue_task_idle,
|
||||
|
||||
.check_preempt_curr = check_preempt_curr_idle,
|
||||
|
||||
.pick_next_task = pick_next_task_idle,
|
||||
.put_prev_task = put_prev_task_idle,
|
||||
|
||||
.load_balance = load_balance_idle,
|
||||
|
||||
.task_tick = task_tick_idle,
|
||||
/* no .task_new for idle tasks */
|
||||
};
|
||||
255
kernel/sched_rt.c
Normal file
255
kernel/sched_rt.c
Normal file
@@ -0,0 +1,255 @@
|
||||
/*
|
||||
* Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
|
||||
* policies)
|
||||
*/
|
||||
|
||||
/*
|
||||
* Update the current task's runtime statistics. Skip current tasks that
|
||||
* are not in our scheduling class.
|
||||
*/
|
||||
static inline void update_curr_rt(struct rq *rq, u64 now)
|
||||
{
|
||||
struct task_struct *curr = rq->curr;
|
||||
u64 delta_exec;
|
||||
|
||||
if (!task_has_rt_policy(curr))
|
||||
return;
|
||||
|
||||
delta_exec = now - curr->se.exec_start;
|
||||
if (unlikely((s64)delta_exec < 0))
|
||||
delta_exec = 0;
|
||||
if (unlikely(delta_exec > curr->se.exec_max))
|
||||
curr->se.exec_max = delta_exec;
|
||||
|
||||
curr->se.sum_exec_runtime += delta_exec;
|
||||
curr->se.exec_start = now;
|
||||
}
|
||||
|
||||
static void
|
||||
enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup, u64 now)
|
||||
{
|
||||
struct prio_array *array = &rq->rt.active;
|
||||
|
||||
list_add_tail(&p->run_list, array->queue + p->prio);
|
||||
__set_bit(p->prio, array->bitmap);
|
||||
}
|
||||
|
||||
/*
|
||||
* Adding/removing a task to/from a priority array:
|
||||
*/
|
||||
static void
|
||||
dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep, u64 now)
|
||||
{
|
||||
struct prio_array *array = &rq->rt.active;
|
||||
|
||||
update_curr_rt(rq, now);
|
||||
|
||||
list_del(&p->run_list);
|
||||
if (list_empty(array->queue + p->prio))
|
||||
__clear_bit(p->prio, array->bitmap);
|
||||
}
|
||||
|
||||
/*
|
||||
* Put task to the end of the run list without the overhead of dequeue
|
||||
* followed by enqueue.
|
||||
*/
|
||||
static void requeue_task_rt(struct rq *rq, struct task_struct *p)
|
||||
{
|
||||
struct prio_array *array = &rq->rt.active;
|
||||
|
||||
list_move_tail(&p->run_list, array->queue + p->prio);
|
||||
}
|
||||
|
||||
static void
|
||||
yield_task_rt(struct rq *rq, struct task_struct *p)
|
||||
{
|
||||
requeue_task_rt(rq, p);
|
||||
}
|
||||
|
||||
/*
|
||||
* Preempt the current task with a newly woken task if needed:
|
||||
*/
|
||||
static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
|
||||
{
|
||||
if (p->prio < rq->curr->prio)
|
||||
resched_task(rq->curr);
|
||||
}
|
||||
|
||||
static struct task_struct * pick_next_task_rt(struct rq *rq, u64 now)
|
||||
{
|
||||
struct prio_array *array = &rq->rt.active;
|
||||
struct task_struct *next;
|
||||
struct list_head *queue;
|
||||
int idx;
|
||||
|
||||
idx = sched_find_first_bit(array->bitmap);
|
||||
if (idx >= MAX_RT_PRIO)
|
||||
return NULL;
|
||||
|
||||
queue = array->queue + idx;
|
||||
next = list_entry(queue->next, struct task_struct, run_list);
|
||||
|
||||
next->se.exec_start = now;
|
||||
|
||||
return next;
|
||||
}
|
||||
|
||||
static void put_prev_task_rt(struct rq *rq, struct task_struct *p, u64 now)
|
||||
{
|
||||
update_curr_rt(rq, now);
|
||||
p->se.exec_start = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Load-balancing iterator. Note: while the runqueue stays locked
|
||||
* during the whole iteration, the current task might be
|
||||
* dequeued so the iterator has to be dequeue-safe. Here we
|
||||
* achieve that by always pre-iterating before returning
|
||||
* the current task:
|
||||
*/
|
||||
static struct task_struct *load_balance_start_rt(void *arg)
|
||||
{
|
||||
struct rq *rq = arg;
|
||||
struct prio_array *array = &rq->rt.active;
|
||||
struct list_head *head, *curr;
|
||||
struct task_struct *p;
|
||||
int idx;
|
||||
|
||||
idx = sched_find_first_bit(array->bitmap);
|
||||
if (idx >= MAX_RT_PRIO)
|
||||
return NULL;
|
||||
|
||||
head = array->queue + idx;
|
||||
curr = head->prev;
|
||||
|
||||
p = list_entry(curr, struct task_struct, run_list);
|
||||
|
||||
curr = curr->prev;
|
||||
|
||||
rq->rt.rt_load_balance_idx = idx;
|
||||
rq->rt.rt_load_balance_head = head;
|
||||
rq->rt.rt_load_balance_curr = curr;
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
static struct task_struct *load_balance_next_rt(void *arg)
|
||||
{
|
||||
struct rq *rq = arg;
|
||||
struct prio_array *array = &rq->rt.active;
|
||||
struct list_head *head, *curr;
|
||||
struct task_struct *p;
|
||||
int idx;
|
||||
|
||||
idx = rq->rt.rt_load_balance_idx;
|
||||
head = rq->rt.rt_load_balance_head;
|
||||
curr = rq->rt.rt_load_balance_curr;
|
||||
|
||||
/*
|
||||
* If we arrived back to the head again then
|
||||
* iterate to the next queue (if any):
|
||||
*/
|
||||
if (unlikely(head == curr)) {
|
||||
int next_idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);
|
||||
|
||||
if (next_idx >= MAX_RT_PRIO)
|
||||
return NULL;
|
||||
|
||||
idx = next_idx;
|
||||
head = array->queue + idx;
|
||||
curr = head->prev;
|
||||
|
||||
rq->rt.rt_load_balance_idx = idx;
|
||||
rq->rt.rt_load_balance_head = head;
|
||||
}
|
||||
|
||||
p = list_entry(curr, struct task_struct, run_list);
|
||||
|
||||
curr = curr->prev;
|
||||
|
||||
rq->rt.rt_load_balance_curr = curr;
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
static int
|
||||
load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
unsigned long max_nr_move, unsigned long max_load_move,
|
||||
struct sched_domain *sd, enum cpu_idle_type idle,
|
||||
int *all_pinned, unsigned long *load_moved)
|
||||
{
|
||||
int this_best_prio, best_prio, best_prio_seen = 0;
|
||||
int nr_moved;
|
||||
struct rq_iterator rt_rq_iterator;
|
||||
|
||||
best_prio = sched_find_first_bit(busiest->rt.active.bitmap);
|
||||
this_best_prio = sched_find_first_bit(this_rq->rt.active.bitmap);
|
||||
|
||||
/*
|
||||
* Enable handling of the case where there is more than one task
|
||||
* with the best priority. If the current running task is one
|
||||
* of those with prio==best_prio we know it won't be moved
|
||||
* and therefore it's safe to override the skip (based on load)
|
||||
* of any task we find with that prio.
|
||||
*/
|
||||
if (busiest->curr->prio == best_prio)
|
||||
best_prio_seen = 1;
|
||||
|
||||
rt_rq_iterator.start = load_balance_start_rt;
|
||||
rt_rq_iterator.next = load_balance_next_rt;
|
||||
/* pass 'busiest' rq argument into
|
||||
* load_balance_[start|next]_rt iterators
|
||||
*/
|
||||
rt_rq_iterator.arg = busiest;
|
||||
|
||||
nr_moved = balance_tasks(this_rq, this_cpu, busiest, max_nr_move,
|
||||
max_load_move, sd, idle, all_pinned, load_moved,
|
||||
this_best_prio, best_prio, best_prio_seen,
|
||||
&rt_rq_iterator);
|
||||
|
||||
return nr_moved;
|
||||
}
|
||||
|
||||
static void task_tick_rt(struct rq *rq, struct task_struct *p)
|
||||
{
|
||||
/*
|
||||
* RR tasks need a special form of timeslice management.
|
||||
* FIFO tasks have no timeslices.
|
||||
*/
|
||||
if (p->policy != SCHED_RR)
|
||||
return;
|
||||
|
||||
if (--p->time_slice)
|
||||
return;
|
||||
|
||||
p->time_slice = static_prio_timeslice(p->static_prio);
|
||||
set_tsk_need_resched(p);
|
||||
|
||||
/* put it at the end of the queue: */
|
||||
requeue_task_rt(rq, p);
|
||||
}
|
||||
|
||||
/*
|
||||
* No parent/child timeslice management necessary for RT tasks,
|
||||
* just activate them:
|
||||
*/
|
||||
static void task_new_rt(struct rq *rq, struct task_struct *p)
|
||||
{
|
||||
activate_task(rq, p, 1);
|
||||
}
|
||||
|
||||
static struct sched_class rt_sched_class __read_mostly = {
|
||||
.enqueue_task = enqueue_task_rt,
|
||||
.dequeue_task = dequeue_task_rt,
|
||||
.yield_task = yield_task_rt,
|
||||
|
||||
.check_preempt_curr = check_preempt_curr_rt,
|
||||
|
||||
.pick_next_task = pick_next_task_rt,
|
||||
.put_prev_task = put_prev_task_rt,
|
||||
|
||||
.load_balance = load_balance_rt,
|
||||
|
||||
.task_tick = task_tick_rt,
|
||||
.task_new = task_new_rt,
|
||||
};
|
||||
235
kernel/sched_stats.h
Normal file
235
kernel/sched_stats.h
Normal file
@@ -0,0 +1,235 @@
|
||||
|
||||
#ifdef CONFIG_SCHEDSTATS
|
||||
/*
|
||||
* bump this up when changing the output format or the meaning of an existing
|
||||
* format, so that tools can adapt (or abort)
|
||||
*/
|
||||
#define SCHEDSTAT_VERSION 14
|
||||
|
||||
static int show_schedstat(struct seq_file *seq, void *v)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION);
|
||||
seq_printf(seq, "timestamp %lu\n", jiffies);
|
||||
for_each_online_cpu(cpu) {
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
#ifdef CONFIG_SMP
|
||||
struct sched_domain *sd;
|
||||
int dcnt = 0;
|
||||
#endif
|
||||
|
||||
/* runqueue-specific stats */
|
||||
seq_printf(seq,
|
||||
"cpu%d %lu %lu %lu %lu %lu %lu %lu %lu %lu %llu %llu %lu",
|
||||
cpu, rq->yld_both_empty,
|
||||
rq->yld_act_empty, rq->yld_exp_empty, rq->yld_cnt,
|
||||
rq->sched_switch, rq->sched_cnt, rq->sched_goidle,
|
||||
rq->ttwu_cnt, rq->ttwu_local,
|
||||
rq->rq_sched_info.cpu_time,
|
||||
rq->rq_sched_info.run_delay, rq->rq_sched_info.pcnt);
|
||||
|
||||
seq_printf(seq, "\n");
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/* domain-specific stats */
|
||||
preempt_disable();
|
||||
for_each_domain(cpu, sd) {
|
||||
enum cpu_idle_type itype;
|
||||
char mask_str[NR_CPUS];
|
||||
|
||||
cpumask_scnprintf(mask_str, NR_CPUS, sd->span);
|
||||
seq_printf(seq, "domain%d %s", dcnt++, mask_str);
|
||||
for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES;
|
||||
itype++) {
|
||||
seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu "
|
||||
"%lu",
|
||||
sd->lb_cnt[itype],
|
||||
sd->lb_balanced[itype],
|
||||
sd->lb_failed[itype],
|
||||
sd->lb_imbalance[itype],
|
||||
sd->lb_gained[itype],
|
||||
sd->lb_hot_gained[itype],
|
||||
sd->lb_nobusyq[itype],
|
||||
sd->lb_nobusyg[itype]);
|
||||
}
|
||||
seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu %lu"
|
||||
" %lu %lu %lu\n",
|
||||
sd->alb_cnt, sd->alb_failed, sd->alb_pushed,
|
||||
sd->sbe_cnt, sd->sbe_balanced, sd->sbe_pushed,
|
||||
sd->sbf_cnt, sd->sbf_balanced, sd->sbf_pushed,
|
||||
sd->ttwu_wake_remote, sd->ttwu_move_affine,
|
||||
sd->ttwu_move_balance);
|
||||
}
|
||||
preempt_enable();
|
||||
#endif
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int schedstat_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
unsigned int size = PAGE_SIZE * (1 + num_online_cpus() / 32);
|
||||
char *buf = kmalloc(size, GFP_KERNEL);
|
||||
struct seq_file *m;
|
||||
int res;
|
||||
|
||||
if (!buf)
|
||||
return -ENOMEM;
|
||||
res = single_open(file, show_schedstat, NULL);
|
||||
if (!res) {
|
||||
m = file->private_data;
|
||||
m->buf = buf;
|
||||
m->size = size;
|
||||
} else
|
||||
kfree(buf);
|
||||
return res;
|
||||
}
|
||||
|
||||
const struct file_operations proc_schedstat_operations = {
|
||||
.open = schedstat_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = single_release,
|
||||
};
|
||||
|
||||
/*
|
||||
* Expects runqueue lock to be held for atomicity of update
|
||||
*/
|
||||
static inline void
|
||||
rq_sched_info_arrive(struct rq *rq, unsigned long long delta)
|
||||
{
|
||||
if (rq) {
|
||||
rq->rq_sched_info.run_delay += delta;
|
||||
rq->rq_sched_info.pcnt++;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Expects runqueue lock to be held for atomicity of update
|
||||
*/
|
||||
static inline void
|
||||
rq_sched_info_depart(struct rq *rq, unsigned long long delta)
|
||||
{
|
||||
if (rq)
|
||||
rq->rq_sched_info.cpu_time += delta;
|
||||
}
|
||||
# define schedstat_inc(rq, field) do { (rq)->field++; } while (0)
|
||||
# define schedstat_add(rq, field, amt) do { (rq)->field += (amt); } while (0)
|
||||
#else /* !CONFIG_SCHEDSTATS */
|
||||
static inline void
|
||||
rq_sched_info_arrive(struct rq *rq, unsigned long long delta)
|
||||
{}
|
||||
static inline void
|
||||
rq_sched_info_depart(struct rq *rq, unsigned long long delta)
|
||||
{}
|
||||
# define schedstat_inc(rq, field) do { } while (0)
|
||||
# define schedstat_add(rq, field, amt) do { } while (0)
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
|
||||
/*
|
||||
* Called when a process is dequeued from the active array and given
|
||||
* the cpu. We should note that with the exception of interactive
|
||||
* tasks, the expired queue will become the active queue after the active
|
||||
* queue is empty, without explicitly dequeuing and requeuing tasks in the
|
||||
* expired queue. (Interactive tasks may be requeued directly to the
|
||||
* active queue, thus delaying tasks in the expired queue from running;
|
||||
* see scheduler_tick()).
|
||||
*
|
||||
* This function is only called from sched_info_arrive(), rather than
|
||||
* dequeue_task(). Even though a task may be queued and dequeued multiple
|
||||
* times as it is shuffled about, we're really interested in knowing how
|
||||
* long it was from the *first* time it was queued to the time that it
|
||||
* finally hit a cpu.
|
||||
*/
|
||||
static inline void sched_info_dequeued(struct task_struct *t)
|
||||
{
|
||||
t->sched_info.last_queued = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Called when a task finally hits the cpu. We can now calculate how
|
||||
* long it was waiting to run. We also note when it began so that we
|
||||
* can keep stats on how long its timeslice is.
|
||||
*/
|
||||
static void sched_info_arrive(struct task_struct *t)
|
||||
{
|
||||
unsigned long long now = sched_clock(), delta = 0;
|
||||
|
||||
if (t->sched_info.last_queued)
|
||||
delta = now - t->sched_info.last_queued;
|
||||
sched_info_dequeued(t);
|
||||
t->sched_info.run_delay += delta;
|
||||
t->sched_info.last_arrival = now;
|
||||
t->sched_info.pcnt++;
|
||||
|
||||
rq_sched_info_arrive(task_rq(t), delta);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called when a process is queued into either the active or expired
|
||||
* array. The time is noted and later used to determine how long we
|
||||
* had to wait for us to reach the cpu. Since the expired queue will
|
||||
* become the active queue after active queue is empty, without dequeuing
|
||||
* and requeuing any tasks, we are interested in queuing to either. It
|
||||
* is unusual but not impossible for tasks to be dequeued and immediately
|
||||
* requeued in the same or another array: this can happen in sched_yield(),
|
||||
* set_user_nice(), and even load_balance() as it moves tasks from runqueue
|
||||
* to runqueue.
|
||||
*
|
||||
* This function is only called from enqueue_task(), but also only updates
|
||||
* the timestamp if it is already not set. It's assumed that
|
||||
* sched_info_dequeued() will clear that stamp when appropriate.
|
||||
*/
|
||||
static inline void sched_info_queued(struct task_struct *t)
|
||||
{
|
||||
if (unlikely(sched_info_on()))
|
||||
if (!t->sched_info.last_queued)
|
||||
t->sched_info.last_queued = sched_clock();
|
||||
}
|
||||
|
||||
/*
|
||||
* Called when a process ceases being the active-running process, either
|
||||
* voluntarily or involuntarily. Now we can calculate how long we ran.
|
||||
*/
|
||||
static inline void sched_info_depart(struct task_struct *t)
|
||||
{
|
||||
unsigned long long delta = sched_clock() - t->sched_info.last_arrival;
|
||||
|
||||
t->sched_info.cpu_time += delta;
|
||||
rq_sched_info_depart(task_rq(t), delta);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called when tasks are switched involuntarily due, typically, to expiring
|
||||
* their time slice. (This may also be called when switching to or from
|
||||
* the idle task.) We are only called when prev != next.
|
||||
*/
|
||||
static inline void
|
||||
__sched_info_switch(struct task_struct *prev, struct task_struct *next)
|
||||
{
|
||||
struct rq *rq = task_rq(prev);
|
||||
|
||||
/*
|
||||
* prev now departs the cpu. It's not interesting to record
|
||||
* stats about how efficient we were at scheduling the idle
|
||||
* process, however.
|
||||
*/
|
||||
if (prev != rq->idle)
|
||||
sched_info_depart(prev);
|
||||
|
||||
if (next != rq->idle)
|
||||
sched_info_arrive(next);
|
||||
}
|
||||
static inline void
|
||||
sched_info_switch(struct task_struct *prev, struct task_struct *next)
|
||||
{
|
||||
if (unlikely(sched_info_on()))
|
||||
__sched_info_switch(prev, next);
|
||||
}
|
||||
#else
|
||||
#define sched_info_queued(t) do { } while (0)
|
||||
#define sched_info_switch(t, next) do { } while (0)
|
||||
#endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */
|
||||
|
||||
56
kernel/seccomp.c
Normal file
56
kernel/seccomp.c
Normal file
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
* linux/kernel/seccomp.c
|
||||
*
|
||||
* Copyright 2004-2005 Andrea Arcangeli <andrea@cpushare.com>
|
||||
*
|
||||
* This defines a simple but solid secure-computing mode.
|
||||
*/
|
||||
|
||||
#include <linux/seccomp.h>
|
||||
#include <linux/sched.h>
|
||||
|
||||
/* #define SECCOMP_DEBUG 1 */
|
||||
|
||||
/*
|
||||
* Secure computing mode 1 allows only read/write/exit/sigreturn.
|
||||
* To be fully secure this must be combined with rlimit
|
||||
* to limit the stack allocations too.
|
||||
*/
|
||||
static int mode1_syscalls[] = {
|
||||
__NR_seccomp_read, __NR_seccomp_write, __NR_seccomp_exit, __NR_seccomp_sigreturn,
|
||||
0, /* null terminated */
|
||||
};
|
||||
|
||||
#ifdef TIF_32BIT
|
||||
static int mode1_syscalls_32[] = {
|
||||
__NR_seccomp_read_32, __NR_seccomp_write_32, __NR_seccomp_exit_32, __NR_seccomp_sigreturn_32,
|
||||
0, /* null terminated */
|
||||
};
|
||||
#endif
|
||||
|
||||
void __secure_computing(int this_syscall)
|
||||
{
|
||||
int mode = current->seccomp.mode;
|
||||
int * syscall;
|
||||
|
||||
switch (mode) {
|
||||
case 1:
|
||||
syscall = mode1_syscalls;
|
||||
#ifdef TIF_32BIT
|
||||
if (test_thread_flag(TIF_32BIT))
|
||||
syscall = mode1_syscalls_32;
|
||||
#endif
|
||||
do {
|
||||
if (*syscall == this_syscall)
|
||||
return;
|
||||
} while (*++syscall);
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
|
||||
#ifdef SECCOMP_DEBUG
|
||||
dump_stack();
|
||||
#endif
|
||||
do_exit(SIGKILL);
|
||||
}
|
||||
2644
kernel/signal.c
Normal file
2644
kernel/signal.c
Normal file
File diff suppressed because it is too large
Load Diff
656
kernel/softirq.c
Normal file
656
kernel/softirq.c
Normal file
@@ -0,0 +1,656 @@
|
||||
/*
|
||||
* linux/kernel/softirq.c
|
||||
*
|
||||
* Copyright (C) 1992 Linus Torvalds
|
||||
*
|
||||
* Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/kernel_stat.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/tick.h>
|
||||
|
||||
#include <asm/irq.h>
|
||||
/*
|
||||
- No shared variables, all the data are CPU local.
|
||||
- If a softirq needs serialization, let it serialize itself
|
||||
by its own spinlocks.
|
||||
- Even if softirq is serialized, only local cpu is marked for
|
||||
execution. Hence, we get something sort of weak cpu binding.
|
||||
Though it is still not clear, will it result in better locality
|
||||
or will not.
|
||||
|
||||
Examples:
|
||||
- NET RX softirq. It is multithreaded and does not require
|
||||
any global serialization.
|
||||
- NET TX softirq. It kicks software netdevice queues, hence
|
||||
it is logically serialized per device, but this serialization
|
||||
is invisible to common code.
|
||||
- Tasklets: serialized wrt itself.
|
||||
*/
|
||||
|
||||
#ifndef __ARCH_IRQ_STAT
|
||||
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
|
||||
EXPORT_SYMBOL(irq_stat);
|
||||
#endif
|
||||
|
||||
static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp;
|
||||
|
||||
static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
|
||||
|
||||
/*
|
||||
* we cannot loop indefinitely here to avoid userspace starvation,
|
||||
* but we also don't want to introduce a worst case 1/HZ latency
|
||||
* to the pending events, so lets the scheduler to balance
|
||||
* the softirq load for us.
|
||||
*/
|
||||
static inline void wakeup_softirqd(void)
|
||||
{
|
||||
/* Interrupts are disabled: no need to stop preemption */
|
||||
struct task_struct *tsk = __get_cpu_var(ksoftirqd);
|
||||
|
||||
if (tsk && tsk->state != TASK_RUNNING)
|
||||
wake_up_process(tsk);
|
||||
}
|
||||
|
||||
/*
|
||||
* This one is for softirq.c-internal use,
|
||||
* where hardirqs are disabled legitimately:
|
||||
*/
|
||||
#ifdef CONFIG_TRACE_IRQFLAGS
|
||||
static void __local_bh_disable(unsigned long ip)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
WARN_ON_ONCE(in_irq());
|
||||
|
||||
raw_local_irq_save(flags);
|
||||
add_preempt_count(SOFTIRQ_OFFSET);
|
||||
/*
|
||||
* Were softirqs turned off above:
|
||||
*/
|
||||
if (softirq_count() == SOFTIRQ_OFFSET)
|
||||
trace_softirqs_off(ip);
|
||||
raw_local_irq_restore(flags);
|
||||
}
|
||||
#else /* !CONFIG_TRACE_IRQFLAGS */
|
||||
static inline void __local_bh_disable(unsigned long ip)
|
||||
{
|
||||
add_preempt_count(SOFTIRQ_OFFSET);
|
||||
barrier();
|
||||
}
|
||||
#endif /* CONFIG_TRACE_IRQFLAGS */
|
||||
|
||||
void local_bh_disable(void)
|
||||
{
|
||||
__local_bh_disable((unsigned long)__builtin_return_address(0));
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(local_bh_disable);
|
||||
|
||||
void __local_bh_enable(void)
|
||||
{
|
||||
WARN_ON_ONCE(in_irq());
|
||||
|
||||
/*
|
||||
* softirqs should never be enabled by __local_bh_enable(),
|
||||
* it always nests inside local_bh_enable() sections:
|
||||
*/
|
||||
WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET);
|
||||
|
||||
sub_preempt_count(SOFTIRQ_OFFSET);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__local_bh_enable);
|
||||
|
||||
/*
|
||||
* Special-case - softirqs can safely be enabled in
|
||||
* cond_resched_softirq(), or by __do_softirq(),
|
||||
* without processing still-pending softirqs:
|
||||
*/
|
||||
void _local_bh_enable(void)
|
||||
{
|
||||
WARN_ON_ONCE(in_irq());
|
||||
WARN_ON_ONCE(!irqs_disabled());
|
||||
|
||||
if (softirq_count() == SOFTIRQ_OFFSET)
|
||||
trace_softirqs_on((unsigned long)__builtin_return_address(0));
|
||||
sub_preempt_count(SOFTIRQ_OFFSET);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(_local_bh_enable);
|
||||
|
||||
void local_bh_enable(void)
|
||||
{
|
||||
#ifdef CONFIG_TRACE_IRQFLAGS
|
||||
unsigned long flags;
|
||||
|
||||
WARN_ON_ONCE(in_irq());
|
||||
#endif
|
||||
WARN_ON_ONCE(irqs_disabled());
|
||||
|
||||
#ifdef CONFIG_TRACE_IRQFLAGS
|
||||
local_irq_save(flags);
|
||||
#endif
|
||||
/*
|
||||
* Are softirqs going to be turned on now:
|
||||
*/
|
||||
if (softirq_count() == SOFTIRQ_OFFSET)
|
||||
trace_softirqs_on((unsigned long)__builtin_return_address(0));
|
||||
/*
|
||||
* Keep preemption disabled until we are done with
|
||||
* softirq processing:
|
||||
*/
|
||||
sub_preempt_count(SOFTIRQ_OFFSET - 1);
|
||||
|
||||
if (unlikely(!in_interrupt() && local_softirq_pending()))
|
||||
do_softirq();
|
||||
|
||||
dec_preempt_count();
|
||||
#ifdef CONFIG_TRACE_IRQFLAGS
|
||||
local_irq_restore(flags);
|
||||
#endif
|
||||
preempt_check_resched();
|
||||
}
|
||||
EXPORT_SYMBOL(local_bh_enable);
|
||||
|
||||
void local_bh_enable_ip(unsigned long ip)
|
||||
{
|
||||
#ifdef CONFIG_TRACE_IRQFLAGS
|
||||
unsigned long flags;
|
||||
|
||||
WARN_ON_ONCE(in_irq());
|
||||
|
||||
local_irq_save(flags);
|
||||
#endif
|
||||
/*
|
||||
* Are softirqs going to be turned on now:
|
||||
*/
|
||||
if (softirq_count() == SOFTIRQ_OFFSET)
|
||||
trace_softirqs_on(ip);
|
||||
/*
|
||||
* Keep preemption disabled until we are done with
|
||||
* softirq processing:
|
||||
*/
|
||||
sub_preempt_count(SOFTIRQ_OFFSET - 1);
|
||||
|
||||
if (unlikely(!in_interrupt() && local_softirq_pending()))
|
||||
do_softirq();
|
||||
|
||||
dec_preempt_count();
|
||||
#ifdef CONFIG_TRACE_IRQFLAGS
|
||||
local_irq_restore(flags);
|
||||
#endif
|
||||
preempt_check_resched();
|
||||
}
|
||||
EXPORT_SYMBOL(local_bh_enable_ip);
|
||||
|
||||
/*
|
||||
* We restart softirq processing MAX_SOFTIRQ_RESTART times,
|
||||
* and we fall back to softirqd after that.
|
||||
*
|
||||
* This number has been established via experimentation.
|
||||
* The two things to balance is latency against fairness -
|
||||
* we want to handle softirqs as soon as possible, but they
|
||||
* should not be able to lock up the box.
|
||||
*/
|
||||
#define MAX_SOFTIRQ_RESTART 10
|
||||
|
||||
asmlinkage void __do_softirq(void)
|
||||
{
|
||||
struct softirq_action *h;
|
||||
__u32 pending;
|
||||
int max_restart = MAX_SOFTIRQ_RESTART;
|
||||
int cpu;
|
||||
|
||||
pending = local_softirq_pending();
|
||||
account_system_vtime(current);
|
||||
|
||||
__local_bh_disable((unsigned long)__builtin_return_address(0));
|
||||
trace_softirq_enter();
|
||||
|
||||
cpu = smp_processor_id();
|
||||
restart:
|
||||
/* Reset the pending bitmask before enabling irqs */
|
||||
set_softirq_pending(0);
|
||||
|
||||
local_irq_enable();
|
||||
|
||||
h = softirq_vec;
|
||||
|
||||
do {
|
||||
if (pending & 1) {
|
||||
h->action(h);
|
||||
rcu_bh_qsctr_inc(cpu);
|
||||
}
|
||||
h++;
|
||||
pending >>= 1;
|
||||
} while (pending);
|
||||
|
||||
local_irq_disable();
|
||||
|
||||
pending = local_softirq_pending();
|
||||
if (pending && --max_restart)
|
||||
goto restart;
|
||||
|
||||
if (pending)
|
||||
wakeup_softirqd();
|
||||
|
||||
trace_softirq_exit();
|
||||
|
||||
account_system_vtime(current);
|
||||
_local_bh_enable();
|
||||
}
|
||||
|
||||
#ifndef __ARCH_HAS_DO_SOFTIRQ
|
||||
|
||||
asmlinkage void do_softirq(void)
|
||||
{
|
||||
__u32 pending;
|
||||
unsigned long flags;
|
||||
|
||||
if (in_interrupt())
|
||||
return;
|
||||
|
||||
local_irq_save(flags);
|
||||
|
||||
pending = local_softirq_pending();
|
||||
|
||||
if (pending)
|
||||
__do_softirq();
|
||||
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(do_softirq);
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Enter an interrupt context.
|
||||
*/
|
||||
void irq_enter(void)
|
||||
{
|
||||
__irq_enter();
|
||||
#ifdef CONFIG_NO_HZ
|
||||
if (idle_cpu(smp_processor_id()))
|
||||
tick_nohz_update_jiffies();
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
|
||||
# define invoke_softirq() __do_softirq()
|
||||
#else
|
||||
# define invoke_softirq() do_softirq()
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Exit an interrupt context. Process softirqs if needed and possible:
|
||||
*/
|
||||
void irq_exit(void)
|
||||
{
|
||||
account_system_vtime(current);
|
||||
trace_hardirq_exit();
|
||||
sub_preempt_count(IRQ_EXIT_OFFSET);
|
||||
if (!in_interrupt() && local_softirq_pending())
|
||||
invoke_softirq();
|
||||
|
||||
#ifdef CONFIG_NO_HZ
|
||||
/* Make sure that timer wheel updates are propagated */
|
||||
if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched())
|
||||
tick_nohz_stop_sched_tick();
|
||||
#endif
|
||||
preempt_enable_no_resched();
|
||||
}
|
||||
|
||||
/*
|
||||
* This function must run with irqs disabled!
|
||||
*/
|
||||
inline fastcall void raise_softirq_irqoff(unsigned int nr)
|
||||
{
|
||||
__raise_softirq_irqoff(nr);
|
||||
|
||||
/*
|
||||
* If we're in an interrupt or softirq, we're done
|
||||
* (this also catches softirq-disabled code). We will
|
||||
* actually run the softirq once we return from
|
||||
* the irq or softirq.
|
||||
*
|
||||
* Otherwise we wake up ksoftirqd to make sure we
|
||||
* schedule the softirq soon.
|
||||
*/
|
||||
if (!in_interrupt())
|
||||
wakeup_softirqd();
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(raise_softirq_irqoff);
|
||||
|
||||
void fastcall raise_softirq(unsigned int nr)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
local_irq_save(flags);
|
||||
raise_softirq_irqoff(nr);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
|
||||
{
|
||||
softirq_vec[nr].data = data;
|
||||
softirq_vec[nr].action = action;
|
||||
}
|
||||
|
||||
/* Tasklets */
|
||||
struct tasklet_head
|
||||
{
|
||||
struct tasklet_struct *list;
|
||||
};
|
||||
|
||||
/* Some compilers disobey section attribute on statics when not
|
||||
initialized -- RR */
|
||||
static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec) = { NULL };
|
||||
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec) = { NULL };
|
||||
|
||||
void fastcall __tasklet_schedule(struct tasklet_struct *t)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
local_irq_save(flags);
|
||||
t->next = __get_cpu_var(tasklet_vec).list;
|
||||
__get_cpu_var(tasklet_vec).list = t;
|
||||
raise_softirq_irqoff(TASKLET_SOFTIRQ);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(__tasklet_schedule);
|
||||
|
||||
void fastcall __tasklet_hi_schedule(struct tasklet_struct *t)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
local_irq_save(flags);
|
||||
t->next = __get_cpu_var(tasklet_hi_vec).list;
|
||||
__get_cpu_var(tasklet_hi_vec).list = t;
|
||||
raise_softirq_irqoff(HI_SOFTIRQ);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(__tasklet_hi_schedule);
|
||||
|
||||
static void tasklet_action(struct softirq_action *a)
|
||||
{
|
||||
struct tasklet_struct *list;
|
||||
|
||||
local_irq_disable();
|
||||
list = __get_cpu_var(tasklet_vec).list;
|
||||
__get_cpu_var(tasklet_vec).list = NULL;
|
||||
local_irq_enable();
|
||||
|
||||
while (list) {
|
||||
struct tasklet_struct *t = list;
|
||||
|
||||
list = list->next;
|
||||
|
||||
if (tasklet_trylock(t)) {
|
||||
if (!atomic_read(&t->count)) {
|
||||
if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
|
||||
BUG();
|
||||
t->func(t->data);
|
||||
tasklet_unlock(t);
|
||||
continue;
|
||||
}
|
||||
tasklet_unlock(t);
|
||||
}
|
||||
|
||||
local_irq_disable();
|
||||
t->next = __get_cpu_var(tasklet_vec).list;
|
||||
__get_cpu_var(tasklet_vec).list = t;
|
||||
__raise_softirq_irqoff(TASKLET_SOFTIRQ);
|
||||
local_irq_enable();
|
||||
}
|
||||
}
|
||||
|
||||
static void tasklet_hi_action(struct softirq_action *a)
|
||||
{
|
||||
struct tasklet_struct *list;
|
||||
|
||||
local_irq_disable();
|
||||
list = __get_cpu_var(tasklet_hi_vec).list;
|
||||
__get_cpu_var(tasklet_hi_vec).list = NULL;
|
||||
local_irq_enable();
|
||||
|
||||
while (list) {
|
||||
struct tasklet_struct *t = list;
|
||||
|
||||
list = list->next;
|
||||
|
||||
if (tasklet_trylock(t)) {
|
||||
if (!atomic_read(&t->count)) {
|
||||
if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
|
||||
BUG();
|
||||
t->func(t->data);
|
||||
tasklet_unlock(t);
|
||||
continue;
|
||||
}
|
||||
tasklet_unlock(t);
|
||||
}
|
||||
|
||||
local_irq_disable();
|
||||
t->next = __get_cpu_var(tasklet_hi_vec).list;
|
||||
__get_cpu_var(tasklet_hi_vec).list = t;
|
||||
__raise_softirq_irqoff(HI_SOFTIRQ);
|
||||
local_irq_enable();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void tasklet_init(struct tasklet_struct *t,
|
||||
void (*func)(unsigned long), unsigned long data)
|
||||
{
|
||||
t->next = NULL;
|
||||
t->state = 0;
|
||||
atomic_set(&t->count, 0);
|
||||
t->func = func;
|
||||
t->data = data;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(tasklet_init);
|
||||
|
||||
void tasklet_kill(struct tasklet_struct *t)
|
||||
{
|
||||
if (in_interrupt())
|
||||
printk("Attempt to kill tasklet from interrupt\n");
|
||||
|
||||
while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
|
||||
do
|
||||
yield();
|
||||
while (test_bit(TASKLET_STATE_SCHED, &t->state));
|
||||
}
|
||||
tasklet_unlock_wait(t);
|
||||
clear_bit(TASKLET_STATE_SCHED, &t->state);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(tasklet_kill);
|
||||
|
||||
void __init softirq_init(void)
|
||||
{
|
||||
open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
|
||||
open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
|
||||
}
|
||||
|
||||
static int ksoftirqd(void * __bind_cpu)
|
||||
{
|
||||
current->flags |= PF_NOFREEZE;
|
||||
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
|
||||
while (!kthread_should_stop()) {
|
||||
preempt_disable();
|
||||
if (!local_softirq_pending()) {
|
||||
preempt_enable_no_resched();
|
||||
schedule();
|
||||
preempt_disable();
|
||||
}
|
||||
|
||||
__set_current_state(TASK_RUNNING);
|
||||
|
||||
while (local_softirq_pending()) {
|
||||
/* Preempt disable stops cpu going offline.
|
||||
If already offline, we'll be on wrong CPU:
|
||||
don't process */
|
||||
if (cpu_is_offline((long)__bind_cpu))
|
||||
goto wait_to_die;
|
||||
do_softirq();
|
||||
preempt_enable_no_resched();
|
||||
cond_resched();
|
||||
preempt_disable();
|
||||
}
|
||||
preempt_enable();
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
}
|
||||
__set_current_state(TASK_RUNNING);
|
||||
return 0;
|
||||
|
||||
wait_to_die:
|
||||
preempt_enable();
|
||||
/* Wait for kthread_stop */
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
while (!kthread_should_stop()) {
|
||||
schedule();
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
}
|
||||
__set_current_state(TASK_RUNNING);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
/*
|
||||
* tasklet_kill_immediate is called to remove a tasklet which can already be
|
||||
* scheduled for execution on @cpu.
|
||||
*
|
||||
* Unlike tasklet_kill, this function removes the tasklet
|
||||
* _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
|
||||
*
|
||||
* When this function is called, @cpu must be in the CPU_DEAD state.
|
||||
*/
|
||||
/*
 * Unlink @t from the dead @cpu's pending-tasklet list without waiting.
 * Caller must guarantee @cpu is offline (CPU_DEAD) and the tasklet is
 * not currently running.
 */
void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
{
	struct tasklet_struct **i;

	BUG_ON(cpu_online(cpu));
	BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));

	/* Not scheduled: nothing to unlink. */
	if (!test_bit(TASKLET_STATE_SCHED, &t->state))
		return;

	/* CPU is dead, so no lock needed. */
	for (i = &per_cpu(tasklet_vec, cpu).list; *i; i = &(*i)->next) {
		if (*i == t) {
			*i = t->next;	/* unlink from singly-linked list */
			return;
		}
	}
	/* SCHED bit set but tasklet not on the list: state is corrupted. */
	BUG();
}
|
||||
|
||||
/*
 * Splice a dead CPU's pending tasklet lists (normal and high-prio)
 * onto the end of this CPU's lists and raise the softirqs so they
 * get run here.  Called from the CPU_DEAD notifier.
 */
static void takeover_tasklets(unsigned int cpu)
{
	struct tasklet_struct **i;

	/* CPU is dead, so no lock needed. */
	local_irq_disable();

	/* Find end, append list for that CPU. */
	for (i = &__get_cpu_var(tasklet_vec).list; *i; i = &(*i)->next);
	*i = per_cpu(tasklet_vec, cpu).list;
	per_cpu(tasklet_vec, cpu).list = NULL;
	raise_softirq_irqoff(TASKLET_SOFTIRQ);

	/* Same for the high-priority list. */
	for (i = &__get_cpu_var(tasklet_hi_vec).list; *i; i = &(*i)->next);
	*i = per_cpu(tasklet_hi_vec, cpu).list;
	per_cpu(tasklet_hi_vec, cpu).list = NULL;
	raise_softirq_irqoff(HI_SOFTIRQ);

	local_irq_enable();
}
|
||||
#endif /* CONFIG_HOTPLUG_CPU */
|
||||
|
||||
/*
 * CPU hotplug notifier: create, wake, rebind and reap the per-CPU
 * ksoftirqd thread as CPUs come and go.
 */
static int __cpuinit cpu_callback(struct notifier_block *nfb,
				  unsigned long action,
				  void *hcpu)
{
	int hotcpu = (unsigned long)hcpu;
	struct task_struct *p;

	switch (action) {
	case CPU_UP_PREPARE:
		/* Create the thread, bound to the incoming CPU, but do not
		   run it until CPU_ONLINE. */
		p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
		if (IS_ERR(p)) {
			printk("ksoftirqd for %i failed\n", hotcpu);
			return NOTIFY_BAD;
		}
		kthread_bind(p, hotcpu);
		per_cpu(ksoftirqd, hotcpu) = p;
		break;
	case CPU_ONLINE:
		wake_up_process(per_cpu(ksoftirqd, hotcpu));
		break;
#ifdef CONFIG_HOTPLUG_CPU
	case CPU_UP_CANCELED:
		if (!per_cpu(ksoftirqd, hotcpu))
			break;
		/* Unbind so it can run.  Fall thru. */
		kthread_bind(per_cpu(ksoftirqd, hotcpu),
			     any_online_cpu(cpu_online_map));
		/* fallthrough: shared teardown with CPU_DEAD */
	case CPU_DEAD:
		p = per_cpu(ksoftirqd, hotcpu);
		per_cpu(ksoftirqd, hotcpu) = NULL;
		kthread_stop(p);
		takeover_tasklets(hotcpu);
		break;
#endif /* CONFIG_HOTPLUG_CPU */
	}
	return NOTIFY_OK;
}
|
||||
|
||||
static struct notifier_block __cpuinitdata cpu_nfb = {
|
||||
.notifier_call = cpu_callback
|
||||
};
|
||||
|
||||
__init int spawn_ksoftirqd(void)
|
||||
{
|
||||
void *cpu = (void *)(long)smp_processor_id();
|
||||
int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
|
||||
|
||||
BUG_ON(err == NOTIFY_BAD);
|
||||
cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
|
||||
register_cpu_notifier(&cpu_nfb);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/*
|
||||
* Call a function on all processors
|
||||
*/
|
||||
/*
 * Run @func on every online CPU: remotely via smp_call_function(),
 * and directly (with interrupts off, matching IPI context) on this CPU.
 * Returns the result of smp_call_function().
 */
int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait)
{
	int err;

	preempt_disable();
	err = smp_call_function(func, info, retry, wait);
	local_irq_disable();
	func(info);
	local_irq_enable();
	preempt_enable();
	return err;
}
|
||||
EXPORT_SYMBOL(on_each_cpu);
|
||||
#endif
|
||||
174
kernel/softlockup.c
Normal file
174
kernel/softlockup.c
Normal file
@@ -0,0 +1,174 @@
|
||||
/*
|
||||
* Detect Soft Lockups
|
||||
*
|
||||
* started by Ingo Molnar, Copyright (C) 2005, 2006 Red Hat, Inc.
|
||||
*
|
||||
* this code detects soft lockups: incidents in where on a CPU
|
||||
* the kernel does not reschedule for 10 seconds or more.
|
||||
*/
|
||||
#include <linux/mm.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
static DEFINE_SPINLOCK(print_lock);
|
||||
|
||||
static DEFINE_PER_CPU(unsigned long, touch_timestamp);
|
||||
static DEFINE_PER_CPU(unsigned long, print_timestamp);
|
||||
static DEFINE_PER_CPU(struct task_struct *, watchdog_task);
|
||||
|
||||
static int did_panic = 0;
|
||||
|
||||
static int
|
||||
softlock_panic(struct notifier_block *this, unsigned long event, void *ptr)
|
||||
{
|
||||
did_panic = 1;
|
||||
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
|
||||
static struct notifier_block panic_block = {
|
||||
.notifier_call = softlock_panic,
|
||||
};
|
||||
|
||||
/*
 * Record "this CPU made progress just now".  Read back by
 * softlockup_tick() to decide whether to warn.
 */
void touch_softlockup_watchdog(void)
{
	__raw_get_cpu_var(touch_timestamp) = jiffies;
}
|
||||
EXPORT_SYMBOL(touch_softlockup_watchdog);
|
||||
|
||||
void touch_all_softlockup_watchdogs(void)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
/* Cause each CPU to re-update its timestamp rather than complain */
|
||||
for_each_online_cpu(cpu)
|
||||
per_cpu(touch_timestamp, cpu) = 0;
|
||||
}
|
||||
EXPORT_SYMBOL(touch_all_softlockup_watchdogs);
|
||||
|
||||
/*
|
||||
* This callback runs from the timer interrupt, and checks
|
||||
* whether the watchdog thread has hung or not:
|
||||
*/
|
||||
/*
 * Timer-interrupt hook: check whether this CPU's watchdog thread has
 * run recently.  Wakes the watchdog every second and reports a soft
 * lockup once the timestamp is more than 10 seconds stale.
 */
void softlockup_tick(void)
{
	int this_cpu = smp_processor_id();
	unsigned long touch_timestamp = per_cpu(touch_timestamp, this_cpu);

	/* Zero means "forced re-arm" (see touch_all_softlockup_watchdogs). */
	if (touch_timestamp == 0) {
		touch_softlockup_watchdog();
		return;
	}

	/* prevent double reports: */
	if (per_cpu(print_timestamp, this_cpu) == touch_timestamp ||
		did_panic ||
			!per_cpu(watchdog_task, this_cpu))
		return;

	/* do not print during early bootup: */
	if (unlikely(system_state != SYSTEM_RUNNING)) {
		touch_softlockup_watchdog();
		return;
	}

	/* Wake up the high-prio watchdog task every second: */
	if (time_after(jiffies, touch_timestamp + HZ))
		wake_up_process(per_cpu(watchdog_task, this_cpu));

	/* Warn about unreasonable 10+ seconds delays: */
	if (time_after(jiffies, touch_timestamp + 10*HZ)) {
		/* Record the timestamp so this incident is reported once. */
		per_cpu(print_timestamp, this_cpu) = touch_timestamp;

		spin_lock(&print_lock);
		printk(KERN_ERR "BUG: soft lockup detected on CPU#%d!\n",
			this_cpu);
		dump_stack();
		spin_unlock(&print_lock);
	}
}
|
||||
|
||||
/*
|
||||
* The watchdog thread - runs every second and touches the timestamp.
|
||||
*/
|
||||
/*
 * The watchdog thread - runs every second and touches the timestamp.
 * SCHED_FIFO prio 99 so that only a CPU that truly stops scheduling
 * (not mere load) can keep it from running.
 */
static int watchdog(void * __bind_cpu)
{
	struct sched_param param = { .sched_priority = 99 };

	sched_setscheduler(current, SCHED_FIFO, &param);
	current->flags |= PF_NOFREEZE;	/* must keep running during suspend */

	/*
	 * Run briefly once per second to reset the softlockup timestamp.
	 * If this gets delayed for more than 10 seconds then the
	 * debug-printout triggers in softlockup_tick().
	 */
	while (!kthread_should_stop()) {
		set_current_state(TASK_INTERRUPTIBLE);
		touch_softlockup_watchdog();
		schedule();	/* woken once a second by softlockup_tick() */
	}

	return 0;
}
|
||||
|
||||
/*
|
||||
* Create/destroy watchdog threads as CPUs come and go:
|
||||
*/
|
||||
/*
 * CPU hotplug notifier: create, wake, rebind and reap the per-CPU
 * softlockup watchdog thread as CPUs come and go.
 */
static int __cpuinit
cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
	int hotcpu = (unsigned long)hcpu;
	struct task_struct *p;

	switch (action) {
	case CPU_UP_PREPARE:
		BUG_ON(per_cpu(watchdog_task, hotcpu));
		p = kthread_create(watchdog, hcpu, "watchdog/%d", hotcpu);
		if (IS_ERR(p)) {
			printk("watchdog for %i failed\n", hotcpu);
			return NOTIFY_BAD;
		}
		/* Arm the timestamp before the thread exists publicly. */
		per_cpu(touch_timestamp, hotcpu) = jiffies;
		per_cpu(watchdog_task, hotcpu) = p;
		kthread_bind(p, hotcpu);
		break;
	case CPU_ONLINE:
		wake_up_process(per_cpu(watchdog_task, hotcpu));
		break;
#ifdef CONFIG_HOTPLUG_CPU
	case CPU_UP_CANCELED:
		if (!per_cpu(watchdog_task, hotcpu))
			break;
		/* Unbind so it can run.  Fall thru. */
		kthread_bind(per_cpu(watchdog_task, hotcpu),
			     any_online_cpu(cpu_online_map));
		/* fallthrough: shared teardown with CPU_DEAD */
	case CPU_DEAD:
		p = per_cpu(watchdog_task, hotcpu);
		per_cpu(watchdog_task, hotcpu) = NULL;
		kthread_stop(p);
		break;
#endif /* CONFIG_HOTPLUG_CPU */
	}
	return NOTIFY_OK;
}
|
||||
|
||||
static struct notifier_block __cpuinitdata cpu_nfb = {
|
||||
.notifier_call = cpu_callback
|
||||
};
|
||||
|
||||
__init void spawn_softlockup_task(void)
|
||||
{
|
||||
void *cpu = (void *)(long)smp_processor_id();
|
||||
int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
|
||||
|
||||
BUG_ON(err == NOTIFY_BAD);
|
||||
cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
|
||||
register_cpu_notifier(&cpu_nfb);
|
||||
|
||||
atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
|
||||
}
|
||||
448
kernel/spinlock.c
Normal file
448
kernel/spinlock.c
Normal file
@@ -0,0 +1,448 @@
|
||||
/*
|
||||
* Copyright (2004) Linus Torvalds
|
||||
*
|
||||
* Author: Zwane Mwaikambo <zwane@fsmlabs.com>
|
||||
*
|
||||
* Copyright (2004, 2005) Ingo Molnar
|
||||
*
|
||||
* This file contains the spinlock/rwlock implementations for the
|
||||
* SMP and the DEBUG_SPINLOCK cases. (UP-nondebug inlines them)
|
||||
*
|
||||
* Note that some architectures have special knowledge about the
|
||||
* stack frames of these functions in their profile_pc. If you
|
||||
* change anything significant here that could change the stack
|
||||
* frame contact the architecture maintainers.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <linux/preempt.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/debug_locks.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
/*
 * Try to take @lock without spinning.  Returns 1 with preemption
 * disabled on success, 0 with preemption restored on failure.
 */
int __lockfunc _spin_trylock(spinlock_t *lock)
{
	preempt_disable();
	if (!_raw_spin_trylock(lock)) {
		preempt_enable();
		return 0;
	}
	spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
	return 1;
}
|
||||
EXPORT_SYMBOL(_spin_trylock);
|
||||
|
||||
/*
 * Try to take @lock for reading without spinning.  Returns 1 with
 * preemption disabled on success, 0 with preemption restored otherwise.
 */
int __lockfunc _read_trylock(rwlock_t *lock)
{
	preempt_disable();
	if (!_raw_read_trylock(lock)) {
		preempt_enable();
		return 0;
	}
	rwlock_acquire_read(&lock->dep_map, 0, 1, _RET_IP_);
	return 1;
}
|
||||
EXPORT_SYMBOL(_read_trylock);
|
||||
|
||||
/*
 * Try to take @lock for writing without spinning.  Returns 1 with
 * preemption disabled on success, 0 with preemption restored otherwise.
 */
int __lockfunc _write_trylock(rwlock_t *lock)
{
	preempt_disable();
	if (!_raw_write_trylock(lock)) {
		preempt_enable();
		return 0;
	}
	rwlock_acquire(&lock->dep_map, 0, 1, _RET_IP_);
	return 1;
}
|
||||
EXPORT_SYMBOL(_write_trylock);
|
||||
|
||||
/*
|
||||
* If lockdep is enabled then we use the non-preemption spin-ops
|
||||
* even on CONFIG_PREEMPT, because lockdep assumes that interrupts are
|
||||
* not re-enabled during lock-acquire (which the preempt-spin-ops do):
|
||||
*/
|
||||
#if !defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP) || \
|
||||
defined(CONFIG_DEBUG_LOCK_ALLOC)
|
||||
|
||||
void __lockfunc _read_lock(rwlock_t *lock)
|
||||
{
|
||||
preempt_disable();
|
||||
rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
|
||||
_raw_read_lock(lock);
|
||||
}
|
||||
EXPORT_SYMBOL(_read_lock);
|
||||
|
||||
unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
local_irq_save(flags);
|
||||
preempt_disable();
|
||||
spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
|
||||
/*
|
||||
* On lockdep we dont want the hand-coded irq-enable of
|
||||
* _raw_spin_lock_flags() code, because lockdep assumes
|
||||
* that interrupts are not re-enabled during lock-acquire:
|
||||
*/
|
||||
#ifdef CONFIG_PROVE_LOCKING
|
||||
_raw_spin_lock(lock);
|
||||
#else
|
||||
_raw_spin_lock_flags(lock, &flags);
|
||||
#endif
|
||||
return flags;
|
||||
}
|
||||
EXPORT_SYMBOL(_spin_lock_irqsave);
|
||||
|
||||
void __lockfunc _spin_lock_irq(spinlock_t *lock)
|
||||
{
|
||||
local_irq_disable();
|
||||
preempt_disable();
|
||||
spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
|
||||
_raw_spin_lock(lock);
|
||||
}
|
||||
EXPORT_SYMBOL(_spin_lock_irq);
|
||||
|
||||
void __lockfunc _spin_lock_bh(spinlock_t *lock)
|
||||
{
|
||||
local_bh_disable();
|
||||
preempt_disable();
|
||||
spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
|
||||
_raw_spin_lock(lock);
|
||||
}
|
||||
EXPORT_SYMBOL(_spin_lock_bh);
|
||||
|
||||
unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
local_irq_save(flags);
|
||||
preempt_disable();
|
||||
rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
|
||||
_raw_read_lock(lock);
|
||||
return flags;
|
||||
}
|
||||
EXPORT_SYMBOL(_read_lock_irqsave);
|
||||
|
||||
void __lockfunc _read_lock_irq(rwlock_t *lock)
|
||||
{
|
||||
local_irq_disable();
|
||||
preempt_disable();
|
||||
rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
|
||||
_raw_read_lock(lock);
|
||||
}
|
||||
EXPORT_SYMBOL(_read_lock_irq);
|
||||
|
||||
void __lockfunc _read_lock_bh(rwlock_t *lock)
|
||||
{
|
||||
local_bh_disable();
|
||||
preempt_disable();
|
||||
rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
|
||||
_raw_read_lock(lock);
|
||||
}
|
||||
EXPORT_SYMBOL(_read_lock_bh);
|
||||
|
||||
unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
local_irq_save(flags);
|
||||
preempt_disable();
|
||||
rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
|
||||
_raw_write_lock(lock);
|
||||
return flags;
|
||||
}
|
||||
EXPORT_SYMBOL(_write_lock_irqsave);
|
||||
|
||||
void __lockfunc _write_lock_irq(rwlock_t *lock)
|
||||
{
|
||||
local_irq_disable();
|
||||
preempt_disable();
|
||||
rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
|
||||
_raw_write_lock(lock);
|
||||
}
|
||||
EXPORT_SYMBOL(_write_lock_irq);
|
||||
|
||||
void __lockfunc _write_lock_bh(rwlock_t *lock)
|
||||
{
|
||||
local_bh_disable();
|
||||
preempt_disable();
|
||||
rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
|
||||
_raw_write_lock(lock);
|
||||
}
|
||||
EXPORT_SYMBOL(_write_lock_bh);
|
||||
|
||||
void __lockfunc _spin_lock(spinlock_t *lock)
|
||||
{
|
||||
preempt_disable();
|
||||
spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
|
||||
_raw_spin_lock(lock);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(_spin_lock);
|
||||
|
||||
void __lockfunc _write_lock(rwlock_t *lock)
|
||||
{
|
||||
preempt_disable();
|
||||
rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
|
||||
_raw_write_lock(lock);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(_write_lock);
|
||||
|
||||
#else /* CONFIG_PREEMPT: */
|
||||
|
||||
/*
|
||||
* This could be a long-held lock. We both prepare to spin for a long
|
||||
* time (making _this_ CPU preemptable if possible), and we also signal
|
||||
* towards that other CPU that it should break the lock ASAP.
|
||||
*
|
||||
* (We do this in a function because inlining it would be excessive.)
|
||||
*/
|
||||
|
||||
#define BUILD_LOCK_OPS(op, locktype) \
|
||||
void __lockfunc _##op##_lock(locktype##_t *lock) \
|
||||
{ \
|
||||
for (;;) { \
|
||||
preempt_disable(); \
|
||||
if (likely(_raw_##op##_trylock(lock))) \
|
||||
break; \
|
||||
preempt_enable(); \
|
||||
\
|
||||
if (!(lock)->break_lock) \
|
||||
(lock)->break_lock = 1; \
|
||||
while (!op##_can_lock(lock) && (lock)->break_lock) \
|
||||
_raw_##op##_relax(&lock->raw_lock); \
|
||||
} \
|
||||
(lock)->break_lock = 0; \
|
||||
} \
|
||||
\
|
||||
EXPORT_SYMBOL(_##op##_lock); \
|
||||
\
|
||||
unsigned long __lockfunc _##op##_lock_irqsave(locktype##_t *lock) \
|
||||
{ \
|
||||
unsigned long flags; \
|
||||
\
|
||||
for (;;) { \
|
||||
preempt_disable(); \
|
||||
local_irq_save(flags); \
|
||||
if (likely(_raw_##op##_trylock(lock))) \
|
||||
break; \
|
||||
local_irq_restore(flags); \
|
||||
preempt_enable(); \
|
||||
\
|
||||
if (!(lock)->break_lock) \
|
||||
(lock)->break_lock = 1; \
|
||||
while (!op##_can_lock(lock) && (lock)->break_lock) \
|
||||
_raw_##op##_relax(&lock->raw_lock); \
|
||||
} \
|
||||
(lock)->break_lock = 0; \
|
||||
return flags; \
|
||||
} \
|
||||
\
|
||||
EXPORT_SYMBOL(_##op##_lock_irqsave); \
|
||||
\
|
||||
void __lockfunc _##op##_lock_irq(locktype##_t *lock) \
|
||||
{ \
|
||||
_##op##_lock_irqsave(lock); \
|
||||
} \
|
||||
\
|
||||
EXPORT_SYMBOL(_##op##_lock_irq); \
|
||||
\
|
||||
void __lockfunc _##op##_lock_bh(locktype##_t *lock) \
|
||||
{ \
|
||||
unsigned long flags; \
|
||||
\
|
||||
/* */ \
|
||||
/* Careful: we must exclude softirqs too, hence the */ \
|
||||
/* irq-disabling. We use the generic preemption-aware */ \
|
||||
/* function: */ \
|
||||
/**/ \
|
||||
flags = _##op##_lock_irqsave(lock); \
|
||||
local_bh_disable(); \
|
||||
local_irq_restore(flags); \
|
||||
} \
|
||||
\
|
||||
EXPORT_SYMBOL(_##op##_lock_bh)
|
||||
|
||||
/*
|
||||
* Build preemption-friendly versions of the following
|
||||
* lock-spinning functions:
|
||||
*
|
||||
* _[spin|read|write]_lock()
|
||||
* _[spin|read|write]_lock_irq()
|
||||
* _[spin|read|write]_lock_irqsave()
|
||||
* _[spin|read|write]_lock_bh()
|
||||
*/
|
||||
BUILD_LOCK_OPS(spin, spinlock);
|
||||
BUILD_LOCK_OPS(read, rwlock);
|
||||
BUILD_LOCK_OPS(write, rwlock);
|
||||
|
||||
#endif /* CONFIG_PREEMPT */
|
||||
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
|
||||
void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass)
|
||||
{
|
||||
preempt_disable();
|
||||
spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
|
||||
_raw_spin_lock(lock);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(_spin_lock_nested);
|
||||
/*
 * Like _spin_lock_irqsave() but annotates the acquisition with a lockdep
 * @subclass, allowing legitimate nested locking of same-class locks.
 * Returns the saved interrupt flags for _spin_unlock_irqrestore().
 */
unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclass)
{
	unsigned long flags;

	local_irq_save(flags);
	preempt_disable();
	spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
	/*
	 * On lockdep we dont want the hand-coded irq-enable of
	 * _raw_spin_lock_flags() code, because lockdep assumes
	 * that interrupts are not re-enabled during lock-acquire:
	 *
	 * BUGFIX: this previously tested CONFIG_PROVE_SPIN_LOCKING, a
	 * config symbol that does not exist, so the lockdep-safe branch
	 * was never compiled in.  The correct symbol is
	 * CONFIG_PROVE_LOCKING (cf. _spin_lock_irqsave() above).
	 */
#ifdef CONFIG_PROVE_LOCKING
	_raw_spin_lock(lock);
#else
	_raw_spin_lock_flags(lock, &flags);
#endif
	return flags;
}
|
||||
|
||||
EXPORT_SYMBOL(_spin_lock_irqsave_nested);
|
||||
|
||||
#endif
|
||||
|
||||
void __lockfunc _spin_unlock(spinlock_t *lock)
|
||||
{
|
||||
spin_release(&lock->dep_map, 1, _RET_IP_);
|
||||
_raw_spin_unlock(lock);
|
||||
preempt_enable();
|
||||
}
|
||||
EXPORT_SYMBOL(_spin_unlock);
|
||||
|
||||
void __lockfunc _write_unlock(rwlock_t *lock)
|
||||
{
|
||||
rwlock_release(&lock->dep_map, 1, _RET_IP_);
|
||||
_raw_write_unlock(lock);
|
||||
preempt_enable();
|
||||
}
|
||||
EXPORT_SYMBOL(_write_unlock);
|
||||
|
||||
void __lockfunc _read_unlock(rwlock_t *lock)
|
||||
{
|
||||
rwlock_release(&lock->dep_map, 1, _RET_IP_);
|
||||
_raw_read_unlock(lock);
|
||||
preempt_enable();
|
||||
}
|
||||
EXPORT_SYMBOL(_read_unlock);
|
||||
|
||||
void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
|
||||
{
|
||||
spin_release(&lock->dep_map, 1, _RET_IP_);
|
||||
_raw_spin_unlock(lock);
|
||||
local_irq_restore(flags);
|
||||
preempt_enable();
|
||||
}
|
||||
EXPORT_SYMBOL(_spin_unlock_irqrestore);
|
||||
|
||||
void __lockfunc _spin_unlock_irq(spinlock_t *lock)
|
||||
{
|
||||
spin_release(&lock->dep_map, 1, _RET_IP_);
|
||||
_raw_spin_unlock(lock);
|
||||
local_irq_enable();
|
||||
preempt_enable();
|
||||
}
|
||||
EXPORT_SYMBOL(_spin_unlock_irq);
|
||||
|
||||
void __lockfunc _spin_unlock_bh(spinlock_t *lock)
|
||||
{
|
||||
spin_release(&lock->dep_map, 1, _RET_IP_);
|
||||
_raw_spin_unlock(lock);
|
||||
preempt_enable_no_resched();
|
||||
local_bh_enable_ip((unsigned long)__builtin_return_address(0));
|
||||
}
|
||||
EXPORT_SYMBOL(_spin_unlock_bh);
|
||||
|
||||
void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
|
||||
{
|
||||
rwlock_release(&lock->dep_map, 1, _RET_IP_);
|
||||
_raw_read_unlock(lock);
|
||||
local_irq_restore(flags);
|
||||
preempt_enable();
|
||||
}
|
||||
EXPORT_SYMBOL(_read_unlock_irqrestore);
|
||||
|
||||
void __lockfunc _read_unlock_irq(rwlock_t *lock)
|
||||
{
|
||||
rwlock_release(&lock->dep_map, 1, _RET_IP_);
|
||||
_raw_read_unlock(lock);
|
||||
local_irq_enable();
|
||||
preempt_enable();
|
||||
}
|
||||
EXPORT_SYMBOL(_read_unlock_irq);
|
||||
|
||||
void __lockfunc _read_unlock_bh(rwlock_t *lock)
|
||||
{
|
||||
rwlock_release(&lock->dep_map, 1, _RET_IP_);
|
||||
_raw_read_unlock(lock);
|
||||
preempt_enable_no_resched();
|
||||
local_bh_enable_ip((unsigned long)__builtin_return_address(0));
|
||||
}
|
||||
EXPORT_SYMBOL(_read_unlock_bh);
|
||||
|
||||
void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
|
||||
{
|
||||
rwlock_release(&lock->dep_map, 1, _RET_IP_);
|
||||
_raw_write_unlock(lock);
|
||||
local_irq_restore(flags);
|
||||
preempt_enable();
|
||||
}
|
||||
EXPORT_SYMBOL(_write_unlock_irqrestore);
|
||||
|
||||
void __lockfunc _write_unlock_irq(rwlock_t *lock)
|
||||
{
|
||||
rwlock_release(&lock->dep_map, 1, _RET_IP_);
|
||||
_raw_write_unlock(lock);
|
||||
local_irq_enable();
|
||||
preempt_enable();
|
||||
}
|
||||
EXPORT_SYMBOL(_write_unlock_irq);
|
||||
|
||||
void __lockfunc _write_unlock_bh(rwlock_t *lock)
|
||||
{
|
||||
rwlock_release(&lock->dep_map, 1, _RET_IP_);
|
||||
_raw_write_unlock(lock);
|
||||
preempt_enable_no_resched();
|
||||
local_bh_enable_ip((unsigned long)__builtin_return_address(0));
|
||||
}
|
||||
EXPORT_SYMBOL(_write_unlock_bh);
|
||||
|
||||
int __lockfunc _spin_trylock_bh(spinlock_t *lock)
|
||||
{
|
||||
local_bh_disable();
|
||||
preempt_disable();
|
||||
if (_raw_spin_trylock(lock)) {
|
||||
spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
|
||||
return 1;
|
||||
}
|
||||
|
||||
preempt_enable_no_resched();
|
||||
local_bh_enable_ip((unsigned long)__builtin_return_address(0));
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(_spin_trylock_bh);
|
||||
|
||||
/*
 * Return non-zero if @addr falls inside the __lockfunc text section
 * (used by profile_pc() to skip lock-implementation frames).
 */
int in_lock_functions(unsigned long addr)
{
	/* Linker adds these: start and end of __lockfunc functions */
	extern char __lock_text_start[], __lock_text_end[];
	unsigned long start = (unsigned long)__lock_text_start;
	unsigned long end = (unsigned long)__lock_text_end;

	return addr >= start && addr < end;
}
|
||||
EXPORT_SYMBOL(in_lock_functions);
|
||||
258
kernel/srcu.c
Normal file
258
kernel/srcu.c
Normal file
@@ -0,0 +1,258 @@
|
||||
/*
|
||||
* Sleepable Read-Copy Update mechanism for mutual exclusion.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*
|
||||
* Copyright (C) IBM Corporation, 2006
|
||||
*
|
||||
* Author: Paul McKenney <paulmck@us.ibm.com>
|
||||
*
|
||||
* For detailed explanation of Read-Copy Update mechanism see -
|
||||
* Documentation/RCU/ *.txt
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/preempt.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/srcu.h>
|
||||
|
||||
/**
|
||||
* init_srcu_struct - initialize a sleep-RCU structure
|
||||
* @sp: structure to initialize.
|
||||
*
|
||||
* Must invoke this on a given srcu_struct before passing that srcu_struct
|
||||
* to any other function. Each srcu_struct represents a separate domain
|
||||
* of SRCU protection.
|
||||
*/
|
||||
int init_srcu_struct(struct srcu_struct *sp)
|
||||
{
|
||||
sp->completed = 0;
|
||||
mutex_init(&sp->mutex);
|
||||
sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array);
|
||||
return (sp->per_cpu_ref ? 0 : -ENOMEM);
|
||||
}
|
||||
|
||||
/*
|
||||
* srcu_readers_active_idx -- returns approximate number of readers
|
||||
* active on the specified rank of per-CPU counters.
|
||||
*/
|
||||
|
||||
static int srcu_readers_active_idx(struct srcu_struct *sp, int idx)
|
||||
{
|
||||
int cpu;
|
||||
int sum;
|
||||
|
||||
sum = 0;
|
||||
for_each_possible_cpu(cpu)
|
||||
sum += per_cpu_ptr(sp->per_cpu_ref, cpu)->c[idx];
|
||||
return sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* srcu_readers_active - returns approximate number of readers.
|
||||
* @sp: which srcu_struct to count active readers (holding srcu_read_lock).
|
||||
*
|
||||
* Note that this is not an atomic primitive, and can therefore suffer
|
||||
* severe errors when invoked on an active srcu_struct. That said, it
|
||||
* can be useful as an error check at cleanup time.
|
||||
*/
|
||||
/*
 * Approximate total of active readers across both counter ranks.
 * Not atomic; intended as a cleanup-time sanity check.
 */
int srcu_readers_active(struct srcu_struct *sp)
{
	return srcu_readers_active_idx(sp, 0) +
	       srcu_readers_active_idx(sp, 1);
}
|
||||
|
||||
/**
|
||||
* cleanup_srcu_struct - deconstruct a sleep-RCU structure
|
||||
* @sp: structure to clean up.
|
||||
*
|
||||
* Must invoke this after you are finished using a given srcu_struct that
|
||||
* was initialized via init_srcu_struct(), else you leak memory.
|
||||
*/
|
||||
void cleanup_srcu_struct(struct srcu_struct *sp)
|
||||
{
|
||||
int sum;
|
||||
|
||||
sum = srcu_readers_active(sp);
|
||||
WARN_ON(sum); /* Leakage unless caller handles error. */
|
||||
if (sum != 0)
|
||||
return;
|
||||
free_percpu(sp->per_cpu_ref);
|
||||
sp->per_cpu_ref = NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* srcu_read_lock - register a new reader for an SRCU-protected structure.
|
||||
* @sp: srcu_struct in which to register the new reader.
|
||||
*
|
||||
* Counts the new reader in the appropriate per-CPU element of the
|
||||
* srcu_struct. Must be called from process context.
|
||||
* Returns an index that must be passed to the matching srcu_read_unlock().
|
||||
*/
|
||||
/*
 * Enter an SRCU read-side critical section: pick the currently-active
 * counter rank and count ourselves in on this CPU.  Returns the rank,
 * which must be handed back to srcu_read_unlock().
 */
int srcu_read_lock(struct srcu_struct *sp)
{
	int idx;

	preempt_disable();
	idx = sp->completed & 0x1;
	barrier();  /* ensure compiler looks -once- at sp->completed. */
	per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]++;
	srcu_barrier();  /* ensure compiler won't misorder critical section. */
	preempt_enable();
	return idx;
}
|
||||
|
||||
/**
|
||||
* srcu_read_unlock - unregister a old reader from an SRCU-protected structure.
|
||||
* @sp: srcu_struct in which to unregister the old reader.
|
||||
* @idx: return value from corresponding srcu_read_lock().
|
||||
*
|
||||
* Removes the count for the old reader from the appropriate per-CPU
|
||||
* element of the srcu_struct. Note that this may well be a different
|
||||
* CPU than that which was incremented by the corresponding srcu_read_lock().
|
||||
* Must be called from process context.
|
||||
*/
|
||||
/*
 * Leave an SRCU read-side critical section by decrementing the rank
 * @idx counter on the current CPU -- which may differ from the CPU
 * that incremented it in srcu_read_lock(); only the cross-CPU sum
 * matters (see srcu_readers_active_idx()).
 */
void srcu_read_unlock(struct srcu_struct *sp, int idx)
{
	preempt_disable();
	srcu_barrier();  /* ensure compiler won't misorder critical section. */
	per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]--;
	preempt_enable();
}
|
||||
|
||||
/**
|
||||
* synchronize_srcu - wait for prior SRCU read-side critical-section completion
|
||||
* @sp: srcu_struct with which to synchronize.
|
||||
*
|
||||
* Flip the completed counter, and wait for the old count to drain to zero.
|
||||
* As with classic RCU, the updater must use some separate means of
|
||||
* synchronizing concurrent updates. Can block; must be called from
|
||||
* process context.
|
||||
*
|
||||
* Note that it is illegal to call synchornize_srcu() from the corresponding
|
||||
* SRCU read-side critical section; doing so will result in deadlock.
|
||||
* However, it is perfectly legal to call synchronize_srcu() on one
|
||||
* srcu_struct from some other srcu_struct's read-side critical section.
|
||||
*/
|
||||
/*
 * Wait for all pre-existing SRCU readers of @sp to finish: flip the
 * active counter rank, then poll the old rank until it drains.  The
 * surrounding synchronize_sched() calls provide the cross-CPU memory
 * ordering; their placement is load-bearing -- see the comments below.
 */
void synchronize_srcu(struct srcu_struct *sp)
{
	int idx;

	/* Snapshot ->completed before taking the mutex so the check
	   below can detect grace periods that elapsed while we slept. */
	idx = sp->completed;
	mutex_lock(&sp->mutex);

	/*
	 * Check to see if someone else did the work for us while we were
	 * waiting to acquire the lock.  We need -two- advances of
	 * the counter, not just one.  If there was but one, we might have
	 * shown up -after- our helper's first synchronize_sched(), thus
	 * having failed to prevent CPU-reordering races with concurrent
	 * srcu_read_unlock()s on other CPUs (see comment below).  So we
	 * either (1) wait for two or (2) supply the second ourselves.
	 */

	if ((sp->completed - idx) >= 2) {
		mutex_unlock(&sp->mutex);
		return;
	}

	synchronize_sched(); /* Force memory barrier on all CPUs. */

	/*
	 * The preceding synchronize_sched() ensures that any CPU that
	 * sees the new value of sp->completed will also see any preceding
	 * changes to data structures made by this CPU.  This prevents
	 * some other CPU from reordering the accesses in its SRCU
	 * read-side critical section to precede the corresponding
	 * srcu_read_lock() -- ensuring that such references will in
	 * fact be protected.
	 *
	 * So it is now safe to do the flip.
	 */

	idx = sp->completed & 0x1;	/* rank held by pre-flip readers */
	sp->completed++;		/* new readers use the other rank */

	synchronize_sched(); /* Force memory barrier on all CPUs. */

	/*
	 * At this point, because of the preceding synchronize_sched(),
	 * all srcu_read_lock() calls using the old counters have completed.
	 * Their corresponding critical sections might well be still
	 * executing, but the srcu_read_lock() primitives themselves
	 * will have finished executing.
	 */

	while (srcu_readers_active_idx(sp, idx))
		schedule_timeout_interruptible(1);	/* poll until drained */

	synchronize_sched(); /* Force memory barrier on all CPUs. */

	/*
	 * The preceding synchronize_sched() forces all srcu_read_unlock()
	 * primitives that were executing concurrently with the preceding
	 * for_each_possible_cpu() loop to have completed by this point.
	 * More importantly, it also forces the corresponding SRCU read-side
	 * critical sections to have also completed, and the corresponding
	 * references to SRCU-protected data items to be dropped.
	 *
	 * Note:
	 *
	 *	Despite what you might think at first glance, the
	 *	preceding synchronize_sched() -must- be within the
	 *	critical section ended by the following mutex_unlock().
	 *	Otherwise, a task taking the early exit can race
	 *	with a srcu_read_unlock(), which might have executed
	 *	just before the preceding srcu_readers_active() check,
	 *	and whose CPU might have reordered the srcu_read_unlock()
	 *	with the preceding critical section.  In this case, there
	 *	is nothing preventing the synchronize_sched() task that is
	 *	taking the early exit from freeing a data structure that
	 *	is still being referenced (out of order) by the task
	 *	doing the srcu_read_unlock().
	 *
	 *	Alternatively, the comparison with "2" on the early exit
	 *	could be changed to "3", but this increases synchronize_srcu()
	 *	latency for bulk loads.  So the current code is preferred.
	 */

	mutex_unlock(&sp->mutex);
}
|
||||
|
||||
/**
|
||||
* srcu_batches_completed - return batches completed.
|
||||
* @sp: srcu_struct on which to report batch completion.
|
||||
*
|
||||
* Report the number of batches, correlated with, but not necessarily
|
||||
* precisely the same as, the number of grace periods that have elapsed.
|
||||
*/
|
||||
|
||||
long srcu_batches_completed(struct srcu_struct *sp)
|
||||
{
|
||||
return sp->completed;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL_GPL(init_srcu_struct);
|
||||
EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
|
||||
EXPORT_SYMBOL_GPL(srcu_read_lock);
|
||||
EXPORT_SYMBOL_GPL(srcu_read_unlock);
|
||||
EXPORT_SYMBOL_GPL(synchronize_srcu);
|
||||
EXPORT_SYMBOL_GPL(srcu_batches_completed);
|
||||
EXPORT_SYMBOL_GPL(srcu_readers_active);
|
||||
24
kernel/stacktrace.c
Normal file
24
kernel/stacktrace.c
Normal file
@@ -0,0 +1,24 @@
|
||||
/*
|
||||
* kernel/stacktrace.c
|
||||
*
|
||||
* Stack trace management functions
|
||||
*
|
||||
* Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
|
||||
*/
|
||||
#include <linux/sched.h>
|
||||
#include <linux/kallsyms.h>
|
||||
#include <linux/stacktrace.h>
|
||||
|
||||
void print_stack_trace(struct stack_trace *trace, int spaces)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < trace->nr_entries; i++) {
|
||||
unsigned long ip = trace->entries[i];
|
||||
|
||||
for (j = 0; j < spaces + 1; j++)
|
||||
printk(" ");
|
||||
print_ip_sym(ip);
|
||||
}
|
||||
}
|
||||
|
||||
210
kernel/stop_machine.c
Normal file
210
kernel/stop_machine.c
Normal file
@@ -0,0 +1,210 @@
|
||||
/* Copyright 2005 Rusty Russell rusty@rustcorp.com.au IBM Corporation.
|
||||
* GPL v2 and any later version.
|
||||
*/
|
||||
#include <linux/stop_machine.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <asm/atomic.h>
|
||||
#include <asm/semaphore.h>
|
||||
#include <asm/uaccess.h>
|
||||
|
||||
/* Since we effect priority and affinity (both of which are visible
 * to, and settable by outside processes) we do indirection via a
 * kthread. */

/* Thread to stop each CPU in user context. */
enum stopmachine_state {
	STOPMACHINE_WAIT,		/* threads created; yield until told otherwise */
	STOPMACHINE_PREPARE,		/* each thread disables preemption to hold its CPU */
	STOPMACHINE_DISABLE_IRQ,	/* each thread disables local interrupts */
	STOPMACHINE_EXIT,		/* release all threads */
};

/* Current phase of the protocol; written only by the initiating task. */
static enum stopmachine_state stopmachine_state;
/* Number of stopmachine() threads successfully spawned. */
static unsigned int stopmachine_num_threads;
/* Count of threads that have acknowledged the current phase. */
static atomic_t stopmachine_thread_ack;
/* Serializes concurrent __stop_machine_run() callers. */
static DECLARE_MUTEX(stopmachine_mutex);
|
||||
|
||||
/*
 * Per-CPU stop thread.  @cpu is the CPU number (cast through long to
 * void *) this thread binds itself to.  It walks the stopmachine_state
 * machine, acknowledging each phase via stopmachine_thread_ack, and
 * restores irq/preempt state before returning 0.
 */
static int stopmachine(void *cpu)
{
	int irqs_disabled = 0;	/* set once we enter the DISABLE_IRQ phase */
	int prepared = 0;	/* set once we enter the PREPARE phase */

	set_cpus_allowed(current, cpumask_of_cpu((int)(long)cpu));

	/* Ack: we are alive */
	smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
	atomic_inc(&stopmachine_thread_ack);

	/* Simple state machine */
	while (stopmachine_state != STOPMACHINE_EXIT) {
		if (stopmachine_state == STOPMACHINE_DISABLE_IRQ
		    && !irqs_disabled) {
			local_irq_disable();
			irqs_disabled = 1;
			/* Ack: irqs disabled. */
			smp_mb(); /* Must read state first. */
			atomic_inc(&stopmachine_thread_ack);
		} else if (stopmachine_state == STOPMACHINE_PREPARE
			   && !prepared) {
			/* Everyone is in place, hold CPU. */
			preempt_disable();
			prepared = 1;
			smp_mb(); /* Must read state first. */
			atomic_inc(&stopmachine_thread_ack);
		}
		/* Yield in first stage: migration threads need to
		 * help our sisters onto their CPUs. */
		if (!prepared && !irqs_disabled)
			yield();
		else
			cpu_relax();
	}

	/* Ack: we are exiting. */
	smp_mb(); /* Must read state first. */
	atomic_inc(&stopmachine_thread_ack);

	if (irqs_disabled)
		local_irq_enable();
	if (prepared)
		preempt_enable();

	return 0;
}
|
||||
|
||||
/* Change the thread state */
/*
 * Publish @state to every stop thread and spin until all
 * stopmachine_num_threads of them have acknowledged it.  The ack
 * counter must be zeroed before the new state becomes visible, hence
 * the smp_wmb() between the two stores.
 */
static void stopmachine_set_state(enum stopmachine_state state)
{
	atomic_set(&stopmachine_thread_ack, 0);
	smp_wmb();	/* order ack reset before the state change */
	stopmachine_state = state;
	while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads)
		cpu_relax();
}
|
||||
|
||||
static int stop_machine(void)
|
||||
{
|
||||
int i, ret = 0;
|
||||
struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
|
||||
|
||||
/* One high-prio thread per cpu. We'll do this one. */
|
||||
sched_setscheduler(current, SCHED_FIFO, ¶m);
|
||||
|
||||
atomic_set(&stopmachine_thread_ack, 0);
|
||||
stopmachine_num_threads = 0;
|
||||
stopmachine_state = STOPMACHINE_WAIT;
|
||||
|
||||
for_each_online_cpu(i) {
|
||||
if (i == raw_smp_processor_id())
|
||||
continue;
|
||||
ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL);
|
||||
if (ret < 0)
|
||||
break;
|
||||
stopmachine_num_threads++;
|
||||
}
|
||||
|
||||
/* Wait for them all to come to life. */
|
||||
while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads)
|
||||
yield();
|
||||
|
||||
/* If some failed, kill them all. */
|
||||
if (ret < 0) {
|
||||
stopmachine_set_state(STOPMACHINE_EXIT);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Now they are all started, make them hold the CPUs, ready. */
|
||||
preempt_disable();
|
||||
stopmachine_set_state(STOPMACHINE_PREPARE);
|
||||
|
||||
/* Make them disable irqs. */
|
||||
local_irq_disable();
|
||||
stopmachine_set_state(STOPMACHINE_DISABLE_IRQ);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Undo stop_machine(): release every stop thread, then re-enable local
 * interrupts and preemption (both disabled at the end of stop_machine()).
 */
static void restart_machine(void)
{
	stopmachine_set_state(STOPMACHINE_EXIT);
	local_irq_enable();
	preempt_enable_no_resched();	/* re-enable preemption without forcing a resched */
}
|
||||
|
||||
/* Arguments handed from __stop_machine_run() to the do_stop() kthread. */
struct stop_machine_data
{
	int (*fn)(void *);		/* function to run while the machine is stopped */
	void *data;			/* opaque argument passed to @fn */
	struct completion done;		/* signalled once do_stop() has finished its work */
};
|
||||
|
||||
/*
 * Body of the "kstopmachine" kthread: stop the machine, run the
 * caller's function, restart the machine, then park until
 * kthread_stop().  The return value (fn's result, or stop_machine()'s
 * error) is collected via kthread_stop() in stop_machine_run().
 */
static int do_stop(void *_smdata)
{
	struct stop_machine_data *smdata = _smdata;
	int ret;

	ret = stop_machine();
	if (ret == 0) {
		ret = smdata->fn(smdata->data);
		restart_machine();
	}

	/* We're done: you can kthread_stop us now */
	complete(&smdata->done);

	/* Wait for kthread_stop */
	set_current_state(TASK_INTERRUPTIBLE);
	while (!kthread_should_stop()) {
		schedule();
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);
	return ret;
}
|
||||
|
||||
/*
 * Create the "kstopmachine" kthread bound to @cpu (or the current CPU
 * when @cpu == NR_CPUS), have it run @fn(@data) with the machine
 * stopped, and wait for it to finish.  Returns the (still parked) task
 * — the caller must kthread_stop() it to collect fn's result — or the
 * ERR_PTR from kthread_create().  Serialized by stopmachine_mutex.
 */
struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
				       unsigned int cpu)
{
	struct stop_machine_data smdata;
	struct task_struct *p;

	smdata.fn = fn;
	smdata.data = data;
	init_completion(&smdata.done);

	down(&stopmachine_mutex);

	/* If they don't care which CPU fn runs on, bind to any online one. */
	if (cpu == NR_CPUS)
		cpu = raw_smp_processor_id();

	p = kthread_create(do_stop, &smdata, "kstopmachine");
	if (!IS_ERR(p)) {
		kthread_bind(p, cpu);
		wake_up_process(p);
		/* smdata lives on our stack: wait before it goes out of scope */
		wait_for_completion(&smdata.done);
	}
	up(&stopmachine_mutex);
	return p;
}
|
||||
|
||||
/*
 * Run @fn(@data) on @cpu while every other CPU spins with interrupts
 * disabled.  CPU hotplug is blocked for the duration.  Returns @fn's
 * result, or a negative errno if the stop thread could not be created.
 */
int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
{
	struct task_struct *p;
	int ret;

	/* No CPUs can come up or down during this. */
	lock_cpu_hotplug();
	p = __stop_machine_run(fn, data, cpu);
	ret = IS_ERR(p) ? PTR_ERR(p) : kthread_stop(p);
	unlock_cpu_hotplug();

	return ret;
}
|
||||
2210
kernel/sys.c
Normal file
2210
kernel/sys.c
Normal file
File diff suppressed because it is too large
Load Diff
143
kernel/sys_ni.c
Normal file
143
kernel/sys_ni.c
Normal file
@@ -0,0 +1,143 @@
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <linux/errno.h>
|
||||
|
||||
#include <asm/unistd.h>
|
||||
|
||||
/*
 * Non-implemented system calls get redirected here.
 */
asmlinkage long sys_ni_syscall(void)
{
	return -ENOSYS;	/* "Function not implemented" */
}
|
||||
|
||||
/*
 * NOTE(review): cond_syscall() (from <asm/unistd.h>) presumably makes
 * each entry a weak reference so that sys_ni_syscall() is used when the
 * corresponding subsystem is configured out — verify against the arch's
 * definition.
 */
cond_syscall(sys_nfsservctl);
cond_syscall(sys_quotactl);
cond_syscall(sys_acct);
cond_syscall(sys_lookup_dcookie);
cond_syscall(sys_swapon);
cond_syscall(sys_swapoff);
cond_syscall(sys_kexec_load);
cond_syscall(compat_sys_kexec_load);
cond_syscall(sys_init_module);
cond_syscall(sys_delete_module);
cond_syscall(sys_socketpair);
cond_syscall(sys_bind);
cond_syscall(sys_listen);
cond_syscall(sys_accept);
cond_syscall(sys_connect);
cond_syscall(sys_getsockname);
cond_syscall(sys_getpeername);
cond_syscall(sys_sendto);
cond_syscall(sys_send);
cond_syscall(sys_recvfrom);
cond_syscall(sys_recv);
cond_syscall(sys_socket);
cond_syscall(sys_setsockopt);
cond_syscall(sys_getsockopt);
cond_syscall(sys_shutdown);
cond_syscall(sys_sendmsg);
cond_syscall(sys_recvmsg);
cond_syscall(sys_socketcall);
cond_syscall(sys_futex);
cond_syscall(compat_sys_futex);
cond_syscall(sys_set_robust_list);
cond_syscall(compat_sys_set_robust_list);
cond_syscall(sys_get_robust_list);
cond_syscall(compat_sys_get_robust_list);
cond_syscall(sys_epoll_create);
cond_syscall(sys_epoll_ctl);
cond_syscall(sys_epoll_wait);
cond_syscall(sys_epoll_pwait);
cond_syscall(sys_semget);
cond_syscall(sys_semop);
cond_syscall(sys_semtimedop);
cond_syscall(sys_semctl);
cond_syscall(sys_msgget);
cond_syscall(sys_msgsnd);
cond_syscall(sys_msgrcv);
cond_syscall(sys_msgctl);
cond_syscall(sys_shmget);
cond_syscall(sys_shmat);
cond_syscall(sys_shmdt);
cond_syscall(sys_shmctl);
cond_syscall(sys_mq_open);
cond_syscall(sys_mq_unlink);
cond_syscall(sys_mq_timedsend);
cond_syscall(sys_mq_timedreceive);
cond_syscall(sys_mq_notify);
cond_syscall(sys_mq_getsetattr);
cond_syscall(compat_sys_mq_open);
cond_syscall(compat_sys_mq_timedsend);
cond_syscall(compat_sys_mq_timedreceive);
cond_syscall(compat_sys_mq_notify);
cond_syscall(compat_sys_mq_getsetattr);
cond_syscall(sys_mbind);
cond_syscall(sys_get_mempolicy);
cond_syscall(sys_set_mempolicy);
cond_syscall(compat_sys_mbind);
cond_syscall(compat_sys_get_mempolicy);
cond_syscall(compat_sys_set_mempolicy);
cond_syscall(sys_add_key);
cond_syscall(sys_request_key);
cond_syscall(sys_keyctl);
cond_syscall(compat_sys_keyctl);
cond_syscall(compat_sys_socketcall);
cond_syscall(sys_inotify_init);
cond_syscall(sys_inotify_add_watch);
cond_syscall(sys_inotify_rm_watch);
cond_syscall(sys_migrate_pages);
cond_syscall(sys_move_pages);
cond_syscall(sys_chown16);
cond_syscall(sys_fchown16);
cond_syscall(sys_getegid16);
cond_syscall(sys_geteuid16);
cond_syscall(sys_getgid16);
cond_syscall(sys_getgroups16);
cond_syscall(sys_getresgid16);
cond_syscall(sys_getresuid16);
cond_syscall(sys_getuid16);
cond_syscall(sys_lchown16);
cond_syscall(sys_setfsgid16);
cond_syscall(sys_setfsuid16);
cond_syscall(sys_setgid16);
cond_syscall(sys_setgroups16);
cond_syscall(sys_setregid16);
cond_syscall(sys_setresgid16);
cond_syscall(sys_setresuid16);
cond_syscall(sys_setreuid16);
cond_syscall(sys_setuid16);
cond_syscall(sys_vm86old);
cond_syscall(sys_vm86);
cond_syscall(compat_sys_ipc);
cond_syscall(compat_sys_sysctl);

/* arch-specific weak syscall entries */
cond_syscall(sys_pciconfig_read);
cond_syscall(sys_pciconfig_write);
cond_syscall(sys_pciconfig_iobase);
cond_syscall(sys32_ipc);
cond_syscall(sys32_sysctl);
cond_syscall(ppc_rtas);
cond_syscall(sys_spu_run);
cond_syscall(sys_spu_create);

/* mmu depending weak syscall entries */
cond_syscall(sys_mprotect);
cond_syscall(sys_msync);
cond_syscall(sys_mlock);
cond_syscall(sys_munlock);
cond_syscall(sys_mlockall);
cond_syscall(sys_munlockall);
cond_syscall(sys_mincore);
cond_syscall(sys_madvise);
cond_syscall(sys_mremap);
cond_syscall(sys_remap_file_pages);
cond_syscall(compat_sys_move_pages);
cond_syscall(compat_sys_migrate_pages);

/* block-layer dependent */
cond_syscall(sys_bdflush);
cond_syscall(sys_ioprio_set);
cond_syscall(sys_ioprio_get);
||||
2448
kernel/sysctl.c
Normal file
2448
kernel/sysctl.c
Normal file
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user