diff options
413 files changed, 17758 insertions, 5048 deletions
diff --git a/Documentation/irqflags-tracing.txt b/Documentation/irqflags-tracing.txt new file mode 100644 index 000000000000..6a444877ee0b --- /dev/null +++ b/Documentation/irqflags-tracing.txt @@ -0,0 +1,57 @@ +IRQ-flags state tracing + +started by Ingo Molnar <mingo@redhat.com> + +the "irq-flags tracing" feature "traces" hardirq and softirq state, in +that it gives interested subsystems an opportunity to be notified of +every hardirqs-off/hardirqs-on, softirqs-off/softirqs-on event that +happens in the kernel. + +CONFIG_TRACE_IRQFLAGS_SUPPORT is needed for CONFIG_PROVE_SPIN_LOCKING +and CONFIG_PROVE_RW_LOCKING to be offered by the generic lock debugging +code. Otherwise only CONFIG_PROVE_MUTEX_LOCKING and +CONFIG_PROVE_RWSEM_LOCKING will be offered on an architecture - these +are locking APIs that are not used in IRQ context. (the one exception +for rwsems is worked around) + +architecture support for this is certainly not in the "trivial" +category, because lots of lowlevel assembly code deal with irq-flags +state changes. But an architecture can be irq-flags-tracing enabled in a +rather straightforward and risk-free manner. + +Architectures that want to support this need to do a couple of +code-organizational changes first: + +- move their irq-flags manipulation code from their asm/system.h header + to asm/irqflags.h + +- rename local_irq_disable()/etc to raw_local_irq_disable()/etc. so that + the linux/irqflags.h code can inject callbacks and can construct the + real local_irq_disable()/etc APIs. + +- add and enable TRACE_IRQFLAGS_SUPPORT in their arch level Kconfig file + +and then a couple of functional changes are needed as well to implement +irq-flags-tracing support: + +- in lowlevel entry code add (build-conditional) calls to the + trace_hardirqs_off()/trace_hardirqs_on() functions. 
The lock validator + closely guards whether the 'real' irq-flags matches the 'virtual' + irq-flags state, and complains loudly (and turns itself off) if the + two do not match. Usually most of the time for arch support for + irq-flags-tracing is spent in this state: look at the lockdep + complaint, try to figure out the assembly code we did not cover yet, + fix and repeat. Once the system has booted up and works without a + lockdep complaint in the irq-flags-tracing functions arch support is + complete. +- if the architecture has non-maskable interrupts then those need to be + excluded from the irq-tracing [and lock validation] mechanism via + lockdep_off()/lockdep_on(). + +in general there is no risk from having an incomplete irq-flags-tracing +implementation in an architecture: lockdep will detect that and will +turn itself off. I.e. the lock validator will still be reliable. There +should be no crashes due to irq-tracing bugs. (except if the assembly +changes break other code by modifying conditions or registers that +shouldnt be) + diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 86e9282d1c20..149f62ba14a5 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -435,6 +435,15 @@ running once the system is up. debug [KNL] Enable kernel debugging (events log level). + debug_locks_verbose= + [KNL] verbose self-tests + Format=<0|1> + Print debugging info while doing the locking API + self-tests. + We default to 0 (no extra messages), setting it to + 1 will print _a lot_ more information - normally + only useful to kernel developers. + decnet= [HW,NET] Format: <area>[,<node>] See also Documentation/networking/decnet.txt. 
diff --git a/Documentation/lockdep-design.txt b/Documentation/lockdep-design.txt new file mode 100644 index 000000000000..00d93605bfd3 --- /dev/null +++ b/Documentation/lockdep-design.txt @@ -0,0 +1,197 @@ +Runtime locking correctness validator +===================================== + +started by Ingo Molnar <mingo@redhat.com> +additions by Arjan van de Ven <arjan@linux.intel.com> + +Lock-class +---------- + +The basic object the validator operates upon is a 'class' of locks. + +A class of locks is a group of locks that are logically the same with +respect to locking rules, even if the locks may have multiple (possibly +tens of thousands of) instantiations. For example a lock in the inode +struct is one class, while each inode has its own instantiation of that +lock class. + +The validator tracks the 'state' of lock-classes, and it tracks +dependencies between different lock-classes. The validator maintains a +rolling proof that the state and the dependencies are correct. + +Unlike a lock instantiation, the lock-class itself never goes away: when +a lock-class is used for the first time after bootup it gets registered, +and all subsequent uses of that lock-class will be attached to this +lock-class. + +State +----- + +The validator tracks lock-class usage history into 5 separate state bits: + +- 'ever held in hardirq context' [ == hardirq-safe ] +- 'ever held in softirq context' [ == softirq-safe ] +- 'ever held with hardirqs enabled' [ == hardirq-unsafe ] +- 'ever held with softirqs and hardirqs enabled' [ == softirq-unsafe ] + +- 'ever used' [ == !unused ] + +Single-lock state rules: +------------------------ + +A softirq-unsafe lock-class is automatically hardirq-unsafe as well. The +following states are exclusive, and only one of them is allowed to be +set for any lock-class: + + <hardirq-safe> and <hardirq-unsafe> + <softirq-safe> and <softirq-unsafe> + +The validator detects and reports lock usage that violates these +single-lock state rules. 
+ +Multi-lock dependency rules: +---------------------------- + +The same lock-class must not be acquired twice, because this could lead +to lock recursion deadlocks. + +Furthermore, two locks may not be taken in different order: + + <L1> -> <L2> + <L2> -> <L1> + +because this could lead to lock inversion deadlocks. (The validator +finds such dependencies in arbitrary complexity, i.e. there can be any +other locking sequence between the acquire-lock operations, the +validator will still track all dependencies between locks.) + +Furthermore, the following usage based lock dependencies are not allowed +between any two lock-classes: + + <hardirq-safe> -> <hardirq-unsafe> + <softirq-safe> -> <softirq-unsafe> + +The first rule comes from the fact that a hardirq-safe lock could be +taken by a hardirq context, interrupting a hardirq-unsafe lock - and +thus could result in a lock inversion deadlock. Likewise, a softirq-safe +lock could be taken by a softirq context, interrupting a softirq-unsafe +lock. + +The above rules are enforced for any locking sequence that occurs in the +kernel: when acquiring a new lock, the validator checks whether there is +any rule violation between the new lock and any of the held locks. + +When a lock-class changes its state, the following aspects of the above +dependency rules are enforced: + +- if a new hardirq-safe lock is discovered, we check whether it + took any hardirq-unsafe lock in the past. + +- if a new softirq-safe lock is discovered, we check whether it took + any softirq-unsafe lock in the past. + +- if a new hardirq-unsafe lock is discovered, we check whether any + hardirq-safe lock took it in the past. + +- if a new softirq-unsafe lock is discovered, we check whether any + softirq-safe lock took it in the past. 
+ +(Again, we do these checks too on the basis that an interrupt context +could interrupt _any_ of the irq-unsafe or hardirq-unsafe locks, which +could lead to a lock inversion deadlock - even if that lock scenario did +not trigger in practice yet.) + +Exception: Nested data dependencies leading to nested locking +------------------------------------------------------------- + +There are a few cases where the Linux kernel acquires more than one +instance of the same lock-class. Such cases typically happen when there +is some sort of hierarchy within objects of the same type. In these +cases there is an inherent "natural" ordering between the two objects +(defined by the properties of the hierarchy), and the kernel grabs the +locks in this fixed order on each of the objects. + +An example of such an object hierarchy that results in "nested locking" +is that of a "whole disk" block-dev object and a "partition" block-dev +object; the partition is "part of" the whole device and as long as one +always takes the whole disk lock as a higher lock than the partition +lock, the lock ordering is fully correct. The validator does not +automatically detect this natural ordering, as the locking rule behind +the ordering is not static. + +In order to teach the validator about this correct usage model, new +versions of the various locking primitives were added that allow you to +specify a "nesting level". An example call, for the block device mutex, +looks like this: + +enum bdev_bd_mutex_lock_class +{ + BD_MUTEX_NORMAL, + BD_MUTEX_WHOLE, + BD_MUTEX_PARTITION +}; + + mutex_lock_nested(&bdev->bd_contains->bd_mutex, BD_MUTEX_PARTITION); + +In this case the locking is done on a bdev object that is known to be a +partition. + +The validator treats a lock that is taken in such a nested fashion as a +separate (sub)class for the purposes of validation. 
+ +Note: When changing code to use the _nested() primitives, be careful and +check really thoroughly that the hierarchy is correctly mapped; otherwise +you can get false positives or false negatives. + +Proof of 100% correctness: +-------------------------- + +The validator achieves perfect, mathematical 'closure' (proof of locking +correctness) in the sense that for every simple, standalone single-task +locking sequence that occurred at least once during the lifetime of the +kernel, the validator proves it with a 100% certainty that no +combination and timing of these locking sequences can cause any class of +lock related deadlock. [*] + +I.e. complex multi-CPU and multi-task locking scenarios do not have to +occur in practice to prove a deadlock: only the simple 'component' +locking chains have to occur at least once (anytime, in any +task/context) for the validator to be able to prove correctness. (For +example, complex deadlocks that would normally need more than 3 CPUs and +a very unlikely constellation of tasks, irq-contexts and timings to +occur, can be detected on a plain, lightly loaded single-CPU system as +well!) + +This radically decreases the complexity of locking related QA of the +kernel: what has to be done during QA is to trigger as many "simple" +single-task locking dependencies in the kernel as possible, at least +once, to prove locking correctness - instead of having to trigger every +possible combination of locking interaction between CPUs, combined with +every possible hardirq and softirq nesting scenario (which is impossible +to do in practice). + +[*] assuming that the validator itself is 100% correct, and no other + part of the system corrupts the state of the validator in any way. + We also assume that all NMI/SMM paths [which could interrupt + even hardirq-disabled codepaths] are correct and do not interfere + with the validator. We also assume that the 64-bit 'chain hash' + value is unique for every lock-chain in the system. 
Also, lock + recursion must not be higher than 20. + +Performance: +------------ + +The above rules require _massive_ amounts of runtime checking. If we did +that for every lock taken and for every irqs-enable event, it would +render the system practically unusably slow. The complexity of checking +is O(N^2), so even with just a few hundred lock-classes we'd have to do +tens of thousands of checks for every event. + +This problem is solved by checking any given 'locking scenario' (unique +sequence of locks taken after each other) only once. A simple stack of +held locks is maintained, and a lightweight 64-bit hash value is +calculated, which hash is unique for every lock chain. The hash value, +when the chain is validated for the first time, is then put into a hash +table, which hash-table can be checked in a lockfree manner. If the +locking chain occurs again later on, the hash table tells us that we +dont have to validate the chain again. diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt index 217e51768b87..3c62e66e1fcc 100644 --- a/Documentation/powerpc/booting-without-of.txt +++ b/Documentation/powerpc/booting-without-of.txt @@ -1436,9 +1436,9 @@ platforms are moved over to use the flattened-device-tree model. interrupts = <1d 3>; interrupt-parent = <40000>; num-channels = <4>; - channel-fifo-len = <24>; + channel-fifo-len = <18>; exec-units-mask = <000000fe>; - descriptor-types-mask = <073f1127>; + descriptor-types-mask = <012b0ebf>; }; diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 86754eb390da..7cee90223d3a 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -28,6 +28,7 @@ Currently, these files are in /proc/sys/vm: - block_dump - drop-caches - zone_reclaim_mode +- min_unmapped_ratio - panic_on_oom ============================================================== @@ -168,6 +169,19 @@ in all nodes of the system. 
============================================================= +min_unmapped_ratio: + +This is available only on NUMA kernels. + +A percentage of the file backed pages in each zone. Zone reclaim will only +occur if more than this percentage of pages are file backed and unmapped. +This is to ensure that a minimal amount of local pages is still available for +file I/O even if the node is overallocated. + +The default is 1 percent. + +============================================================= + panic_on_oom This enables or disables panic on out-of-memory feature. If this is set to 1, diff --git a/MAINTAINERS b/MAINTAINERS index 42be131139c8..5f76a4f5cd4b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -861,6 +861,8 @@ S: Maintained DOCBOOK FOR DOCUMENTATION P: Martin Waitz M: tali@admingilde.org +P: Randy Dunlap +M: rdunlap@xenotime.net T: git http://tali.admingilde.org/git/linux-docbook.git S: Maintained @@ -2298,6 +2300,14 @@ M: promise@pnd-pc.demon.co.uk W: http://www.pnd-pc.demon.co.uk/promise/ S: Maintained +PVRUSB2 VIDEO4LINUX DRIVER +P: Mike Isely +M: isely@pobox.com +L: pvrusb2@isely.net +L: video4linux-list@redhat.com +W: http://www.isely.net/pvrusb2/ +S: Maintained + PXA2xx SUPPORT P: Nicolas Pitre M: nico@cam.org diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 01c8c8b23337..41ebf51a107a 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -474,7 +474,7 @@ out: */ unsigned long -thread_saved_pc(task_t *t) +thread_saved_pc(struct task_struct *t) { unsigned long base = (unsigned long)task_stack_page(t); unsigned long fp, sp = task_thread_info(t)->pcb.ksp; diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 27d8dddbaa47..daa75ce4b777 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -18,6 +18,14 @@ config GENERIC_TIME bool default y +config LOCKDEP_SUPPORT + bool + default y + +config STACKTRACE_SUPPORT + bool + default y + config SEMAPHORE_SLEEPERS bool default y diff --git 
a/arch/i386/Kconfig.debug b/arch/i386/Kconfig.debug index c92191b1fb67..b31c0802e1cc 100644 --- a/arch/i386/Kconfig.debug +++ b/arch/i386/Kconfig.debug @@ -1,5 +1,9 @@ menu "Kernel hacking" +config TRACE_IRQFLAGS_SUPPORT + bool + default y + source "lib/Kconfig.debug" config EARLY_PRINTK @@ -31,15 +35,6 @@ config DEBUG_STACK_USAGE This option will slow down process creation somewhat. -config STACK_BACKTRACE_COLS - int "Stack backtraces per line" if DEBUG_KERNEL - range 1 3 - default 2 - help - Selects how many stack backtrace entries per line to display. - - This can save screen space when displaying traces. - comment "Page alloc debug is incompatible with Software Suspend on i386" depends on DEBUG_KERNEL && SOFTWARE_SUSPEND diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index cbc1184e9473..1b452a1665c4 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile @@ -9,6 +9,7 @@ obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ pci-dma.o i386_ksyms.o i387.o bootflag.o \ quirks.o i8237.o topology.o alternative.o i8253.o tsc.o +obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += cpu/ obj-y += acpi/ obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c index 7b421b3a053e..28ab80649764 100644 --- a/arch/i386/kernel/alternative.c +++ b/arch/i386/kernel/alternative.c @@ -303,6 +303,16 @@ void alternatives_smp_switch(int smp) struct smp_alt_module *mod; unsigned long flags; +#ifdef CONFIG_LOCKDEP + /* + * A not yet fixed binutils section handling bug prevents + * alternatives-replacement from working reliably, so turn + * it off: + */ + printk("lockdep: not fixing up alternatives.\n"); + return; +#endif + if (no_replacement || smp_alt_once) return; BUG_ON(!smp && (num_online_cpus() > 1)); diff --git a/arch/i386/kernel/cpuid.c b/arch/i386/kernel/cpuid.c index a8d3ecdc3897..fde8bea85cee 100644 --- a/arch/i386/kernel/cpuid.c +++ b/arch/i386/kernel/cpuid.c @@ -167,6 
+167,7 @@ static int cpuid_class_device_create(int i) return err; } +#ifdef CONFIG_HOTPLUG_CPU static int cpuid_class_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; @@ -186,6 +187,7 @@ static struct notifier_block __cpuinitdata cpuid_class_cpu_notifier = { .notifier_call = cpuid_class_cpu_callback, }; +#endif /* CONFIG_HOTPLUG_CPU */ static int __init cpuid_init(void) { @@ -208,7 +210,7 @@ static int __init cpuid_init(void) if (err != 0) goto out_class; } - register_cpu_notifier(&cpuid_class_cpu_notifier); + register_hotcpu_notifier(&cpuid_class_cpu_notifier); err = 0; goto out; @@ -233,7 +235,7 @@ static void __exit cpuid_exit(void) class_device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu)); class_destroy(cpuid_class); unregister_chrdev(CPUID_MAJOR, "cpu/cpuid"); - unregister_cpu_notifier(&cpuid_class_cpu_notifier); + unregister_hotcpu_notifier(&cpuid_class_cpu_notifier); } module_init(cpuid_init); diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 787190c45fdb..d9a260f2efb4 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -42,6 +42,7 @@ #include <linux/linkage.h> #include <asm/thread_info.h> +#include <asm/irqflags.h> #include <asm/errno.h> #include <asm/segment.h> #include <asm/smp.h> @@ -76,12 +77,21 @@ NT_MASK = 0x00004000 VM_MASK = 0x00020000 #ifdef CONFIG_PREEMPT -#define preempt_stop cli +#define preempt_stop cli; TRACE_IRQS_OFF #else #define preempt_stop #define resume_kernel restore_nocheck #endif +.macro TRACE_IRQS_IRET +#ifdef CONFIG_TRACE_IRQFLAGS + testl $IF_MASK,EFLAGS(%esp) # interrupts off? 
+ jz 1f + TRACE_IRQS_ON +1: +#endif +.endm + #ifdef CONFIG_VM86 #define resume_userspace_sig check_userspace #else @@ -257,6 +267,10 @@ ENTRY(sysenter_entry) CFI_REGISTER esp, ebp movl TSS_sysenter_esp0(%esp),%esp sysenter_past_esp: + /* + * No need to follow this irqs on/off section: the syscall + * disabled irqs and here we enable it straight after entry: + */ sti pushl $(__USER_DS) CFI_ADJUST_CFA_OFFSET 4 @@ -303,6 +317,7 @@ sysenter_past_esp: call *sys_call_table(,%eax,4) movl %eax,EAX(%esp) cli + TRACE_IRQS_OFF movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx jne syscall_exit_work @@ -310,6 +325,7 @@ sysenter_past_esp: movl EIP(%esp), %edx movl OLDESP(%esp), %ecx xorl %ebp,%ebp + TRACE_IRQS_ON sti sysexit CFI_ENDPROC @@ -339,6 +355,7 @@ syscall_exit: cli # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret + TRACE_IRQS_OFF movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx # current->work jne syscall_exit_work @@ -355,12 +372,15 @@ restore_all: CFI_REMEMBER_STATE je ldt_ss # returning to user-space with LDT SS restore_nocheck: + TRACE_IRQS_IRET +restore_nocheck_notrace: RESTORE_REGS addl $4, %esp CFI_ADJUST_CFA_OFFSET -4 1: iret .section .fixup,"ax" iret_exc: + TRACE_IRQS_ON sti pushl $0 # no error code pushl $do_iret_error @@ -386,11 +406,13 @@ ldt_ss: subl $8, %esp # reserve space for switch16 pointer CFI_ADJUST_CFA_OFFSET 8 cli + TRACE_IRQS_OFF movl %esp, %eax /* Set up the 16bit stack frame with switch32 pointer on top, * and a switch16 pointer on top of the current frame. 
*/ call setup_x86_bogus_stack CFI_ADJUST_CFA_OFFSET -8 # frame has moved + TRACE_IRQS_IRET RESTORE_REGS lss 20+4(%esp), %esp # switch to 16bit stack 1: iret @@ -411,6 +433,7 @@ work_resched: cli # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret + TRACE_IRQS_OFF movl TI_flags(%ebp), %ecx andl $_TIF_WORK_MASK, %ecx # is there any work to be done other # than syscall tracing? @@ -462,6 +485,7 @@ syscall_trace_entry: syscall_exit_work: testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl jz work_pending + TRACE_IRQS_ON sti # could let do_syscall_trace() call # schedule() instead movl %esp, %eax @@ -535,9 +559,14 @@ ENTRY(irq_entries_start) vector=vector+1 .endr +/* + * the CPU automatically disables interrupts when executing an IRQ vector, + * so IRQ-flags tracing has to follow that: + */ ALIGN common_interrupt: SAVE_ALL + TRACE_IRQS_OFF movl %esp,%eax call do_IRQ jmp ret_from_intr @@ -549,9 +578,10 @@ ENTRY(name) \ pushl $~(nr); \ CFI_ADJUST_CFA_OFFSET 4; \ SAVE_ALL; \ + TRACE_IRQS_OFF \ movl %esp,%eax; \ call smp_/**/name; \ - jmp ret_from_intr; \ + jmp ret_from_intr; \ CFI_ENDPROC /* The include is where all of the SMP etc. 
interrupts come from */ @@ -726,7 +756,7 @@ nmi_stack_correct: xorl %edx,%edx # zero error code movl %esp,%eax # pt_regs pointer call do_nmi - jmp restore_all + jmp restore_nocheck_notrace CFI_ENDPROC nmi_stack_fixup: diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index 16b491703967..6cb529f60dcc 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c @@ -166,7 +166,7 @@ void irq_ctx_init(int cpu) irqctx->tinfo.task = NULL; irqctx->tinfo.exec_domain = NULL; irqctx->tinfo.cpu = cpu; - irqctx->tinfo.preempt_count = SOFTIRQ_OFFSET; + irqctx->tinfo.preempt_count = 0; irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); softirq_ctx[cpu] = irqctx; @@ -211,6 +211,10 @@ asmlinkage void do_softirq(void) : "0"(isp) : "memory", "cc", "edx", "ecx", "eax" ); + /* + * Shouldnt happen, we returned above if in_interrupt(): + */ + WARN_ON_ONCE(softirq_count()); } local_irq_restore(flags); diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index a76e93146585..2dd928a84645 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -107,7 +107,7 @@ int nmi_active; static __init void nmi_cpu_busy(void *data) { volatile int *endflag = data; - local_irq_enable(); + local_irq_enable_in_hardirq(); /* Intentionally don't use cpu_relax here. 
This is to make sure that the performance counter really ticks, even if there is a simulator or similar that catches the diff --git a/arch/i386/kernel/stacktrace.c b/arch/i386/kernel/stacktrace.c new file mode 100644 index 000000000000..e62a037ab399 --- /dev/null +++ b/arch/i386/kernel/stacktrace.c @@ -0,0 +1,98 @@ +/* + * arch/i386/kernel/stacktrace.c + * + * Stack trace management functions + * + * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + */ +#include <linux/sched.h> +#include <linux/stacktrace.h> + +static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) +{ + return p > (void *)tinfo && + p < (void *)tinfo + THREAD_SIZE - 3; +} + +/* + * Save stack-backtrace addresses into a stack_trace buffer: + */ +static inline unsigned long +save_context_stack(struct stack_trace *trace, unsigned int skip, + struct thread_info *tinfo, unsigned long *stack, + unsigned long ebp) +{ + unsigned long addr; + +#ifdef CONFIG_FRAME_POINTER + while (valid_stack_ptr(tinfo, (void *)ebp)) { + addr = *(unsigned long *)(ebp + 4); + if (!skip) + trace->entries[trace->nr_entries++] = addr; + else + skip--; + if (trace->nr_entries >= trace->max_entries) + break; + /* + * break out of recursive entries (such as + * end_of_stack_stop_unwind_function): + */ + if (ebp == *(unsigned long *)ebp) + break; + + ebp = *(unsigned long *)ebp; + } +#else + while (valid_stack_ptr(tinfo, stack)) { + addr = *stack++; + if (__kernel_text_address(addr)) { + if (!skip) + trace->entries[trace->nr_entries++] = addr; + else + skip--; + if (trace->nr_entries >= trace->max_entries) + break; + } + } +#endif + + return ebp; +} + +/* + * Save stack-backtrace addresses into a stack_trace buffer. + * If all_contexts is set, all contexts (hardirq, softirq and process) + * are saved. If not set then only the current context is saved. 
+ */ +void save_stack_trace(struct stack_trace *trace, + struct task_struct *task, int all_contexts, + unsigned int skip) +{ + unsigned long ebp; + unsigned long *stack = &ebp; + + WARN_ON(trace->nr_entries || !trace->max_entries); + + if (!task || task == current) { + /* Grab ebp right from our regs: */ + asm ("movl %%ebp, %0" : "=r" (ebp)); + } else { + /* ebp is the last reg pushed by switch_to(): */ + ebp = *(unsigned long *) task->thread.esp; + } + + while (1) { + struct thread_info *context = (struct thread_info *) + ((unsigned long)stack & (~(THREAD_SIZE - 1))); + + ebp = save_context_stack(trace, skip, context, stack, ebp); + stack = (unsigned long *)context->previous_esp; + if (!all_contexts || !stack || + trace->nr_entries >= trace->max_entries) + break; + trace->entries[trace->nr_entries++] = ULONG_MAX; + if (trace->nr_entries >= trace->max_entries) + break; + } +} + diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index e8c6086b2aa1..2bf8b55b91f8 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -115,28 +115,13 @@ static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) } /* - * Print CONFIG_STACK_BACKTRACE_COLS address/symbol entries per line. + * Print one address/symbol entries per line. 
*/ -static inline int print_addr_and_symbol(unsigned long addr, char *log_lvl, - int printed) +static inline void print_addr_and_symbol(unsigned long addr, char *log_lvl) { - if (!printed) - printk(log_lvl); - -#if CONFIG_STACK_BACKTRACE_COLS == 1 printk(" [<%08lx>] ", addr); -#else - printk(" <%08lx> ", addr); -#endif - print_symbol("%s", addr); - printed = (printed + 1) % CONFIG_STACK_BACKTRACE_COLS; - if (printed) - printk(" "); - else - printk("\n"); - - return printed; + print_symbol("%s\n", addr); } static inline unsigned long print_context_stack(struct thread_info *tinfo, @@ -144,12 +129,11 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo, char *log_lvl) { unsigned long addr; - int printed = 0; /* nr of entries already printed on current line */ #ifdef CONFIG_FRAME_POINTER while (valid_stack_ptr(tinfo, (void *)ebp)) { addr = *(unsigned long *)(ebp + 4); - printed = print_addr_and_symbol(addr, log_lvl, printed); + print_addr_and_symbol(addr, log_lvl); /* * break out of recursive entries (such as * end_of_stack_stop_unwind_function): @@ -162,28 +146,23 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo, while (valid_stack_ptr(tinfo, stack)) { addr = *stack++; if (__kernel_text_address(addr)) - printed = print_addr_and_symbol(addr, log_lvl, printed); + print_addr_and_symbol(addr, log_lvl); } #endif - if (printed) - printk("\n"); - return ebp; } -static asmlinkage int show_trace_unwind(struct unwind_frame_info *info, void *log_lvl) +static asmlinkage int +show_trace_unwind(struct unwind_frame_info *info, void *log_lvl) { int n = 0; - int printed = 0; /* nr of entries already printed on current line */ while (unwind(info) == 0 && UNW_PC(info)) { - ++n; - printed = print_addr_and_symbol(UNW_PC(info), log_lvl, printed); + n++; + print_addr_and_symbol(UNW_PC(info), log_lvl); if (arch_unw_user_mode(info)) break; } - if (printed) - printk("\n"); return n; } diff --git a/arch/ia64/kernel/mca.c 
b/arch/ia64/kernel/mca.c index eb8e8dc5ac8e..2fbe4536fe18 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -678,7 +678,7 @@ copy_reg(const u64 *fr, u64 fnat, u64 *tr, u64 *tnat) */ static void -ia64_mca_modify_comm(const task_t *previous_current) +ia64_mca_modify_comm(const struct task_struct *previous_current) { char *p, comm[sizeof(current->comm)]; if (previous_current->pid) @@ -709,7 +709,7 @@ ia64_mca_modify_comm(const task_t *previous_current) * that we can do backtrace on the MCA/INIT handler code itself. */ -static task_t * +static struct task_struct * ia64_mca_modify_original_stack(struct pt_regs *regs, const struct switch_stack *sw, struct ia64_sal_os_state *sos, @@ -719,7 +719,7 @@ ia64_mca_modify_original_stack(struct pt_regs *regs, ia64_va va; extern char ia64_leave_kernel[]; /* Need asm address, not function descriptor */ const pal_min_state_area_t *ms = sos->pal_min_state; - task_t *previous_current; + struct task_struct *previous_current; struct pt_regs *old_regs; struct switch_stack *old_sw; unsigned size = sizeof(struct pt_regs) + @@ -1023,7 +1023,7 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, pal_processor_state_info_t *psp = (pal_processor_state_info_t *) &sos->proc_state_param; int recover, cpu = smp_processor_id(); - task_t *previous_current; + struct task_struct *previous_current; struct ia64_mca_notify_die nd = { .sos = sos, .monarch_cpu = &monarch_cpu }; @@ -1352,7 +1352,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, { static atomic_t slaves; static atomic_t monarchs; - task_t *previous_current; + struct task_struct *previous_current; int cpu = smp_processor_id(); struct ia64_mca_notify_die nd = { .sos = sos, .monarch_cpu = &monarch_cpu }; diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index e1960979be29..6203ed4ec8cf 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -124,7 +124,7 @@ extern void __devinit calibrate_delay (void); 
extern void start_ap (void); extern unsigned long ia64_iobase; -task_t *task_for_booting_cpu; +struct task_struct *task_for_booting_cpu; /* * State for each CPU diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S index ecfd637d702a..01e7fa86aa43 100644 --- a/arch/mips/kernel/entry.S +++ b/arch/mips/kernel/entry.S @@ -65,7 +65,7 @@ need_resched: #endif FEXPORT(ret_from_fork) - jal schedule_tail # a0 = task_t *prev + jal schedule_tail # a0 = struct task_struct *prev FEXPORT(syscall_exit) local_irq_disable # make sure need_resched and diff --git a/arch/mips/kernel/mips-mt.c b/arch/mips/kernel/mips-mt.c index 02237a685ec7..4dcc39f42951 100644 --- a/arch/mips/kernel/mips-mt.c +++ b/arch/mips/kernel/mips-mt.c @@ -47,7 +47,7 @@ unsigned long mt_fpemul_threshold = 0; * used in sys_sched_set/getaffinity() in kernel/sched.c, so * cloned here. */ -static inline task_t *find_process_by_pid(pid_t pid) +static inline struct task_struct *find_process_by_pid(pid_t pid) { return pid ? find_task_by_pid(pid) : current; } @@ -62,7 +62,7 @@ asmlinkage long mipsmt_sys_sched_setaffinity(pid_t pid, unsigned int len, cpumask_t new_mask; cpumask_t effective_mask; int retval; - task_t *p; + struct task_struct *p; if (len < sizeof(new_mask)) return -EINVAL; @@ -127,7 +127,7 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len, unsigned int real_len; cpumask_t mask; int retval; - task_t *p; + struct task_struct *p; real_len = sizeof(mask); if (len < real_len) diff --git a/arch/powerpc/configs/chrp32_defconfig b/arch/powerpc/configs/chrp32_defconfig new file mode 100644 index 000000000000..0fa010a63a8e --- /dev/null +++ b/arch/powerpc/configs/chrp32_defconfig @@ -0,0 +1,1378 @@ +# +# Automatically generated make config: don't edit +# Linux kernel version: 2.6.17 +# Mon Jul 3 12:08:41 2006 +# +# CONFIG_PPC64 is not set +CONFIG_PPC32=y +CONFIG_PPC_MERGE=y +CONFIG_MMU=y +CONFIG_GENERIC_HARDIRQS=y +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +CONFIG_GENERIC_HWEIGHT=y 
+CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_GENERIC_FIND_NEXT_BIT=y +CONFIG_PPC=y +CONFIG_EARLY_PRINTK=y +CONFIG_GENERIC_NVRAM=y +CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y +CONFIG_ARCH_MAY_HAVE_PC_FDC=y +CONFIG_PPC_OF=y +CONFIG_PPC_UDBG_16550=y +CONFIG_GENERIC_TBSYNC=y +# CONFIG_DEFAULT_UIMAGE is not set + +# +# Processor support +# +CONFIG_CLASSIC32=y +# CONFIG_PPC_52xx is not set +# CONFIG_PPC_82xx is not set +# CONFIG_PPC_83xx is not set +# CONFIG_PPC_85xx is not set +# CONFIG_PPC_86xx is not set +# CONFIG_40x is not set +# CONFIG_44x is not set +# CONFIG_8xx is not set +# CONFIG_E200 is not set +CONFIG_6xx=y +CONFIG_PPC_FPU=y +# CONFIG_ALTIVEC is not set +CONFIG_PPC_STD_MMU=y +CONFIG_PPC_STD_MMU_32=y +CONFIG_SMP=y +CONFIG_NR_CPUS=4 + +# +# Code maturity level options +# +CONFIG_EXPERIMENTAL=y +CONFIG_LOCK_KERNEL=y +CONFIG_INIT_ENV_ARG_LIMIT=32 + +# +# General setup +# +CONFIG_LOCALVERSION="" +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_SWAP=y +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +# CONFIG_BSD_PROCESS_ACCT is not set +CONFIG_SYSCTL=y +# CONFIG_AUDIT is not set +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +# CONFIG_CPUSETS is not set +# CONFIG_RELAY is not set +CONFIG_INITRAMFS_SOURCE="" +# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set +# CONFIG_EMBEDDED is not set +CONFIG_KALLSYMS=y +# CONFIG_KALLSYMS_ALL is not set +# CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_HOTPLUG=y +CONFIG_PRINTK=y +CONFIG_BUG=y +CONFIG_ELF_CORE=y +CONFIG_BASE_FULL=y +CONFIG_FUTEX=y +CONFIG_EPOLL=y +CONFIG_SHMEM=y +CONFIG_SLAB=y +# CONFIG_TINY_SHMEM is not set +CONFIG_BASE_SMALL=0 +# CONFIG_SLOB is not set + +# +# Loadable module support +# +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y +# CONFIG_MODVERSIONS is not set +# CONFIG_MODULE_SRCVERSION_ALL is not set +CONFIG_KMOD=y +CONFIG_STOP_MACHINE=y + +# +# Block layer +# +CONFIG_LBD=y +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_LSF is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y 
+CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y +CONFIG_DEFAULT_AS=y +# CONFIG_DEFAULT_DEADLINE is not set +# CONFIG_DEFAULT_CFQ is not set +# CONFIG_DEFAULT_NOOP is not set +CONFIG_DEFAULT_IOSCHED="anticipatory" + +# +# Platform support +# +CONFIG_PPC_MULTIPLATFORM=y +# CONFIG_PPC_ISERIES is not set +# CONFIG_EMBEDDED6xx is not set +# CONFIG_APUS is not set +CONFIG_PPC_CHRP=y +# CONFIG_PPC_PMAC is not set +# CONFIG_PPC_CELL is not set +# CONFIG_PPC_CELL_NATIVE is not set +CONFIG_MPIC=y +CONFIG_PPC_RTAS=y +# CONFIG_RTAS_ERROR_LOGGING is not set +CONFIG_RTAS_PROC=y +# CONFIG_MMIO_NVRAM is not set +CONFIG_PPC_MPC106=y +# CONFIG_PPC_970_NAP is not set +# CONFIG_CPU_FREQ is not set +# CONFIG_TAU is not set +# CONFIG_WANT_EARLY_SERIAL is not set + +# +# Kernel options +# +CONFIG_HIGHMEM=y +# CONFIG_HZ_100 is not set +CONFIG_HZ_250=y +# CONFIG_HZ_1000 is not set +CONFIG_HZ=250 +CONFIG_PREEMPT_NONE=y +# CONFIG_PREEMPT_VOLUNTARY is not set +# CONFIG_PREEMPT is not set +CONFIG_PREEMPT_BKL=y +CONFIG_BINFMT_ELF=y +CONFIG_BINFMT_MISC=y +# CONFIG_KEXEC is not set +CONFIG_IRQ_ALL_CPUS=y +CONFIG_ARCH_FLATMEM_ENABLE=y +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y +# CONFIG_SPARSEMEM_STATIC is not set +CONFIG_SPLIT_PTLOCK_CPUS=4 +CONFIG_PROC_DEVICETREE=y +# CONFIG_CMDLINE_BOOL is not set +# CONFIG_PM is not set +CONFIG_SECCOMP=y +CONFIG_ISA_DMA_API=y + +# +# Bus options +# +CONFIG_ISA=y +CONFIG_GENERIC_ISA_DMA=y +CONFIG_PPC_I8259=y +CONFIG_PPC_INDIRECT_PCI=y +CONFIG_PCI=y +CONFIG_PCI_DOMAINS=y +# CONFIG_PCIEPORTBUS is not set +# CONFIG_PCI_DEBUG is not set + +# +# PCCARD (PCMCIA/CardBus) support +# +# CONFIG_PCCARD is not set + +# +# PCI Hotplug Support +# +# CONFIG_HOTPLUG_PCI is not set + +# +# Advanced setup +# +# CONFIG_ADVANCED_OPTIONS is not set + +# +# Default settings for advanced configuration options are used +# 
+CONFIG_HIGHMEM_START=0xfe000000 +CONFIG_LOWMEM_SIZE=0x30000000 +CONFIG_KERNEL_START=0xc0000000 +CONFIG_TASK_SIZE=0x80000000 +CONFIG_BOOT_LOAD=0x00800000 + +# +# Networking +# +CONFIG_NET=y + +# +# Networking options +# +# CONFIG_NETDEBUG is not set +CONFIG_PACKET=y +# CONFIG_PACKET_MMAP is not set +CONFIG_UNIX=y +# CONFIG_NET_KEY is not set +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +# CONFIG_IP_ADVANCED_ROUTER is not set +CONFIG_IP_FIB_HASH=y +# CONFIG_IP_PNP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_IP_MROUTE is not set +# CONFIG_ARPD is not set +CONFIG_SYN_COOKIES=y +# CONFIG_INET_AH is not set +# CONFIG_INET_ESP is not set +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_XFRM_TUNNEL is not set +# CONFIG_INET_TUNNEL is not set +# CONFIG_INET_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET_XFRM_MODE_TUNNEL is not set +CONFIG_INET_DIAG=y +CONFIG_INET_TCP_DIAG=y +# CONFIG_TCP_CONG_ADVANCED is not set +CONFIG_TCP_CONG_BIC=y + +# +# IP: Virtual Server Configuration +# +# CONFIG_IP_VS is not set +# CONFIG_IPV6 is not set +# CONFIG_INET6_XFRM_TUNNEL is not set +# CONFIG_INET6_TUNNEL is not set +# CONFIG_NETWORK_SECMARK is not set +CONFIG_NETFILTER=y +# CONFIG_NETFILTER_DEBUG is not set + +# +# Core Netfilter Configuration +# +# CONFIG_NETFILTER_NETLINK is not set +# CONFIG_NETFILTER_XTABLES is not set + +# +# IP: Netfilter Configuration +# +CONFIG_IP_NF_CONNTRACK=m +# CONFIG_IP_NF_CT_ACCT is not set +# CONFIG_IP_NF_CONNTRACK_MARK is not set +# CONFIG_IP_NF_CONNTRACK_EVENTS is not set +# CONFIG_IP_NF_CT_PROTO_SCTP is not set +CONFIG_IP_NF_FTP=m +CONFIG_IP_NF_IRC=m +# CONFIG_IP_NF_NETBIOS_NS is not set +CONFIG_IP_NF_TFTP=m +CONFIG_IP_NF_AMANDA=m +# CONFIG_IP_NF_PPTP is not set +# CONFIG_IP_NF_H323 is not set +# CONFIG_IP_NF_SIP is not set +# CONFIG_IP_NF_QUEUE is not set + +# +# DCCP Configuration (EXPERIMENTAL) +# +# CONFIG_IP_DCCP is not set + +# +# SCTP Configuration (EXPERIMENTAL) +# +# CONFIG_IP_SCTP is not set + +# +# TIPC 
Configuration (EXPERIMENTAL) +# +# CONFIG_TIPC is not set +# CONFIG_ATM is not set +# CONFIG_BRIDGE is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_DECNET is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_NET_DIVERT is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set + +# +# QoS and/or fair queueing +# +# CONFIG_NET_SCHED is not set + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +# CONFIG_HAMRADIO is not set +# CONFIG_IRDA is not set +# CONFIG_BT is not set +# CONFIG_IEEE80211 is not set + +# +# Device Drivers +# + +# +# Generic Driver Options +# +# CONFIG_STANDALONE is not set +CONFIG_PREVENT_FIRMWARE_BUILD=y +# CONFIG_FW_LOADER is not set +# CONFIG_DEBUG_DRIVER is not set +# CONFIG_SYS_HYPERVISOR is not set + +# +# Connector - unified userspace <-> kernelspace linker +# +# CONFIG_CONNECTOR is not set + +# +# Memory Technology Devices (MTD) +# +# CONFIG_MTD is not set + +# +# Parallel port support +# +# CONFIG_PARPORT is not set + +# +# Plug and Play support +# +# CONFIG_PNP is not set + +# +# Block devices +# +CONFIG_BLK_DEV_FD=y +# CONFIG_BLK_DEV_XD is not set +# CONFIG_BLK_CPQ_DA is not set +# CONFIG_BLK_CPQ_CISS_DA is not set +# CONFIG_BLK_DEV_DAC960 is not set +# CONFIG_BLK_DEV_UMEM is not set +# CONFIG_BLK_DEV_COW_COMMON is not set +CONFIG_BLK_DEV_LOOP=y +# CONFIG_BLK_DEV_CRYPTOLOOP is not set +# CONFIG_BLK_DEV_NBD is not set +# CONFIG_BLK_DEV_SX8 is not set +# CONFIG_BLK_DEV_UB is not set +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_COUNT=16 +CONFIG_BLK_DEV_RAM_SIZE=4096 +CONFIG_BLK_DEV_INITRD=y +# CONFIG_CDROM_PKTCDVD is not set +# CONFIG_ATA_OVER_ETH is not set + +# +# ATA/ATAPI/MFM/RLL support +# +CONFIG_IDE=y +CONFIG_BLK_DEV_IDE=y + +# +# Please see Documentation/ide.txt for help/info on IDE drives +# +# CONFIG_BLK_DEV_IDE_SATA is not set +CONFIG_BLK_DEV_IDEDISK=y +CONFIG_IDEDISK_MULTI_MODE=y +CONFIG_BLK_DEV_IDECD=y +# 
CONFIG_BLK_DEV_IDETAPE is not set +# CONFIG_BLK_DEV_IDEFLOPPY is not set +# CONFIG_BLK_DEV_IDESCSI is not set +# CONFIG_IDE_TASK_IOCTL is not set + +# +# IDE chipset support/bugfixes +# +CONFIG_IDE_GENERIC=y +CONFIG_BLK_DEV_IDEPCI=y +CONFIG_IDEPCI_SHARE_IRQ=y +# CONFIG_BLK_DEV_OFFBOARD is not set +CONFIG_BLK_DEV_GENERIC=y +# CONFIG_BLK_DEV_OPTI621 is not set +CONFIG_BLK_DEV_SL82C105=y +CONFIG_BLK_DEV_IDEDMA_PCI=y +# CONFIG_BLK_DEV_IDEDMA_FORCED is not set +CONFIG_IDEDMA_PCI_AUTO=y +# CONFIG_IDEDMA_ONLYDISK is not set +# CONFIG_BLK_DEV_AEC62XX is not set +# CONFIG_BLK_DEV_ALI15X3 is not set +# CONFIG_BLK_DEV_AMD74XX is not set +# CONFIG_BLK_DEV_CMD64X is not set +# CONFIG_BLK_DEV_TRIFLEX is not set +# CONFIG_BLK_DEV_CY82C693 is not set +# CONFIG_BLK_DEV_CS5520 is not set +# CONFIG_BLK_DEV_CS5530 is not set +# CONFIG_BLK_DEV_HPT34X is not set +# CONFIG_BLK_DEV_HPT366 is not set +# CONFIG_BLK_DEV_SC1200 is not set +# CONFIG_BLK_DEV_PIIX is not set +# CONFIG_BLK_DEV_IT821X is not set +# CONFIG_BLK_DEV_NS87415 is not set +# CONFIG_BLK_DEV_PDC202XX_OLD is not set +# CONFIG_BLK_DEV_PDC202XX_NEW is not set +# CONFIG_BLK_DEV_SVWKS is not set +# CONFIG_BLK_DEV_SIIMAGE is not set +# CONFIG_BLK_DEV_SLC90E66 is not set +# CONFIG_BLK_DEV_TRM290 is not set +CONFIG_BLK_DEV_VIA82CXXX=y +# CONFIG_IDE_ARM is not set +# CONFIG_IDE_CHIPSETS is not set +CONFIG_BLK_DEV_IDEDMA=y +# CONFIG_IDEDMA_IVB is not set +CONFIG_IDEDMA_AUTO=y +# CONFIG_BLK_DEV_HD is not set + +# +# SCSI device support +# +# CONFIG_RAID_ATTRS is not set +CONFIG_SCSI=y +CONFIG_SCSI_PROC_FS=y + +# +# SCSI support type (disk, tape, CD-ROM) +# +CONFIG_BLK_DEV_SD=y +CONFIG_CHR_DEV_ST=y +# CONFIG_CHR_DEV_OSST is not set +CONFIG_BLK_DEV_SR=y +CONFIG_BLK_DEV_SR_VENDOR=y +CONFIG_CHR_DEV_SG=y +# CONFIG_CHR_DEV_SCH is not set + +# +# Some SCSI devices (e.g. 
CD jukebox) support multiple LUNs +# +# CONFIG_SCSI_MULTI_LUN is not set +CONFIG_SCSI_CONSTANTS=y +# CONFIG_SCSI_LOGGING is not set + +# +# SCSI Transport Attributes +# +CONFIG_SCSI_SPI_ATTRS=y +# CONFIG_SCSI_FC_ATTRS is not set +# CONFIG_SCSI_ISCSI_ATTRS is not set +# CONFIG_SCSI_SAS_ATTRS is not set + +# +# SCSI low-level drivers +# +# CONFIG_ISCSI_TCP is not set +# CONFIG_BLK_DEV_3W_XXXX_RAID is not set +# CONFIG_SCSI_3W_9XXX is not set +# CONFIG_SCSI_7000FASST is not set +# CONFIG_SCSI_ACARD is not set +# CONFIG_SCSI_AHA152X is not set +# CONFIG_SCSI_AHA1542 is not set +# CONFIG_SCSI_AACRAID is not set +# CONFIG_SCSI_AIC7XXX is not set +# CONFIG_SCSI_AIC7XXX_OLD is not set +# CONFIG_SCSI_AIC79XX is not set +# CONFIG_SCSI_DPT_I2O is not set +# CONFIG_SCSI_IN2000 is not set +# CONFIG_MEGARAID_NEWGEN is not set +# CONFIG_MEGARAID_LEGACY is not set +# CONFIG_MEGARAID_SAS is not set +# CONFIG_SCSI_SATA is not set +# CONFIG_SCSI_HPTIOP is not set +# CONFIG_SCSI_BUSLOGIC is not set +# CONFIG_SCSI_DMX3191D is not set +# CONFIG_SCSI_DTC3280 is not set +# CONFIG_SCSI_EATA is not set +# CONFIG_SCSI_FUTURE_DOMAIN is not set +# CONFIG_SCSI_GDTH is not set +# CONFIG_SCSI_GENERIC_NCR5380 is not set +# CONFIG_SCSI_GENERIC_NCR5380_MMIO is not set +# CONFIG_SCSI_IPS is not set +# CONFIG_SCSI_INITIO is not set +# CONFIG_SCSI_INIA100 is not set +# CONFIG_SCSI_NCR53C406A is not set +CONFIG_SCSI_SYM53C8XX_2=y +CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0 +CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 +CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 +CONFIG_SCSI_SYM53C8XX_MMIO=y +# CONFIG_SCSI_IPR is not set +# CONFIG_SCSI_PAS16 is not set +# CONFIG_SCSI_PSI240I is not set +# CONFIG_SCSI_QLOGIC_FAS is not set +# CONFIG_SCSI_QLOGIC_1280 is not set +# CONFIG_SCSI_QLA_FC is not set +# CONFIG_SCSI_LPFC is not set +# CONFIG_SCSI_SYM53C416 is not set +# CONFIG_SCSI_DC395x is not set +# CONFIG_SCSI_DC390T is not set +# CONFIG_SCSI_T128 is not set +# CONFIG_SCSI_U14_34F is not set +# CONFIG_SCSI_NSP32 is not set 
+# CONFIG_SCSI_DEBUG is not set + +# +# Old CD-ROM drivers (not SCSI, not IDE) +# +# CONFIG_CD_NO_IDESCSI is not set + +# +# Multi-device support (RAID and LVM) +# +# CONFIG_MD is not set + +# +# Fusion MPT device support +# +# CONFIG_FUSION is not set +# CONFIG_FUSION_SPI is not set +# CONFIG_FUSION_FC is not set +# CONFIG_FUSION_SAS is not set + +# +# IEEE 1394 (FireWire) support +# +# CONFIG_IEEE1394 is not set + +# +# I2O device support +# +# CONFIG_I2O is not set + +# +# Macintosh device drivers +# +# CONFIG_WINDFARM is not set + +# +# Network device support +# +CONFIG_NETDEVICES=y +# CONFIG_DUMMY is not set +# CONFIG_BONDING is not set +# CONFIG_EQUALIZER is not set +# CONFIG_TUN is not set + +# +# ARCnet devices +# +# CONFIG_ARCNET is not set + +# +# PHY device support +# +# CONFIG_PHYLIB is not set + +# +# Ethernet (10 or 100Mbit) +# +CONFIG_NET_ETHERNET=y +CONFIG_MII=y +# CONFIG_HAPPYMEAL is not set +# CONFIG_SUNGEM is not set +# CONFIG_CASSINI is not set +# CONFIG_NET_VENDOR_3COM is not set +# CONFIG_LANCE is not set +# CONFIG_NET_VENDOR_SMC is not set +# CONFIG_NET_VENDOR_RACAL is not set + +# +# Tulip family network device support +# +CONFIG_NET_TULIP=y +# CONFIG_DE2104X is not set +# CONFIG_TULIP is not set +CONFIG_DE4X5=y +# CONFIG_WINBOND_840 is not set +# CONFIG_DM9102 is not set +# CONFIG_ULI526X is not set +# CONFIG_AT1700 is not set +# CONFIG_DEPCA is not set +# CONFIG_HP100 is not set +# CONFIG_NET_ISA is not set +CONFIG_NET_PCI=y +CONFIG_PCNET32=y +# CONFIG_AMD8111_ETH is not set +# CONFIG_ADAPTEC_STARFIRE is not set +# CONFIG_AC3200 is not set +# CONFIG_APRICOT is not set +# CONFIG_B44 is not set +# CONFIG_FORCEDETH is not set +# CONFIG_CS89x0 is not set +# CONFIG_DGRS is not set +# CONFIG_EEPRO100 is not set +# CONFIG_E100 is not set +# CONFIG_FEALNX is not set +# CONFIG_NATSEMI is not set +# CONFIG_NE2K_PCI is not set +CONFIG_8139CP=y +CONFIG_8139TOO=y +# CONFIG_8139TOO_PIO is not set +# CONFIG_8139TOO_TUNE_TWISTER is not set +# 
CONFIG_8139TOO_8129 is not set +# CONFIG_8139_OLD_RX_RESET is not set +# CONFIG_SIS900 is not set +# CONFIG_EPIC100 is not set +# CONFIG_SUNDANCE is not set +# CONFIG_TLAN is not set +CONFIG_VIA_RHINE=y +# CONFIG_VIA_RHINE_MMIO is not set + +# +# Ethernet (1000 Mbit) +# +# CONFIG_ACENIC is not set +# CONFIG_DL2K is not set +# CONFIG_E1000 is not set +# CONFIG_NS83820 is not set +# CONFIG_HAMACHI is not set +# CONFIG_YELLOWFIN is not set +# CONFIG_R8169 is not set +# CONFIG_SIS190 is not set +# CONFIG_SKGE is not set +# CONFIG_SKY2 is not set +# CONFIG_SK98LIN is not set +# CONFIG_VIA_VELOCITY is not set +# CONFIG_TIGON3 is not set +# CONFIG_BNX2 is not set +CONFIG_MV643XX_ETH=y +# CONFIG_MV643XX_ETH_0 is not set +# CONFIG_MV643XX_ETH_1 is not set +# CONFIG_MV643XX_ETH_2 is not set + +# +# Ethernet (10000 Mbit) +# +# CONFIG_CHELSIO_T1 is not set +# CONFIG_IXGB is not set +# CONFIG_S2IO is not set +# CONFIG_MYRI10GE is not set + +# +# Token Ring devices +# +# CONFIG_TR is not set + +# +# Wireless LAN (non-hamradio) +# +# CONFIG_NET_RADIO is not set + +# +# Wan interfaces +# +# CONFIG_WAN is not set +# CONFIG_FDDI is not set +# CONFIG_HIPPI is not set +CONFIG_PPP=m +CONFIG_PPP_MULTILINK=y +CONFIG_PPP_FILTER=y +CONFIG_PPP_ASYNC=m +CONFIG_PPP_SYNC_TTY=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_MPPE=m +CONFIG_PPPOE=m +# CONFIG_SLIP is not set +# CONFIG_NET_FC is not set +# CONFIG_SHAPER is not set +# CONFIG_NETCONSOLE is not set +# CONFIG_NETPOLL is not set +# CONFIG_NET_POLL_CONTROLLER is not set + +# +# ISDN subsystem +# +# CONFIG_ISDN is not set + +# +# Telephony Support +# +# CONFIG_PHONE is not set + +# +# Input device support +# +CONFIG_INPUT=y + +# +# Userland interfaces +# +CONFIG_INPUT_MOUSEDEV=y +CONFIG_INPUT_MOUSEDEV_PSAUX=y +CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 +# CONFIG_INPUT_JOYDEV is not set +# CONFIG_INPUT_TSDEV is not set +CONFIG_INPUT_EVDEV=y +# CONFIG_INPUT_EVBUG is not set + +# +# Input Device Drivers 
+# +CONFIG_INPUT_KEYBOARD=y +CONFIG_KEYBOARD_ATKBD=y +# CONFIG_KEYBOARD_SUNKBD is not set +# CONFIG_KEYBOARD_LKKBD is not set +# CONFIG_KEYBOARD_XTKBD is not set +# CONFIG_KEYBOARD_NEWTON is not set +CONFIG_INPUT_MOUSE=y +CONFIG_MOUSE_PS2=y +# CONFIG_MOUSE_SERIAL is not set +# CONFIG_MOUSE_INPORT is not set +# CONFIG_MOUSE_LOGIBM is not set +# CONFIG_MOUSE_PC110PAD is not set +# CONFIG_MOUSE_VSXXXAA is not set +# CONFIG_INPUT_JOYSTICK is not set +# CONFIG_INPUT_TOUCHSCREEN is not set +CONFIG_INPUT_MISC=y +# CONFIG_INPUT_PCSPKR is not set +CONFIG_INPUT_UINPUT=y + +# +# Hardware I/O ports +# +CONFIG_SERIO=y +CONFIG_SERIO_I8042=y +CONFIG_SERIO_SERPORT=y +# CONFIG_SERIO_PCIPS2 is not set +CONFIG_SERIO_LIBPS2=y +# CONFIG_SERIO_RAW is not set +# CONFIG_GAMEPORT is not set + +# +# Character devices +# +CONFIG_VT=y +CONFIG_VT_CONSOLE=y +CONFIG_HW_CONSOLE=y +# CONFIG_SERIAL_NONSTANDARD is not set + +# +# Serial drivers +# +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_PCI=y +CONFIG_SERIAL_8250_NR_UARTS=4 +CONFIG_SERIAL_8250_RUNTIME_UARTS=4 +# CONFIG_SERIAL_8250_EXTENDED is not set + +# +# Non-8250 serial port support +# +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_JSM is not set +CONFIG_UNIX98_PTYS=y +CONFIG_LEGACY_PTYS=y +CONFIG_LEGACY_PTY_COUNT=256 +# CONFIG_HVC_RTAS is not set + +# +# IPMI +# +# CONFIG_IPMI_HANDLER is not set + +# +# Watchdog Cards +# +# CONFIG_WATCHDOG is not set +CONFIG_NVRAM=y +CONFIG_GEN_RTC=y +# CONFIG_GEN_RTC_X is not set +# CONFIG_DTLK is not set +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set + +# +# Ftape, the floppy tape device driver +# +# CONFIG_AGP is not set +# CONFIG_DRM is not set +# CONFIG_RAW_DRIVER is not set + +# +# TPM devices +# +# CONFIG_TCG_TPM is not set +# CONFIG_TELCLOCK is not set + +# +# I2C support +# +CONFIG_I2C=y +# CONFIG_I2C_CHARDEV is not set + +# +# I2C Algorithms +# +CONFIG_I2C_ALGOBIT=y +# CONFIG_I2C_ALGOPCF is not set +# CONFIG_I2C_ALGOPCA is not set + +# +# 
I2C Hardware Bus support +# +# CONFIG_I2C_ALI1535 is not set +# CONFIG_I2C_ALI1563 is not set +# CONFIG_I2C_ALI15X3 is not set +# CONFIG_I2C_AMD756 is not set +# CONFIG_I2C_AMD8111 is not set +# CONFIG_I2C_HYDRA is not set +# CONFIG_I2C_I801 is not set +# CONFIG_I2C_I810 is not set +# CONFIG_I2C_PIIX4 is not set +# CONFIG_I2C_MPC is not set +# CONFIG_I2C_NFORCE2 is not set +# CONFIG_I2C_OCORES is not set +# CONFIG_I2C_PARPORT_LIGHT is not set +# CONFIG_I2C_PROSAVAGE is not set +# CONFIG_I2C_SAVAGE4 is not set +# CONFIG_I2C_SIS5595 is not set +# CONFIG_I2C_SIS630 is not set +# CONFIG_I2C_SIS96X is not set +# CONFIG_I2C_STUB is not set +# CONFIG_I2C_VIA is not set +# CONFIG_I2C_VIAPRO is not set +# CONFIG_I2C_VOODOO3 is not set +# CONFIG_I2C_PCA_ISA is not set + +# +# Miscellaneous I2C Chip support +# +# CONFIG_SENSORS_DS1337 is not set +# CONFIG_SENSORS_DS1374 is not set +# CONFIG_SENSORS_EEPROM is not set +# CONFIG_SENSORS_PCF8574 is not set +# CONFIG_SENSORS_PCA9539 is not set +# CONFIG_SENSORS_PCF8591 is not set +# CONFIG_SENSORS_M41T00 is not set +# CONFIG_SENSORS_MAX6875 is not set +# CONFIG_I2C_DEBUG_CORE is not set +# CONFIG_I2C_DEBUG_ALGO is not set +# CONFIG_I2C_DEBUG_BUS is not set +# CONFIG_I2C_DEBUG_CHIP is not set + +# +# SPI support +# +# CONFIG_SPI is not set +# CONFIG_SPI_MASTER is not set + +# +# Dallas's 1-wire bus +# + +# +# Hardware Monitoring support +# +# CONFIG_HWMON is not set +# CONFIG_HWMON_VID is not set + +# +# Misc devices +# + +# +# Multimedia devices +# +# CONFIG_VIDEO_DEV is not set +CONFIG_VIDEO_V4L2=y + +# +# Digital Video Broadcasting Devices +# +# CONFIG_DVB is not set +# CONFIG_USB_DABUSB is not set + +# +# Graphics support +# +CONFIG_FB=y +CONFIG_FB_CFB_FILLRECT=y +CONFIG_FB_CFB_COPYAREA=y +CONFIG_FB_CFB_IMAGEBLIT=y +CONFIG_FB_MACMODES=y +CONFIG_FB_FIRMWARE_EDID=y +# CONFIG_FB_BACKLIGHT is not set +CONFIG_FB_MODE_HELPERS=y +CONFIG_FB_TILEBLITTING=y +# CONFIG_FB_CIRRUS is not set +# CONFIG_FB_PM2 is not set +# CONFIG_FB_CYBER2000 
is not set +CONFIG_FB_OF=y +# CONFIG_FB_CT65550 is not set +# CONFIG_FB_ASILIANT is not set +# CONFIG_FB_IMSTT is not set +# CONFIG_FB_VGA16 is not set +# CONFIG_FB_S1D13XXX is not set +# CONFIG_FB_NVIDIA is not set +# CONFIG_FB_RIVA is not set +CONFIG_FB_MATROX=y +CONFIG_FB_MATROX_MILLENIUM=y +CONFIG_FB_MATROX_MYSTIQUE=y +CONFIG_FB_MATROX_G=y +# CONFIG_FB_MATROX_I2C is not set +# CONFIG_FB_MATROX_MULTIHEAD is not set +CONFIG_FB_RADEON=y +CONFIG_FB_RADEON_I2C=y +# CONFIG_FB_RADEON_DEBUG is not set +# CONFIG_FB_ATY128 is not set +CONFIG_FB_ATY=y +CONFIG_FB_ATY_CT=y +# CONFIG_FB_ATY_GENERIC_LCD is not set +CONFIG_FB_ATY_GX=y +# CONFIG_FB_SAVAGE is not set +# CONFIG_FB_SIS is not set +# CONFIG_FB_NEOMAGIC is not set +# CONFIG_FB_KYRO is not set +CONFIG_FB_3DFX=y +# CONFIG_FB_3DFX_ACCEL is not set +# CONFIG_FB_VOODOO1 is not set +# CONFIG_FB_TRIDENT is not set +# CONFIG_FB_VIRTUAL is not set + +# +# Console display driver support +# +CONFIG_VGA_CONSOLE=y +# CONFIG_VGACON_SOFT_SCROLLBACK is not set +# CONFIG_MDA_CONSOLE is not set +CONFIG_DUMMY_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE=y +# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set +# CONFIG_FONTS is not set +CONFIG_FONT_8x8=y +CONFIG_FONT_8x16=y + +# +# Logo configuration +# +CONFIG_LOGO=y +CONFIG_LOGO_LINUX_MONO=y +CONFIG_LOGO_LINUX_VGA16=y +CONFIG_LOGO_LINUX_CLUT224=y +# CONFIG_BACKLIGHT_LCD_SUPPORT is not set + +# +# Sound +# +# CONFIG_SOUND is not set + +# +# USB support +# +CONFIG_USB_ARCH_HAS_HCD=y +CONFIG_USB_ARCH_HAS_OHCI=y +CONFIG_USB_ARCH_HAS_EHCI=y +CONFIG_USB=y +# CONFIG_USB_DEBUG is not set + +# +# Miscellaneous USB options +# +CONFIG_USB_DEVICEFS=y +# CONFIG_USB_BANDWIDTH is not set +# CONFIG_USB_DYNAMIC_MINORS is not set +# CONFIG_USB_OTG is not set + +# +# USB Host Controller Drivers +# +CONFIG_USB_EHCI_HCD=m +# CONFIG_USB_EHCI_SPLIT_ISO is not set +# CONFIG_USB_EHCI_ROOT_HUB_TT is not set +# CONFIG_USB_EHCI_TT_NEWSCHED is not set +# CONFIG_USB_ISP116X_HCD is not set +CONFIG_USB_OHCI_HCD=y +# 
CONFIG_USB_OHCI_BIG_ENDIAN is not set +CONFIG_USB_OHCI_LITTLE_ENDIAN=y +CONFIG_USB_UHCI_HCD=y +# CONFIG_USB_SL811_HCD is not set + +# +# USB Device Class drivers +# +# CONFIG_USB_ACM is not set +# CONFIG_USB_PRINTER is not set + +# +# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' +# + +# +# may also be needed; see USB_STORAGE Help for more information +# +CONFIG_USB_STORAGE=m +# CONFIG_USB_STORAGE_DEBUG is not set +# CONFIG_USB_STORAGE_DATAFAB is not set +# CONFIG_USB_STORAGE_FREECOM is not set +# CONFIG_USB_STORAGE_ISD200 is not set +# CONFIG_USB_STORAGE_DPCM is not set +# CONFIG_USB_STORAGE_USBAT is not set +# CONFIG_USB_STORAGE_SDDR09 is not set +# CONFIG_USB_STORAGE_SDDR55 is not set +# CONFIG_USB_STORAGE_JUMPSHOT is not set +# CONFIG_USB_STORAGE_ALAUDA is not set +# CONFIG_USB_STORAGE_ONETOUCH is not set +# CONFIG_USB_LIBUSUAL is not set + +# +# USB Input Devices +# +CONFIG_USB_HID=y +CONFIG_USB_HIDINPUT=y +# CONFIG_USB_HIDINPUT_POWERBOOK is not set +# CONFIG_HID_FF is not set +# CONFIG_USB_HIDDEV is not set +# CONFIG_USB_AIPTEK is not set +# CONFIG_USB_WACOM is not set +# CONFIG_USB_ACECAD is not set +# CONFIG_USB_KBTAB is not set +# CONFIG_USB_POWERMATE is not set +# CONFIG_USB_TOUCHSCREEN is not set +# CONFIG_USB_YEALINK is not set +# CONFIG_USB_XPAD is not set +# CONFIG_USB_ATI_REMOTE is not set +# CONFIG_USB_ATI_REMOTE2 is not set +# CONFIG_USB_KEYSPAN_REMOTE is not set +# CONFIG_USB_APPLETOUCH is not set + +# +# USB Imaging devices +# +# CONFIG_USB_MDC800 is not set +# CONFIG_USB_MICROTEK is not set + +# +# USB Network Adapters +# +# CONFIG_USB_CATC is not set +# CONFIG_USB_KAWETH is not set +# CONFIG_USB_PEGASUS is not set +# CONFIG_USB_RTL8150 is not set +# CONFIG_USB_USBNET is not set +CONFIG_USB_MON=y + +# +# USB port drivers +# + +# +# USB Serial Converter support +# +# CONFIG_USB_SERIAL is not set + +# +# USB Miscellaneous drivers +# +# CONFIG_USB_EMI62 is not set +# CONFIG_USB_EMI26 is not set +# CONFIG_USB_AUERSWALD is not set +# 
CONFIG_USB_RIO500 is not set +# CONFIG_USB_LEGOTOWER is not set +# CONFIG_USB_LCD is not set +# CONFIG_USB_LED is not set +# CONFIG_USB_CY7C63 is not set +# CONFIG_USB_CYTHERM is not set +# CONFIG_USB_PHIDGETKIT is not set +# CONFIG_USB_PHIDGETSERVO is not set +# CONFIG_USB_IDMOUSE is not set +# CONFIG_USB_APPLEDISPLAY is not set +# CONFIG_USB_SISUSBVGA is not set +# CONFIG_USB_LD is not set +# CONFIG_USB_TEST is not set + +# +# USB DSL modem support +# + +# +# USB Gadget Support +# +# CONFIG_USB_GADGET is not set + +# +# MMC/SD Card support +# +# CONFIG_MMC is not set + +# +# LED devices +# +# CONFIG_NEW_LEDS is not set + +# +# LED drivers +# + +# +# LED Triggers +# + +# +# InfiniBand support +# +# CONFIG_INFINIBAND is not set + +# +# EDAC - error detection and reporting (RAS) (EXPERIMENTAL) +# + +# +# Real Time Clock +# +# CONFIG_RTC_CLASS is not set + +# +# DMA Engine support +# +# CONFIG_DMA_ENGINE is not set + +# +# DMA Clients +# + +# +# DMA Devices +# + +# +# File systems +# +CONFIG_EXT2_FS=y +# CONFIG_EXT2_FS_XATTR is not set +# CONFIG_EXT2_FS_XIP is not set +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_XATTR=y +# CONFIG_EXT3_FS_POSIX_ACL is not set +# CONFIG_EXT3_FS_SECURITY is not set +CONFIG_JBD=y +# CONFIG_JBD_DEBUG is not set +CONFIG_FS_MBCACHE=y +# CONFIG_REISERFS_FS is not set +# CONFIG_JFS_FS is not set +# CONFIG_FS_POSIX_ACL is not set +# CONFIG_XFS_FS is not set +# CONFIG_OCFS2_FS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_ROMFS_FS is not set +CONFIG_INOTIFY=y +CONFIG_INOTIFY_USER=y +# CONFIG_QUOTA is not set +CONFIG_DNOTIFY=y +# CONFIG_AUTOFS_FS is not set +# CONFIG_AUTOFS4_FS is not set +# CONFIG_FUSE_FS is not set + +# +# CD-ROM/DVD Filesystems +# +CONFIG_ISO9660_FS=y +# CONFIG_JOLIET is not set +# CONFIG_ZISOFS is not set +# CONFIG_UDF_FS is not set + +# +# DOS/FAT/NT Filesystems +# +CONFIG_FAT_FS=m +CONFIG_MSDOS_FS=m +CONFIG_VFAT_FS=m +CONFIG_FAT_DEFAULT_CODEPAGE=437 +CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" +# CONFIG_NTFS_FS is not set + +# +# 
Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_KCORE=y +CONFIG_SYSFS=y +CONFIG_TMPFS=y +# CONFIG_HUGETLB_PAGE is not set +CONFIG_RAMFS=y +# CONFIG_CONFIGFS_FS is not set + +# +# Miscellaneous filesystems +# +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +# CONFIG_CRAMFS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set + +# +# Network File Systems +# +# CONFIG_NFS_FS is not set +# CONFIG_NFSD is not set +# CONFIG_SMB_FS is not set +# CONFIG_CIFS is not set +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_AFS_FS is not set +# CONFIG_9P_FS is not set + +# +# Partition Types +# +CONFIG_PARTITION_ADVANCED=y +# CONFIG_ACORN_PARTITION is not set +# CONFIG_OSF_PARTITION is not set +# CONFIG_AMIGA_PARTITION is not set +# CONFIG_ATARI_PARTITION is not set +CONFIG_MAC_PARTITION=y +CONFIG_MSDOS_PARTITION=y +# CONFIG_BSD_DISKLABEL is not set +# CONFIG_MINIX_SUBPARTITION is not set +# CONFIG_SOLARIS_X86_PARTITION is not set +# CONFIG_UNIXWARE_DISKLABEL is not set +# CONFIG_LDM_PARTITION is not set +# CONFIG_SGI_PARTITION is not set +# CONFIG_ULTRIX_PARTITION is not set +# CONFIG_SUN_PARTITION is not set +# CONFIG_KARMA_PARTITION is not set +# CONFIG_EFI_PARTITION is not set + +# +# Native Language Support +# +CONFIG_NLS=y +CONFIG_NLS_DEFAULT="iso8859-1" +# CONFIG_NLS_CODEPAGE_437 is not set +# CONFIG_NLS_CODEPAGE_737 is not set +# CONFIG_NLS_CODEPAGE_775 is not set +# CONFIG_NLS_CODEPAGE_850 is not set +# CONFIG_NLS_CODEPAGE_852 is not set +# CONFIG_NLS_CODEPAGE_855 is not set +# CONFIG_NLS_CODEPAGE_857 is not set +# CONFIG_NLS_CODEPAGE_860 is not set +# CONFIG_NLS_CODEPAGE_861 is not set +# CONFIG_NLS_CODEPAGE_862 is not set +# CONFIG_NLS_CODEPAGE_863 is not set +# CONFIG_NLS_CODEPAGE_864 is not 
set +# CONFIG_NLS_CODEPAGE_865 is not set +# CONFIG_NLS_CODEPAGE_866 is not set +# CONFIG_NLS_CODEPAGE_869 is not set +# CONFIG_NLS_CODEPAGE_936 is not set +# CONFIG_NLS_CODEPAGE_950 is not set +# CONFIG_NLS_CODEPAGE_932 is not set +# CONFIG_NLS_CODEPAGE_949 is not set +# CONFIG_NLS_CODEPAGE_874 is not set +# CONFIG_NLS_ISO8859_8 is not set +# CONFIG_NLS_CODEPAGE_1250 is not set +# CONFIG_NLS_CODEPAGE_1251 is not set +CONFIG_NLS_ASCII=y +CONFIG_NLS_ISO8859_1=m +# CONFIG_NLS_ISO8859_2 is not set +# CONFIG_NLS_ISO8859_3 is not set +# CONFIG_NLS_ISO8859_4 is not set +# CONFIG_NLS_ISO8859_5 is not set +# CONFIG_NLS_ISO8859_6 is not set +# CONFIG_NLS_ISO8859_7 is not set +# CONFIG_NLS_ISO8859_9 is not set +# CONFIG_NLS_ISO8859_13 is not set +# CONFIG_NLS_ISO8859_14 is not set +# CONFIG_NLS_ISO8859_15 is not set +# CONFIG_NLS_KOI8_R is not set +# CONFIG_NLS_KOI8_U is not set +# CONFIG_NLS_UTF8 is not set + +# +# Library routines +# +CONFIG_CRC_CCITT=m +# CONFIG_CRC16 is not set +CONFIG_CRC32=y +# CONFIG_LIBCRC32C is not set +CONFIG_ZLIB_INFLATE=m +CONFIG_ZLIB_DEFLATE=m +CONFIG_TEXTSEARCH=y +CONFIG_TEXTSEARCH_KMP=m + +# +# Instrumentation Support +# +# CONFIG_PROFILING is not set + +# +# Kernel hacking +# +# CONFIG_PRINTK_TIME is not set +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_KERNEL=y +CONFIG_LOG_BUF_SHIFT=15 +CONFIG_DETECT_SOFTLOCKUP=y +# CONFIG_SCHEDSTATS is not set +# CONFIG_DEBUG_SLAB is not set +CONFIG_DEBUG_MUTEXES=y +# CONFIG_DEBUG_SPINLOCK is not set +CONFIG_DEBUG_SPINLOCK_SLEEP=y +# CONFIG_DEBUG_KOBJECT is not set +# CONFIG_DEBUG_HIGHMEM is not set +# CONFIG_DEBUG_INFO is not set +# CONFIG_DEBUG_FS is not set +# CONFIG_DEBUG_VM is not set +CONFIG_FORCED_INLINING=y +# CONFIG_RCU_TORTURE_TEST is not set +CONFIG_DEBUGGER=y +CONFIG_XMON=y +CONFIG_XMON_DEFAULT=y +# CONFIG_BDI_SWITCH is not set +# CONFIG_BOOTX_TEXT is not set +# CONFIG_PPC_EARLY_DEBUG is not set + +# +# Security options +# +# CONFIG_KEYS is not set +# CONFIG_SECURITY is not set + +# +# Cryptographic 
options +# +CONFIG_CRYPTO=y +# CONFIG_CRYPTO_HMAC is not set +# CONFIG_CRYPTO_NULL is not set +# CONFIG_CRYPTO_MD4 is not set +# CONFIG_CRYPTO_MD5 is not set +CONFIG_CRYPTO_SHA1=m +# CONFIG_CRYPTO_SHA256 is not set +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_CRYPTO_WP512 is not set +# CONFIG_CRYPTO_TGR192 is not set +# CONFIG_CRYPTO_DES is not set +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_TWOFISH is not set +# CONFIG_CRYPTO_SERPENT is not set +# CONFIG_CRYPTO_AES is not set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set +# CONFIG_CRYPTO_TEA is not set +CONFIG_CRYPTO_ARC4=m +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_ANUBIS is not set +# CONFIG_CRYPTO_DEFLATE is not set +# CONFIG_CRYPTO_MICHAEL_MIC is not set +# CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_TEST is not set + +# +# Hardware crypto devices +# diff --git a/arch/powerpc/configs/mpc834x_itx_defconfig b/arch/powerpc/configs/mpc834x_itx_defconfig new file mode 100644 index 000000000000..fc2d9789adc8 --- /dev/null +++ b/arch/powerpc/configs/mpc834x_itx_defconfig @@ -0,0 +1,1336 @@ +# +# Automatically generated make config: don't edit +# Linux kernel version: 2.6.17 +# Fri Jun 30 17:53:25 2006 +# +# CONFIG_PPC64 is not set +CONFIG_PPC32=y +CONFIG_PPC_MERGE=y +CONFIG_MMU=y +CONFIG_GENERIC_HARDIRQS=y +CONFIG_IRQ_PER_CPU=y +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +CONFIG_GENERIC_HWEIGHT=y +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_GENERIC_FIND_NEXT_BIT=y +CONFIG_PPC=y +CONFIG_EARLY_PRINTK=y +CONFIG_GENERIC_NVRAM=y +CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y +CONFIG_ARCH_MAY_HAVE_PC_FDC=y +CONFIG_PPC_OF=y +CONFIG_PPC_UDBG_16550=y +# CONFIG_GENERIC_TBSYNC is not set +CONFIG_DEFAULT_UIMAGE=y + +# +# Processor support +# +# CONFIG_CLASSIC32 is not set +# CONFIG_PPC_52xx is not set +# CONFIG_PPC_82xx is not set +CONFIG_PPC_83xx=y +# CONFIG_PPC_85xx is not set +# CONFIG_PPC_86xx is not set +# CONFIG_40x is not set +# CONFIG_44x is not set +# CONFIG_8xx is not set +# CONFIG_E200 is 
not set +CONFIG_6xx=y +CONFIG_83xx=y +CONFIG_PPC_FPU=y +CONFIG_PPC_STD_MMU=y +CONFIG_PPC_STD_MMU_32=y +# CONFIG_SMP is not set +CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" + +# +# Code maturity level options +# +CONFIG_EXPERIMENTAL=y +CONFIG_BROKEN_ON_SMP=y +CONFIG_INIT_ENV_ARG_LIMIT=32 + +# +# General setup +# +CONFIG_LOCALVERSION="" +CONFIG_LOCALVERSION_AUTO=y +CONFIG_SWAP=y +CONFIG_SYSVIPC=y +# CONFIG_POSIX_MQUEUE is not set +# CONFIG_BSD_PROCESS_ACCT is not set +CONFIG_SYSCTL=y +# CONFIG_AUDIT is not set +# CONFIG_IKCONFIG is not set +# CONFIG_RELAY is not set +CONFIG_INITRAMFS_SOURCE="" +# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set +CONFIG_EMBEDDED=y +# CONFIG_KALLSYMS is not set +CONFIG_HOTPLUG=y +CONFIG_PRINTK=y +CONFIG_BUG=y +CONFIG_ELF_CORE=y +CONFIG_BASE_FULL=y +CONFIG_RT_MUTEXES=y +CONFIG_FUTEX=y +# CONFIG_EPOLL is not set +CONFIG_SHMEM=y +CONFIG_SLAB=y +# CONFIG_TINY_SHMEM is not set +CONFIG_BASE_SMALL=0 +# CONFIG_SLOB is not set + +# +# Loadable module support +# +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +# CONFIG_MODULE_FORCE_UNLOAD is not set +# CONFIG_MODVERSIONS is not set +# CONFIG_MODULE_SRCVERSION_ALL is not set +# CONFIG_KMOD is not set + +# +# Block layer +# +# CONFIG_LBD is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_LSF is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y +CONFIG_DEFAULT_AS=y +# CONFIG_DEFAULT_DEADLINE is not set +# CONFIG_DEFAULT_CFQ is not set +# CONFIG_DEFAULT_NOOP is not set +CONFIG_DEFAULT_IOSCHED="anticipatory" +CONFIG_PPC_GEN550=y +# CONFIG_WANT_EARLY_SERIAL is not set + +# +# Platform support +# +# CONFIG_MPC834x_SYS is not set +CONFIG_MPC834x_ITX=y +CONFIG_MPC834x=y + +# +# Kernel options +# +# CONFIG_HIGHMEM is not set +# CONFIG_HZ_100 is not set +CONFIG_HZ_250=y +# CONFIG_HZ_1000 is not set +CONFIG_HZ=250 +CONFIG_PREEMPT_NONE=y +# CONFIG_PREEMPT_VOLUNTARY is not set +# CONFIG_PREEMPT is not set +CONFIG_BINFMT_ELF=y 
+# CONFIG_BINFMT_MISC is not set +CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y +CONFIG_ARCH_FLATMEM_ENABLE=y +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y +# CONFIG_SPARSEMEM_STATIC is not set +CONFIG_SPLIT_PTLOCK_CPUS=4 +# CONFIG_RESOURCES_64BIT is not set +CONFIG_PROC_DEVICETREE=y +# CONFIG_CMDLINE_BOOL is not set +# CONFIG_PM is not set +# CONFIG_SOFTWARE_SUSPEND is not set +CONFIG_SECCOMP=y +CONFIG_ISA_DMA_API=y + +# +# Bus options +# +CONFIG_GENERIC_ISA_DMA=y +# CONFIG_PPC_I8259 is not set +CONFIG_PPC_INDIRECT_PCI=y +CONFIG_FSL_SOC=y +CONFIG_PCI=y +CONFIG_PCI_DOMAINS=y +# CONFIG_PCIEPORTBUS is not set +# CONFIG_PCI_DEBUG is not set + +# +# PCCARD (PCMCIA/CardBus) support +# +# CONFIG_PCCARD is not set + +# +# PCI Hotplug Support +# +# CONFIG_HOTPLUG_PCI is not set + +# +# Advanced setup +# +# CONFIG_ADVANCED_OPTIONS is not set + +# +# Default settings for advanced configuration options are used +# +CONFIG_HIGHMEM_START=0xfe000000 +CONFIG_LOWMEM_SIZE=0x30000000 +CONFIG_KERNEL_START=0xc0000000 +CONFIG_TASK_SIZE=0x80000000 +CONFIG_BOOT_LOAD=0x00800000 + +# +# Networking +# +CONFIG_NET=y + +# +# Networking options +# +# CONFIG_NETDEBUG is not set +CONFIG_PACKET=y +# CONFIG_PACKET_MMAP is not set +CONFIG_UNIX=y +CONFIG_XFRM=y +# CONFIG_XFRM_USER is not set +# CONFIG_NET_KEY is not set +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +# CONFIG_IP_ADVANCED_ROUTER is not set +CONFIG_IP_FIB_HASH=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +# CONFIG_IP_PNP_RARP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_IP_MROUTE is not set +# CONFIG_ARPD is not set +CONFIG_SYN_COOKIES=y +# CONFIG_INET_AH is not set +# CONFIG_INET_ESP is not set +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_XFRM_TUNNEL is not set +# CONFIG_INET_TUNNEL is not set +CONFIG_INET_XFRM_MODE_TRANSPORT=y +CONFIG_INET_XFRM_MODE_TUNNEL=y 
+CONFIG_INET_DIAG=y +CONFIG_INET_TCP_DIAG=y +# CONFIG_TCP_CONG_ADVANCED is not set +CONFIG_TCP_CONG_BIC=y +# CONFIG_IPV6 is not set +# CONFIG_INET6_XFRM_TUNNEL is not set +# CONFIG_INET6_TUNNEL is not set +# CONFIG_NETWORK_SECMARK is not set +# CONFIG_NETFILTER is not set + +# +# DCCP Configuration (EXPERIMENTAL) +# +# CONFIG_IP_DCCP is not set + +# +# SCTP Configuration (EXPERIMENTAL) +# +# CONFIG_IP_SCTP is not set + +# +# TIPC Configuration (EXPERIMENTAL) +# +# CONFIG_TIPC is not set +# CONFIG_ATM is not set +# CONFIG_BRIDGE is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_DECNET is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_NET_DIVERT is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set + +# +# QoS and/or fair queueing +# +# CONFIG_NET_SCHED is not set + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +# CONFIG_HAMRADIO is not set +# CONFIG_IRDA is not set +# CONFIG_BT is not set +# CONFIG_IEEE80211 is not set + +# +# Device Drivers +# + +# +# Generic Driver Options +# +CONFIG_STANDALONE=y +CONFIG_PREVENT_FIRMWARE_BUILD=y +# CONFIG_FW_LOADER is not set +# CONFIG_DEBUG_DRIVER is not set +# CONFIG_SYS_HYPERVISOR is not set + +# +# Connector - unified userspace <-> kernelspace linker +# +# CONFIG_CONNECTOR is not set + +# +# Memory Technology Devices (MTD) +# +CONFIG_MTD=y +# CONFIG_MTD_DEBUG is not set +# CONFIG_MTD_CONCAT is not set +# CONFIG_MTD_PARTITIONS is not set + +# +# User Modules And Translation Layers +# +CONFIG_MTD_CHAR=y +# CONFIG_MTD_BLOCK is not set +# CONFIG_MTD_BLOCK_RO is not set +# CONFIG_FTL is not set +# CONFIG_NFTL is not set +# CONFIG_INFTL is not set +# CONFIG_RFD_FTL is not set + +# +# RAM/ROM/Flash chip drivers +# +CONFIG_MTD_CFI=y +# CONFIG_MTD_JEDECPROBE is not set +CONFIG_MTD_GEN_PROBE=y +# CONFIG_MTD_CFI_ADV_OPTIONS is not set +CONFIG_MTD_MAP_BANK_WIDTH_1=y +CONFIG_MTD_MAP_BANK_WIDTH_2=y 
+CONFIG_MTD_MAP_BANK_WIDTH_4=y +# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set +# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set +# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set +CONFIG_MTD_CFI_I1=y +CONFIG_MTD_CFI_I2=y +# CONFIG_MTD_CFI_I4 is not set +# CONFIG_MTD_CFI_I8 is not set +# CONFIG_MTD_CFI_INTELEXT is not set +CONFIG_MTD_CFI_AMDSTD=y +# CONFIG_MTD_CFI_STAA is not set +CONFIG_MTD_CFI_UTIL=y +# CONFIG_MTD_RAM is not set +# CONFIG_MTD_ROM is not set +# CONFIG_MTD_ABSENT is not set +# CONFIG_MTD_OBSOLETE_CHIPS is not set + +# +# Mapping drivers for chip access +# +# CONFIG_MTD_COMPLEX_MAPPINGS is not set +CONFIG_MTD_PHYSMAP=y +CONFIG_MTD_PHYSMAP_START=0xfe000000 +CONFIG_MTD_PHYSMAP_LEN=0x1000000 +CONFIG_MTD_PHYSMAP_BANKWIDTH=2 +# CONFIG_MTD_PLATRAM is not set + +# +# Self-contained MTD device drivers +# +# CONFIG_MTD_PMC551 is not set +# CONFIG_MTD_DATAFLASH is not set +# CONFIG_MTD_M25P80 is not set +# CONFIG_MTD_SLRAM is not set +# CONFIG_MTD_PHRAM is not set +# CONFIG_MTD_MTDRAM is not set +# CONFIG_MTD_BLOCK2MTD is not set + +# +# Disk-On-Chip Device Drivers +# +# CONFIG_MTD_DOC2000 is not set +# CONFIG_MTD_DOC2001 is not set +# CONFIG_MTD_DOC2001PLUS is not set + +# +# NAND Flash Device Drivers +# +# CONFIG_MTD_NAND is not set + +# +# OneNAND Flash Device Drivers +# +# CONFIG_MTD_ONENAND is not set + +# +# Parallel port support +# +# CONFIG_PARPORT is not set + +# +# Plug and Play support +# + +# +# Block devices +# +# CONFIG_BLK_DEV_FD is not set +# CONFIG_BLK_CPQ_DA is not set +# CONFIG_BLK_CPQ_CISS_DA is not set +# CONFIG_BLK_DEV_DAC960 is not set +# CONFIG_BLK_DEV_UMEM is not set +# CONFIG_BLK_DEV_COW_COMMON is not set +CONFIG_BLK_DEV_LOOP=y +# CONFIG_BLK_DEV_CRYPTOLOOP is not set +# CONFIG_BLK_DEV_NBD is not set +# CONFIG_BLK_DEV_SX8 is not set +# CONFIG_BLK_DEV_UB is not set +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_COUNT=16 +CONFIG_BLK_DEV_RAM_SIZE=32768 +CONFIG_BLK_DEV_INITRD=y +# CONFIG_CDROM_PKTCDVD is not set +# CONFIG_ATA_OVER_ETH is not set + +# +# 
ATA/ATAPI/MFM/RLL support +# +CONFIG_IDE=y +# CONFIG_BLK_DEV_IDE is not set +# CONFIG_BLK_DEV_HD_ONLY is not set +# CONFIG_BLK_DEV_HD is not set + +# +# SCSI device support +# +# CONFIG_RAID_ATTRS is not set +CONFIG_SCSI=y +CONFIG_SCSI_PROC_FS=y + +# +# SCSI support type (disk, tape, CD-ROM) +# +CONFIG_BLK_DEV_SD=y +# CONFIG_CHR_DEV_ST is not set +# CONFIG_CHR_DEV_OSST is not set +# CONFIG_BLK_DEV_SR is not set +CONFIG_CHR_DEV_SG=y +# CONFIG_CHR_DEV_SCH is not set + +# +# Some SCSI devices (e.g. CD jukebox) support multiple LUNs +# +# CONFIG_SCSI_MULTI_LUN is not set +# CONFIG_SCSI_CONSTANTS is not set +# CONFIG_SCSI_LOGGING is not set + +# +# SCSI Transport Attributes +# +CONFIG_SCSI_SPI_ATTRS=y +# CONFIG_SCSI_FC_ATTRS is not set +# CONFIG_SCSI_ISCSI_ATTRS is not set +# CONFIG_SCSI_SAS_ATTRS is not set + +# +# SCSI low-level drivers +# +# CONFIG_ISCSI_TCP is not set +# CONFIG_BLK_DEV_3W_XXXX_RAID is not set +# CONFIG_SCSI_3W_9XXX is not set +# CONFIG_SCSI_ACARD is not set +# CONFIG_SCSI_AACRAID is not set +# CONFIG_SCSI_AIC7XXX is not set +# CONFIG_SCSI_AIC7XXX_OLD is not set +# CONFIG_SCSI_AIC79XX is not set +# CONFIG_SCSI_DPT_I2O is not set +# CONFIG_MEGARAID_NEWGEN is not set +# CONFIG_MEGARAID_LEGACY is not set +# CONFIG_MEGARAID_SAS is not set +CONFIG_SCSI_SATA=y +# CONFIG_SCSI_SATA_AHCI is not set +# CONFIG_SCSI_SATA_SVW is not set +# CONFIG_SCSI_ATA_PIIX is not set +# CONFIG_SCSI_SATA_MV is not set +# CONFIG_SCSI_SATA_NV is not set +# CONFIG_SCSI_PDC_ADMA is not set +# CONFIG_SCSI_HPTIOP is not set +# CONFIG_SCSI_SATA_QSTOR is not set +# CONFIG_SCSI_SATA_PROMISE is not set +# CONFIG_SCSI_SATA_SX4 is not set +CONFIG_SCSI_SATA_SIL=y +# CONFIG_SCSI_SATA_SIL24 is not set +# CONFIG_SCSI_SATA_SIS is not set +# CONFIG_SCSI_SATA_ULI is not set +# CONFIG_SCSI_SATA_VIA is not set +# CONFIG_SCSI_SATA_VITESSE is not set +# CONFIG_SCSI_BUSLOGIC is not set +# CONFIG_SCSI_DMX3191D is not set +# CONFIG_SCSI_EATA is not set +# CONFIG_SCSI_FUTURE_DOMAIN is not set +# 
CONFIG_SCSI_GDTH is not set +# CONFIG_SCSI_IPS is not set +# CONFIG_SCSI_INITIO is not set +# CONFIG_SCSI_INIA100 is not set +# CONFIG_SCSI_SYM53C8XX_2 is not set +# CONFIG_SCSI_IPR is not set +# CONFIG_SCSI_QLOGIC_1280 is not set +# CONFIG_SCSI_QLA_FC is not set +# CONFIG_SCSI_LPFC is not set +# CONFIG_SCSI_DC395x is not set +# CONFIG_SCSI_DC390T is not set +# CONFIG_SCSI_NSP32 is not set +# CONFIG_SCSI_DEBUG is not set + +# +# Multi-device support (RAID and LVM) +# +CONFIG_MD=y +CONFIG_BLK_DEV_MD=y +CONFIG_MD_LINEAR=y +CONFIG_MD_RAID0=y +CONFIG_MD_RAID1=y +# CONFIG_MD_RAID10 is not set +# CONFIG_MD_RAID456 is not set +# CONFIG_MD_MULTIPATH is not set +# CONFIG_MD_FAULTY is not set +# CONFIG_BLK_DEV_DM is not set + +# +# Fusion MPT device support +# +# CONFIG_FUSION is not set +# CONFIG_FUSION_SPI is not set +# CONFIG_FUSION_FC is not set +# CONFIG_FUSION_SAS is not set + +# +# IEEE 1394 (FireWire) support +# +# CONFIG_IEEE1394 is not set + +# +# I2O device support +# +# CONFIG_I2O is not set + +# +# Macintosh device drivers +# +# CONFIG_WINDFARM is not set + +# +# Network device support +# +CONFIG_NETDEVICES=y +# CONFIG_DUMMY is not set +# CONFIG_BONDING is not set +# CONFIG_EQUALIZER is not set +# CONFIG_TUN is not set + +# +# ARCnet devices +# +# CONFIG_ARCNET is not set + +# +# PHY device support +# +CONFIG_PHYLIB=y + +# +# MII PHY device drivers +# +# CONFIG_MARVELL_PHY is not set +# CONFIG_DAVICOM_PHY is not set +# CONFIG_QSEMI_PHY is not set +# CONFIG_LXT_PHY is not set +CONFIG_CICADA_PHY=y +# CONFIG_VITESSE_PHY is not set +# CONFIG_SMSC_PHY is not set + +# +# Ethernet (10 or 100Mbit) +# +CONFIG_NET_ETHERNET=y +CONFIG_MII=y +# CONFIG_HAPPYMEAL is not set +# CONFIG_SUNGEM is not set +# CONFIG_CASSINI is not set +# CONFIG_NET_VENDOR_3COM is not set + +# +# Tulip family network device support +# +# CONFIG_NET_TULIP is not set +# CONFIG_HP100 is not set +CONFIG_NET_PCI=y +# CONFIG_PCNET32 is not set +# CONFIG_AMD8111_ETH is not set +# CONFIG_ADAPTEC_STARFIRE is 
not set +# CONFIG_B44 is not set +# CONFIG_FORCEDETH is not set +# CONFIG_DGRS is not set +# CONFIG_EEPRO100 is not set +CONFIG_E100=y +# CONFIG_FEALNX is not set +# CONFIG_NATSEMI is not set +# CONFIG_NE2K_PCI is not set +# CONFIG_8139CP is not set +# CONFIG_8139TOO is not set +# CONFIG_SIS900 is not set +# CONFIG_EPIC100 is not set +# CONFIG_SUNDANCE is not set +# CONFIG_TLAN is not set +# CONFIG_VIA_RHINE is not set + +# +# Ethernet (1000 Mbit) +# +# CONFIG_ACENIC is not set +# CONFIG_DL2K is not set +# CONFIG_E1000 is not set +# CONFIG_NS83820 is not set +# CONFIG_HAMACHI is not set +# CONFIG_YELLOWFIN is not set +# CONFIG_R8169 is not set +# CONFIG_SIS190 is not set +# CONFIG_SKGE is not set +# CONFIG_SKY2 is not set +# CONFIG_SK98LIN is not set +# CONFIG_VIA_VELOCITY is not set +# CONFIG_TIGON3 is not set +# CONFIG_BNX2 is not set +CONFIG_GIANFAR=y +CONFIG_GFAR_NAPI=y + +# +# Ethernet (10000 Mbit) +# +# CONFIG_CHELSIO_T1 is not set +# CONFIG_IXGB is not set +# CONFIG_S2IO is not set +# CONFIG_MYRI10GE is not set + +# +# Token Ring devices +# +# CONFIG_TR is not set + +# +# Wireless LAN (non-hamradio) +# +# CONFIG_NET_RADIO is not set + +# +# Wan interfaces +# +# CONFIG_WAN is not set +# CONFIG_FDDI is not set +# CONFIG_HIPPI is not set +# CONFIG_PPP is not set +# CONFIG_SLIP is not set +# CONFIG_NET_FC is not set +# CONFIG_SHAPER is not set +# CONFIG_NETCONSOLE is not set +# CONFIG_NETPOLL is not set +# CONFIG_NET_POLL_CONTROLLER is not set + +# +# ISDN subsystem +# +# CONFIG_ISDN is not set + +# +# Telephony Support +# +# CONFIG_PHONE is not set + +# +# Input device support +# +CONFIG_INPUT=y + +# +# Userland interfaces +# +# CONFIG_INPUT_MOUSEDEV is not set +# CONFIG_INPUT_JOYDEV is not set +# CONFIG_INPUT_TSDEV is not set +# CONFIG_INPUT_EVDEV is not set +# CONFIG_INPUT_EVBUG is not set + +# +# Input Device Drivers +# +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_INPUT_JOYSTICK is not set +# CONFIG_INPUT_TOUCHSCREEN is not 
set +# CONFIG_INPUT_MISC is not set + +# +# Hardware I/O ports +# +# CONFIG_SERIO is not set +# CONFIG_GAMEPORT is not set + +# +# Character devices +# +# CONFIG_VT is not set +# CONFIG_SERIAL_NONSTANDARD is not set + +# +# Serial drivers +# +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_PCI=y +CONFIG_SERIAL_8250_NR_UARTS=4 +CONFIG_SERIAL_8250_RUNTIME_UARTS=4 +# CONFIG_SERIAL_8250_EXTENDED is not set + +# +# Non-8250 serial port support +# +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_JSM is not set +CONFIG_UNIX98_PTYS=y +CONFIG_LEGACY_PTYS=y +CONFIG_LEGACY_PTY_COUNT=256 + +# +# IPMI +# +# CONFIG_IPMI_HANDLER is not set + +# +# Watchdog Cards +# +CONFIG_WATCHDOG=y +# CONFIG_WATCHDOG_NOWAYOUT is not set + +# +# Watchdog Device Drivers +# +# CONFIG_SOFT_WATCHDOG is not set +CONFIG_83xx_WDT=y + +# +# PCI-based Watchdog Cards +# +# CONFIG_PCIPCWATCHDOG is not set +# CONFIG_WDTPCI is not set + +# +# USB-based Watchdog Cards +# +# CONFIG_USBPCWATCHDOG is not set +CONFIG_HW_RANDOM=y +# CONFIG_NVRAM is not set +# CONFIG_GEN_RTC is not set +# CONFIG_DTLK is not set +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set + +# +# Ftape, the floppy tape device driver +# +# CONFIG_AGP is not set +# CONFIG_DRM is not set +# CONFIG_RAW_DRIVER is not set + +# +# TPM devices +# +# CONFIG_TCG_TPM is not set +# CONFIG_TELCLOCK is not set + +# +# I2C support +# +CONFIG_I2C=y +CONFIG_I2C_CHARDEV=y + +# +# I2C Algorithms +# +# CONFIG_I2C_ALGOBIT is not set +# CONFIG_I2C_ALGOPCF is not set +# CONFIG_I2C_ALGOPCA is not set + +# +# I2C Hardware Bus support +# +# CONFIG_I2C_ALI1535 is not set +# CONFIG_I2C_ALI1563 is not set +# CONFIG_I2C_ALI15X3 is not set +# CONFIG_I2C_AMD756 is not set +# CONFIG_I2C_AMD8111 is not set +# CONFIG_I2C_I801 is not set +# CONFIG_I2C_I810 is not set +# CONFIG_I2C_PIIX4 is not set +CONFIG_I2C_MPC=y +# CONFIG_I2C_NFORCE2 is not set +# CONFIG_I2C_OCORES is not set +# CONFIG_I2C_PARPORT_LIGHT is not set +# 
CONFIG_I2C_PROSAVAGE is not set +# CONFIG_I2C_SAVAGE4 is not set +# CONFIG_I2C_SIS5595 is not set +# CONFIG_I2C_SIS630 is not set +# CONFIG_I2C_SIS96X is not set +# CONFIG_I2C_STUB is not set +# CONFIG_I2C_VIA is not set +# CONFIG_I2C_VIAPRO is not set +# CONFIG_I2C_VOODOO3 is not set +# CONFIG_I2C_PCA_ISA is not set + +# +# Miscellaneous I2C Chip support +# +# CONFIG_SENSORS_DS1337 is not set +# CONFIG_SENSORS_DS1374 is not set +# CONFIG_SENSORS_EEPROM is not set +# CONFIG_SENSORS_PCF8574 is not set +# CONFIG_SENSORS_PCA9539 is not set +# CONFIG_SENSORS_PCF8591 is not set +# CONFIG_SENSORS_M41T00 is not set +# CONFIG_SENSORS_MAX6875 is not set +# CONFIG_I2C_DEBUG_CORE is not set +# CONFIG_I2C_DEBUG_ALGO is not set +# CONFIG_I2C_DEBUG_BUS is not set +# CONFIG_I2C_DEBUG_CHIP is not set + +# +# SPI support +# +CONFIG_SPI=y +# CONFIG_SPI_DEBUG is not set +CONFIG_SPI_MASTER=y + +# +# SPI Master Controller Drivers +# +CONFIG_SPI_BITBANG=y +CONFIG_SPI_MPC83xx=y + +# +# SPI Protocol Masters +# + +# +# Dallas's 1-wire bus +# + +# +# Hardware Monitoring support +# +CONFIG_HWMON=y +# CONFIG_HWMON_VID is not set +# CONFIG_SENSORS_ABITUGURU is not set +# CONFIG_SENSORS_ADM1021 is not set +# CONFIG_SENSORS_ADM1025 is not set +# CONFIG_SENSORS_ADM1026 is not set +# CONFIG_SENSORS_ADM1031 is not set +# CONFIG_SENSORS_ADM9240 is not set +# CONFIG_SENSORS_ASB100 is not set +# CONFIG_SENSORS_ATXP1 is not set +# CONFIG_SENSORS_DS1621 is not set +# CONFIG_SENSORS_F71805F is not set +# CONFIG_SENSORS_FSCHER is not set +# CONFIG_SENSORS_FSCPOS is not set +# CONFIG_SENSORS_GL518SM is not set +# CONFIG_SENSORS_GL520SM is not set +# CONFIG_SENSORS_IT87 is not set +# CONFIG_SENSORS_LM63 is not set +# CONFIG_SENSORS_LM70 is not set +# CONFIG_SENSORS_LM75 is not set +# CONFIG_SENSORS_LM77 is not set +# CONFIG_SENSORS_LM78 is not set +# CONFIG_SENSORS_LM80 is not set +# CONFIG_SENSORS_LM83 is not set +# CONFIG_SENSORS_LM85 is not set +# CONFIG_SENSORS_LM87 is not set +# CONFIG_SENSORS_LM90 is 
not set +# CONFIG_SENSORS_LM92 is not set +# CONFIG_SENSORS_MAX1619 is not set +# CONFIG_SENSORS_PC87360 is not set +# CONFIG_SENSORS_SIS5595 is not set +# CONFIG_SENSORS_SMSC47M1 is not set +# CONFIG_SENSORS_SMSC47M192 is not set +# CONFIG_SENSORS_SMSC47B397 is not set +# CONFIG_SENSORS_VIA686A is not set +# CONFIG_SENSORS_VT8231 is not set +# CONFIG_SENSORS_W83781D is not set +# CONFIG_SENSORS_W83791D is not set +# CONFIG_SENSORS_W83792D is not set +# CONFIG_SENSORS_W83L785TS is not set +# CONFIG_SENSORS_W83627HF is not set +# CONFIG_SENSORS_W83627EHF is not set +# CONFIG_HWMON_DEBUG_CHIP is not set + +# +# Misc devices +# + +# +# Multimedia devices +# +# CONFIG_VIDEO_DEV is not set +CONFIG_VIDEO_V4L2=y + +# +# Digital Video Broadcasting Devices +# +# CONFIG_DVB is not set +# CONFIG_USB_DABUSB is not set + +# +# Graphics support +# +CONFIG_FIRMWARE_EDID=y +# CONFIG_FB is not set + +# +# Sound +# +# CONFIG_SOUND is not set + +# +# USB support +# +CONFIG_USB_ARCH_HAS_HCD=y +CONFIG_USB_ARCH_HAS_OHCI=y +CONFIG_USB_ARCH_HAS_EHCI=y +CONFIG_USB=y +# CONFIG_USB_DEBUG is not set + +# +# Miscellaneous USB options +# +CONFIG_USB_DEVICEFS=y +# CONFIG_USB_BANDWIDTH is not set +# CONFIG_USB_DYNAMIC_MINORS is not set +# CONFIG_USB_OTG is not set + +# +# USB Host Controller Drivers +# +CONFIG_USB_EHCI_HCD=y +# CONFIG_USB_EHCI_SPLIT_ISO is not set +# CONFIG_USB_EHCI_ROOT_HUB_TT is not set +# CONFIG_USB_EHCI_TT_NEWSCHED is not set +# CONFIG_USB_ISP116X_HCD is not set +CONFIG_USB_OHCI_HCD=y +# CONFIG_USB_OHCI_BIG_ENDIAN is not set +CONFIG_USB_OHCI_LITTLE_ENDIAN=y +CONFIG_USB_UHCI_HCD=y +# CONFIG_USB_SL811_HCD is not set + +# +# USB Device Class drivers +# +# CONFIG_USB_ACM is not set +# CONFIG_USB_PRINTER is not set + +# +# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' +# + +# +# may also be needed; see USB_STORAGE Help for more information +# +CONFIG_USB_STORAGE=y +# CONFIG_USB_STORAGE_DEBUG is not set +# CONFIG_USB_STORAGE_DATAFAB is not set +# 
CONFIG_USB_STORAGE_FREECOM is not set +# CONFIG_USB_STORAGE_DPCM is not set +# CONFIG_USB_STORAGE_USBAT is not set +# CONFIG_USB_STORAGE_SDDR09 is not set +# CONFIG_USB_STORAGE_SDDR55 is not set +# CONFIG_USB_STORAGE_JUMPSHOT is not set +# CONFIG_USB_STORAGE_ALAUDA is not set +# CONFIG_USB_LIBUSUAL is not set + +# +# USB Input Devices +# +# CONFIG_USB_HID is not set + +# +# USB HID Boot Protocol drivers +# +# CONFIG_USB_KBD is not set +# CONFIG_USB_MOUSE is not set +# CONFIG_USB_AIPTEK is not set +# CONFIG_USB_WACOM is not set +# CONFIG_USB_ACECAD is not set +# CONFIG_USB_KBTAB is not set +# CONFIG_USB_POWERMATE is not set +# CONFIG_USB_TOUCHSCREEN is not set +# CONFIG_USB_YEALINK is not set +# CONFIG_USB_XPAD is not set +# CONFIG_USB_ATI_REMOTE is not set +# CONFIG_USB_ATI_REMOTE2 is not set +# CONFIG_USB_KEYSPAN_REMOTE is not set +# CONFIG_USB_APPLETOUCH is not set + +# +# USB Imaging devices +# +# CONFIG_USB_MDC800 is not set +# CONFIG_USB_MICROTEK is not set + +# +# USB Network Adapters +# +# CONFIG_USB_CATC is not set +# CONFIG_USB_KAWETH is not set +# CONFIG_USB_PEGASUS is not set +# CONFIG_USB_RTL8150 is not set +# CONFIG_USB_USBNET is not set +CONFIG_USB_MON=y + +# +# USB port drivers +# + +# +# USB Serial Converter support +# +# CONFIG_USB_SERIAL is not set + +# +# USB Miscellaneous drivers +# +# CONFIG_USB_EMI62 is not set +# CONFIG_USB_EMI26 is not set +# CONFIG_USB_AUERSWALD is not set +# CONFIG_USB_RIO500 is not set +# CONFIG_USB_LEGOTOWER is not set +# CONFIG_USB_LCD is not set +# CONFIG_USB_LED is not set +# CONFIG_USB_CY7C63 is not set +# CONFIG_USB_CYTHERM is not set +# CONFIG_USB_PHIDGETKIT is not set +# CONFIG_USB_PHIDGETSERVO is not set +# CONFIG_USB_IDMOUSE is not set +# CONFIG_USB_APPLEDISPLAY is not set +# CONFIG_USB_SISUSBVGA is not set +# CONFIG_USB_LD is not set +# CONFIG_USB_TEST is not set + +# +# USB DSL modem support +# + +# +# USB Gadget Support +# +CONFIG_USB_GADGET=y +# CONFIG_USB_GADGET_DEBUG_FILES is not set 
+CONFIG_USB_GADGET_SELECTED=y +CONFIG_USB_GADGET_NET2280=y +CONFIG_USB_NET2280=y +# CONFIG_USB_GADGET_PXA2XX is not set +# CONFIG_USB_GADGET_GOKU is not set +# CONFIG_USB_GADGET_LH7A40X is not set +# CONFIG_USB_GADGET_OMAP is not set +# CONFIG_USB_GADGET_AT91 is not set +# CONFIG_USB_GADGET_DUMMY_HCD is not set +CONFIG_USB_GADGET_DUALSPEED=y +# CONFIG_USB_ZERO is not set +CONFIG_USB_ETH=y +CONFIG_USB_ETH_RNDIS=y +# CONFIG_USB_GADGETFS is not set +# CONFIG_USB_FILE_STORAGE is not set +# CONFIG_USB_G_SERIAL is not set + +# +# MMC/SD Card support +# +# CONFIG_MMC is not set + +# +# LED devices +# +# CONFIG_NEW_LEDS is not set + +# +# LED drivers +# + +# +# LED Triggers +# + +# +# InfiniBand support +# +# CONFIG_INFINIBAND is not set + +# +# EDAC - error detection and reporting (RAS) (EXPERIMENTAL) +# + +# +# Real Time Clock +# +CONFIG_RTC_LIB=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_HCTOSYS=y +CONFIG_RTC_HCTOSYS_DEVICE="rtc0" + +# +# RTC interfaces +# +CONFIG_RTC_INTF_SYSFS=y +CONFIG_RTC_INTF_PROC=y +CONFIG_RTC_INTF_DEV=y +CONFIG_RTC_INTF_DEV_UIE_EMUL=y + +# +# RTC drivers +# +# CONFIG_RTC_DRV_X1205 is not set +CONFIG_RTC_DRV_DS1307=y +# CONFIG_RTC_DRV_DS1553 is not set +# CONFIG_RTC_DRV_DS1672 is not set +# CONFIG_RTC_DRV_DS1742 is not set +# CONFIG_RTC_DRV_PCF8563 is not set +# CONFIG_RTC_DRV_PCF8583 is not set +# CONFIG_RTC_DRV_RS5C348 is not set +# CONFIG_RTC_DRV_RS5C372 is not set +# CONFIG_RTC_DRV_M48T86 is not set +# CONFIG_RTC_DRV_TEST is not set +# CONFIG_RTC_DRV_MAX6902 is not set +# CONFIG_RTC_DRV_V3020 is not set + +# +# DMA Engine support +# +CONFIG_DMA_ENGINE=y + +# +# DMA Clients +# +CONFIG_NET_DMA=y + +# +# DMA Devices +# +CONFIG_INTEL_IOATDMA=y + +# +# File systems +# +CONFIG_EXT2_FS=y +# CONFIG_EXT2_FS_XATTR is not set +# CONFIG_EXT2_FS_XIP is not set +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_XATTR=y +# CONFIG_EXT3_FS_POSIX_ACL is not set +# CONFIG_EXT3_FS_SECURITY is not set +CONFIG_JBD=y +# CONFIG_JBD_DEBUG is not set +CONFIG_FS_MBCACHE=y +# CONFIG_REISERFS_FS is 
not set +# CONFIG_JFS_FS is not set +# CONFIG_FS_POSIX_ACL is not set +# CONFIG_XFS_FS is not set +# CONFIG_OCFS2_FS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_ROMFS_FS is not set +CONFIG_INOTIFY=y +CONFIG_INOTIFY_USER=y +# CONFIG_QUOTA is not set +CONFIG_DNOTIFY=y +# CONFIG_AUTOFS_FS is not set +# CONFIG_AUTOFS4_FS is not set +# CONFIG_FUSE_FS is not set + +# +# CD-ROM/DVD Filesystems +# +# CONFIG_ISO9660_FS is not set +# CONFIG_UDF_FS is not set + +# +# DOS/FAT/NT Filesystems +# +# CONFIG_MSDOS_FS is not set +# CONFIG_VFAT_FS is not set +# CONFIG_NTFS_FS is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_KCORE=y +CONFIG_SYSFS=y +CONFIG_TMPFS=y +# CONFIG_HUGETLB_PAGE is not set +CONFIG_RAMFS=y +# CONFIG_CONFIGFS_FS is not set + +# +# Miscellaneous filesystems +# +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +# CONFIG_JFFS_FS is not set +# CONFIG_JFFS2_FS is not set +# CONFIG_CRAMFS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set + +# +# Network File Systems +# +CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +# CONFIG_NFS_V3_ACL is not set +CONFIG_NFS_V4=y +# CONFIG_NFS_DIRECTIO is not set +# CONFIG_NFSD is not set +CONFIG_ROOT_NFS=y +CONFIG_LOCKD=y +CONFIG_LOCKD_V4=y +CONFIG_NFS_COMMON=y +CONFIG_SUNRPC=y +CONFIG_SUNRPC_GSS=y +CONFIG_RPCSEC_GSS_KRB5=y +# CONFIG_RPCSEC_GSS_SPKM3 is not set +# CONFIG_SMB_FS is not set +# CONFIG_CIFS is not set +# CONFIG_CIFS_DEBUG2 is not set +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_AFS_FS is not set +# CONFIG_9P_FS is not set + +# +# Partition Types +# +CONFIG_PARTITION_ADVANCED=y +# CONFIG_ACORN_PARTITION is not set +# CONFIG_OSF_PARTITION is not set +# CONFIG_AMIGA_PARTITION is not set +# CONFIG_ATARI_PARTITION is not set +# 
CONFIG_MAC_PARTITION is not set +# CONFIG_MSDOS_PARTITION is not set +# CONFIG_LDM_PARTITION is not set +# CONFIG_SGI_PARTITION is not set +# CONFIG_ULTRIX_PARTITION is not set +# CONFIG_SUN_PARTITION is not set +# CONFIG_KARMA_PARTITION is not set +# CONFIG_EFI_PARTITION is not set + +# +# Native Language Support +# +# CONFIG_NLS is not set + +# +# Library routines +# +# CONFIG_CRC_CCITT is not set +# CONFIG_CRC16 is not set +CONFIG_CRC32=y +# CONFIG_LIBCRC32C is not set +CONFIG_PLIST=y + +# +# Instrumentation Support +# +# CONFIG_PROFILING is not set + +# +# Kernel hacking +# +CONFIG_PRINTK_TIME=y +# CONFIG_MAGIC_SYSRQ is not set +# CONFIG_UNUSED_SYMBOLS is not set +CONFIG_DEBUG_KERNEL=y +CONFIG_LOG_BUF_SHIFT=17 +CONFIG_DETECT_SOFTLOCKUP=y +# CONFIG_SCHEDSTATS is not set +# CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_MUTEXES is not set +# CONFIG_DEBUG_RT_MUTEXES is not set +# CONFIG_RT_MUTEX_TESTER is not set +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set +# CONFIG_DEBUG_KOBJECT is not set +CONFIG_DEBUG_INFO=y +# CONFIG_DEBUG_FS is not set +# CONFIG_DEBUG_VM is not set +CONFIG_FORCED_INLINING=y +# CONFIG_RCU_TORTURE_TEST is not set +# CONFIG_DEBUGGER is not set +# CONFIG_BDI_SWITCH is not set +CONFIG_BOOTX_TEXT=y +CONFIG_SERIAL_TEXT_DEBUG=y +# CONFIG_PPC_EARLY_DEBUG is not set + +# +# Security options +# +# CONFIG_KEYS is not set +# CONFIG_SECURITY is not set + +# +# Cryptographic options +# +CONFIG_CRYPTO=y +# CONFIG_CRYPTO_HMAC is not set +# CONFIG_CRYPTO_NULL is not set +# CONFIG_CRYPTO_MD4 is not set +CONFIG_CRYPTO_MD5=y +# CONFIG_CRYPTO_SHA1 is not set +# CONFIG_CRYPTO_SHA256 is not set +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_CRYPTO_WP512 is not set +# CONFIG_CRYPTO_TGR192 is not set +CONFIG_CRYPTO_DES=y +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_TWOFISH is not set +# CONFIG_CRYPTO_SERPENT is not set +# CONFIG_CRYPTO_AES is not set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set +# 
CONFIG_CRYPTO_TEA is not set +# CONFIG_CRYPTO_ARC4 is not set +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_ANUBIS is not set +# CONFIG_CRYPTO_DEFLATE is not set +# CONFIG_CRYPTO_MICHAEL_MIC is not set +# CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_TEST is not set + +# +# Hardware crypto devices +# diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index a6920919d68e..f4e5e14ee2b6 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -111,7 +111,7 @@ void __init btext_setup_display(int width, int height, int depth, int pitch, logicalDisplayBase = (unsigned char *)address; dispDeviceBase = (unsigned char *)address; dispDeviceRowBytes = pitch; - dispDeviceDepth = depth; + dispDeviceDepth = depth == 15 ? 16 : depth; dispDeviceRect[0] = dispDeviceRect[1] = 0; dispDeviceRect[2] = width; dispDeviceRect[3] = height; @@ -160,20 +160,28 @@ int btext_initialize(struct device_node *np) unsigned long address = 0; u32 *prop; - prop = (u32 *)get_property(np, "width", NULL); + prop = (u32 *)get_property(np, "linux,bootx-width", NULL); + if (prop == NULL) + prop = (u32 *)get_property(np, "width", NULL); if (prop == NULL) return -EINVAL; width = *prop; - prop = (u32 *)get_property(np, "height", NULL); + prop = (u32 *)get_property(np, "linux,bootx-height", NULL); + if (prop == NULL) + prop = (u32 *)get_property(np, "height", NULL); if (prop == NULL) return -EINVAL; height = *prop; - prop = (u32 *)get_property(np, "depth", NULL); + prop = (u32 *)get_property(np, "linux,bootx-depth", NULL); + if (prop == NULL) + prop = (u32 *)get_property(np, "depth", NULL); if (prop == NULL) return -EINVAL; depth = *prop; pitch = width * ((depth + 7) / 8); - prop = (u32 *)get_property(np, "linebytes", NULL); + prop = (u32 *)get_property(np, "linux,bootx-linebytes", NULL); + if (prop == NULL) + prop = (u32 *)get_property(np, "linebytes", NULL); if (prop) pitch = *prop; if (pitch == 1) @@ -194,7 +202,7 @@ int btext_initialize(struct device_node 
*np) g_max_loc_Y = height / 16; dispDeviceBase = (unsigned char *)address; dispDeviceRowBytes = pitch; - dispDeviceDepth = depth; + dispDeviceDepth = depth == 15 ? 16 : depth; dispDeviceRect[0] = dispDeviceRect[1] = 0; dispDeviceRect[2] = width; dispDeviceRect[3] = height; diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index e47d40ac6f39..97ddc02a3d42 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -323,13 +323,11 @@ int ibmebus_request_irq(struct ibmebus_dev *dev, unsigned long irq_flags, const char * devname, void *dev_id) { - unsigned int irq = virt_irq_create_mapping(ist); + unsigned int irq = irq_create_mapping(NULL, ist, 0); if (irq == NO_IRQ) return -EINVAL; - irq = irq_offset_up(irq); - return request_irq(irq, handler, irq_flags, devname, dev_id); } @@ -337,12 +335,9 @@ EXPORT_SYMBOL(ibmebus_request_irq); void ibmebus_free_irq(struct ibmebus_dev *dev, u32 ist, void *dev_id) { - unsigned int irq = virt_irq_create_mapping(ist); + unsigned int irq = irq_find_mapping(NULL, ist); - irq = irq_offset_up(irq); free_irq(irq, dev_id); - - return; } EXPORT_SYMBOL(ibmebus_free_irq); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 525baab45d2d..8cf987809c66 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -29,6 +29,8 @@ * to reduce code space and undefined function references. 
*/ +#undef DEBUG + #include <linux/module.h> #include <linux/threads.h> #include <linux/kernel_stat.h> @@ -46,7 +48,10 @@ #include <linux/cpumask.h> #include <linux/profile.h> #include <linux/bitops.h> -#include <linux/pci.h> +#include <linux/list.h> +#include <linux/radix-tree.h> +#include <linux/mutex.h> +#include <linux/bootmem.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -57,39 +62,38 @@ #include <asm/prom.h> #include <asm/ptrace.h> #include <asm/machdep.h> +#include <asm/udbg.h> #ifdef CONFIG_PPC_ISERIES #include <asm/paca.h> #endif int __irq_offset_value; -#ifdef CONFIG_PPC32 -EXPORT_SYMBOL(__irq_offset_value); -#endif - static int ppc_spurious_interrupts; #ifdef CONFIG_PPC32 -#define NR_MASK_WORDS ((NR_IRQS + 31) / 32) +EXPORT_SYMBOL(__irq_offset_value); +atomic_t ppc_n_lost_interrupts; +#ifndef CONFIG_PPC_MERGE +#define NR_MASK_WORDS ((NR_IRQS + 31) / 32) unsigned long ppc_cached_irq_mask[NR_MASK_WORDS]; -atomic_t ppc_n_lost_interrupts; +#endif #ifdef CONFIG_TAU_INT extern int tau_initialized; extern int tau_interrupts(int); #endif +#endif /* CONFIG_PPC32 */ #if defined(CONFIG_SMP) && !defined(CONFIG_PPC_MERGE) extern atomic_t ipi_recv; extern atomic_t ipi_sent; #endif -#endif /* CONFIG_PPC32 */ #ifdef CONFIG_PPC64 EXPORT_SYMBOL(irq_desc); int distribute_irqs = 1; -u64 ppc64_interrupt_controller; #endif /* CONFIG_PPC64 */ int show_interrupts(struct seq_file *p, void *v) @@ -182,7 +186,7 @@ void fixup_irqs(cpumask_t map) void do_IRQ(struct pt_regs *regs) { - int irq; + unsigned int irq; #ifdef CONFIG_IRQSTACKS struct thread_info *curtp, *irqtp; #endif @@ -213,22 +217,26 @@ void do_IRQ(struct pt_regs *regs) */ irq = ppc_md.get_irq(regs); - if (irq >= 0) { + if (irq != NO_IRQ && irq != NO_IRQ_IGNORE) { #ifdef CONFIG_IRQSTACKS /* Switch to the irq stack to handle this */ curtp = current_thread_info(); irqtp = hardirq_ctx[smp_processor_id()]; if (curtp != irqtp) { + struct irq_desc *desc = irq_desc + irq; + void *handler = desc->handle_irq; + if 
(handler == NULL) + handler = &__do_IRQ; irqtp->task = curtp->task; irqtp->flags = 0; - call___do_IRQ(irq, regs, irqtp); + call_handle_irq(irq, desc, regs, irqtp, handler); irqtp->task = NULL; if (irqtp->flags) set_bits(irqtp->flags, &curtp->flags); } else #endif - __do_IRQ(irq, regs); - } else if (irq != -2) + generic_handle_irq(irq, regs); + } else if (irq != NO_IRQ_IGNORE) /* That's not SMP safe ... but who cares ? */ ppc_spurious_interrupts++; @@ -245,196 +253,562 @@ void do_IRQ(struct pt_regs *regs) void __init init_IRQ(void) { + ppc_md.init_IRQ(); #ifdef CONFIG_PPC64 - static int once = 0; + irq_ctx_init(); +#endif +} + + +#ifdef CONFIG_IRQSTACKS +struct thread_info *softirq_ctx[NR_CPUS] __read_mostly; +struct thread_info *hardirq_ctx[NR_CPUS] __read_mostly; + +void irq_ctx_init(void) +{ + struct thread_info *tp; + int i; + + for_each_possible_cpu(i) { + memset((void *)softirq_ctx[i], 0, THREAD_SIZE); + tp = softirq_ctx[i]; + tp->cpu = i; + tp->preempt_count = SOFTIRQ_OFFSET; + + memset((void *)hardirq_ctx[i], 0, THREAD_SIZE); + tp = hardirq_ctx[i]; + tp->cpu = i; + tp->preempt_count = HARDIRQ_OFFSET; + } +} + +static inline void do_softirq_onstack(void) +{ + struct thread_info *curtp, *irqtp; + + curtp = current_thread_info(); + irqtp = softirq_ctx[smp_processor_id()]; + irqtp->task = curtp->task; + call_do_softirq(irqtp); + irqtp->task = NULL; +} - if (once) +#else +#define do_softirq_onstack() __do_softirq() +#endif /* CONFIG_IRQSTACKS */ + +void do_softirq(void) +{ + unsigned long flags; + + if (in_interrupt()) return; - once++; + local_irq_save(flags); -#endif - ppc_md.init_IRQ(); -#ifdef CONFIG_PPC64 - irq_ctx_init(); -#endif + if (local_softirq_pending()) + do_softirq_onstack(); + + local_irq_restore(flags); } +EXPORT_SYMBOL(do_softirq); + -#ifdef CONFIG_PPC64 /* - * Virtual IRQ mapping code, used on systems with XICS interrupt controllers. 
+ * IRQ controller and virtual interrupts */ -#define UNDEFINED_IRQ 0xffffffff -unsigned int virt_irq_to_real_map[NR_IRQS]; +#ifdef CONFIG_PPC_MERGE -/* - * Don't use virtual irqs 0, 1, 2 for devices. - * The pcnet32 driver considers interrupt numbers < 2 to be invalid, - * and 2 is the XICS IPI interrupt. - * We limit virtual irqs to __irq_offet_value less than virt_irq_max so - * that when we offset them we don't end up with an interrupt - * number >= virt_irq_max. - */ -#define MIN_VIRT_IRQ 3 +static LIST_HEAD(irq_hosts); +static spinlock_t irq_big_lock = SPIN_LOCK_UNLOCKED; -unsigned int virt_irq_max; -static unsigned int max_virt_irq; -static unsigned int nr_virt_irqs; +struct irq_map_entry irq_map[NR_IRQS]; +static unsigned int irq_virq_count = NR_IRQS; +static struct irq_host *irq_default_host; -void -virt_irq_init(void) +struct irq_host *irq_alloc_host(unsigned int revmap_type, + unsigned int revmap_arg, + struct irq_host_ops *ops, + irq_hw_number_t inval_irq) { - int i; + struct irq_host *host; + unsigned int size = sizeof(struct irq_host); + unsigned int i; + unsigned int *rmap; + unsigned long flags; - if ((virt_irq_max == 0) || (virt_irq_max > (NR_IRQS - 1))) - virt_irq_max = NR_IRQS - 1; - max_virt_irq = virt_irq_max - __irq_offset_value; - nr_virt_irqs = max_virt_irq - MIN_VIRT_IRQ + 1; + /* Allocate structure and revmap table if using linear mapping */ + if (revmap_type == IRQ_HOST_MAP_LINEAR) + size += revmap_arg * sizeof(unsigned int); + if (mem_init_done) + host = kzalloc(size, GFP_KERNEL); + else { + host = alloc_bootmem(size); + if (host) + memset(host, 0, size); + } + if (host == NULL) + return NULL; - for (i = 0; i < NR_IRQS; i++) - virt_irq_to_real_map[i] = UNDEFINED_IRQ; + /* Fill structure */ + host->revmap_type = revmap_type; + host->inval_irq = inval_irq; + host->ops = ops; + + spin_lock_irqsave(&irq_big_lock, flags); + + /* If it's a legacy controller, check for duplicates and + * mark it as allocated (we use irq 0 host pointer for that 
+ */ + if (revmap_type == IRQ_HOST_MAP_LEGACY) { + if (irq_map[0].host != NULL) { + spin_unlock_irqrestore(&irq_big_lock, flags); + /* If we are early boot, we can't free the structure, + * too bad... + * this will be fixed once slab is made available early + * instead of the current cruft + */ + if (mem_init_done) + kfree(host); + return NULL; + } + irq_map[0].host = host; + } + + list_add(&host->link, &irq_hosts); + spin_unlock_irqrestore(&irq_big_lock, flags); + + /* Additional setups per revmap type */ + switch(revmap_type) { + case IRQ_HOST_MAP_LEGACY: + /* 0 is always the invalid number for legacy */ + host->inval_irq = 0; + /* setup us as the host for all legacy interrupts */ + for (i = 1; i < NUM_ISA_INTERRUPTS; i++) { + irq_map[i].hwirq = 0; + smp_wmb(); + irq_map[i].host = host; + smp_wmb(); + + /* Clear some flags */ + get_irq_desc(i)->status + &= ~(IRQ_NOREQUEST | IRQ_LEVEL); + + /* Legacy flags are left to default at this point, + * one can then use irq_create_mapping() to + * explicitely change them + */ + ops->map(host, i, i, 0); + } + break; + case IRQ_HOST_MAP_LINEAR: + rmap = (unsigned int *)(host + 1); + for (i = 0; i < revmap_arg; i++) + rmap[i] = IRQ_NONE; + host->revmap_data.linear.size = revmap_arg; + smp_wmb(); + host->revmap_data.linear.revmap = rmap; + break; + default: + break; + } + + pr_debug("irq: Allocated host of type %d @0x%p\n", revmap_type, host); + + return host; } -/* Create a mapping for a real_irq if it doesn't already exist. - * Return the virtual irq as a convenience. - */ -int virt_irq_create_mapping(unsigned int real_irq) +struct irq_host *irq_find_host(struct device_node *node) { - unsigned int virq, first_virq; - static int warned; + struct irq_host *h, *found = NULL; + unsigned long flags; + + /* We might want to match the legacy controller last since + * it might potentially be set to match all interrupts in + * the absence of a device node. This isn't a problem so far + * yet though... 
+ */ + spin_lock_irqsave(&irq_big_lock, flags); + list_for_each_entry(h, &irq_hosts, link) + if (h->ops->match == NULL || h->ops->match(h, node)) { + found = h; + break; + } + spin_unlock_irqrestore(&irq_big_lock, flags); + return found; +} +EXPORT_SYMBOL_GPL(irq_find_host); + +void irq_set_default_host(struct irq_host *host) +{ + pr_debug("irq: Default host set to @0x%p\n", host); + + irq_default_host = host; +} - if (ppc64_interrupt_controller == IC_OPEN_PIC) - return real_irq; /* no mapping for openpic (for now) */ +void irq_set_virq_count(unsigned int count) +{ + pr_debug("irq: Trying to set virq count to %d\n", count); - if (ppc64_interrupt_controller == IC_CELL_PIC) - return real_irq; /* no mapping for iic either */ + BUG_ON(count < NUM_ISA_INTERRUPTS); + if (count < NR_IRQS) + irq_virq_count = count; +} - /* don't map interrupts < MIN_VIRT_IRQ */ - if (real_irq < MIN_VIRT_IRQ) { - virt_irq_to_real_map[real_irq] = real_irq; - return real_irq; +unsigned int irq_create_mapping(struct irq_host *host, + irq_hw_number_t hwirq, + unsigned int flags) +{ + unsigned int virq, hint; + + pr_debug("irq: irq_create_mapping(0x%p, 0x%lx, 0x%x)\n", + host, hwirq, flags); + + /* Look for default host if nececssary */ + if (host == NULL) + host = irq_default_host; + if (host == NULL) { + printk(KERN_WARNING "irq_create_mapping called for" + " NULL host, hwirq=%lx\n", hwirq); + WARN_ON(1); + return NO_IRQ; } + pr_debug("irq: -> using host @%p\n", host); - /* map to a number between MIN_VIRT_IRQ and max_virt_irq */ - virq = real_irq; - if (virq > max_virt_irq) - virq = (virq % nr_virt_irqs) + MIN_VIRT_IRQ; - - /* search for this number or a free slot */ - first_virq = virq; - while (virt_irq_to_real_map[virq] != UNDEFINED_IRQ) { - if (virt_irq_to_real_map[virq] == real_irq) - return virq; - if (++virq > max_virt_irq) - virq = MIN_VIRT_IRQ; - if (virq == first_virq) - goto nospace; /* oops, no free slots */ + /* Check if mapping already exist, if it does, call + * 
host->ops->map() to update the flags + */ + virq = irq_find_mapping(host, hwirq); + if (virq != IRQ_NONE) { + pr_debug("irq: -> existing mapping on virq %d\n", virq); + host->ops->map(host, virq, hwirq, flags); + return virq; + } + + /* Get a virtual interrupt number */ + if (host->revmap_type == IRQ_HOST_MAP_LEGACY) { + /* Handle legacy */ + virq = (unsigned int)hwirq; + if (virq == 0 || virq >= NUM_ISA_INTERRUPTS) + return NO_IRQ; + return virq; + } else { + /* Allocate a virtual interrupt number */ + hint = hwirq % irq_virq_count; + virq = irq_alloc_virt(host, 1, hint); + if (virq == NO_IRQ) { + pr_debug("irq: -> virq allocation failed\n"); + return NO_IRQ; + } } + pr_debug("irq: -> obtained virq %d\n", virq); - virt_irq_to_real_map[virq] = real_irq; + /* Clear some flags */ + get_irq_desc(virq)->status &= ~(IRQ_NOREQUEST | IRQ_LEVEL); + + /* map it */ + if (host->ops->map(host, virq, hwirq, flags)) { + pr_debug("irq: -> mapping failed, freeing\n"); + irq_free_virt(virq, 1); + return NO_IRQ; + } + smp_wmb(); + irq_map[virq].hwirq = hwirq; + smp_mb(); return virq; +} +EXPORT_SYMBOL_GPL(irq_create_mapping); - nospace: - if (!warned) { - printk(KERN_CRIT "Interrupt table is full\n"); - printk(KERN_CRIT "Increase virt_irq_max (currently %d) " - "in your kernel sources and rebuild.\n", virt_irq_max); - warned = 1; +extern unsigned int irq_create_of_mapping(struct device_node *controller, + u32 *intspec, unsigned int intsize) +{ + struct irq_host *host; + irq_hw_number_t hwirq; + unsigned int flags = IRQ_TYPE_NONE; + + if (controller == NULL) + host = irq_default_host; + else + host = irq_find_host(controller); + if (host == NULL) + return NO_IRQ; + + /* If host has no translation, then we assume interrupt line */ + if (host->ops->xlate == NULL) + hwirq = intspec[0]; + else { + if (host->ops->xlate(host, controller, intspec, intsize, + &hwirq, &flags)) + return NO_IRQ; } - return NO_IRQ; + + return irq_create_mapping(host, hwirq, flags); } 
+EXPORT_SYMBOL_GPL(irq_create_of_mapping); -/* - * In most cases will get a hit on the very first slot checked in the - * virt_irq_to_real_map. Only when there are a large number of - * IRQs will this be expensive. - */ -unsigned int real_irq_to_virt_slowpath(unsigned int real_irq) +unsigned int irq_of_parse_and_map(struct device_node *dev, int index) { - unsigned int virq; - unsigned int first_virq; + struct of_irq oirq; - virq = real_irq; + if (of_irq_map_one(dev, index, &oirq)) + return NO_IRQ; - if (virq > max_virt_irq) - virq = (virq % nr_virt_irqs) + MIN_VIRT_IRQ; + return irq_create_of_mapping(oirq.controller, oirq.specifier, + oirq.size); +} +EXPORT_SYMBOL_GPL(irq_of_parse_and_map); - first_virq = virq; +void irq_dispose_mapping(unsigned int virq) +{ + struct irq_host *host = irq_map[virq].host; + irq_hw_number_t hwirq; + unsigned long flags; - do { - if (virt_irq_to_real_map[virq] == real_irq) - return virq; + WARN_ON (host == NULL); + if (host == NULL) + return; - virq++; + /* Never unmap legacy interrupts */ + if (host->revmap_type == IRQ_HOST_MAP_LEGACY) + return; - if (virq >= max_virt_irq) - virq = 0; + /* remove chip and handler */ + set_irq_chip_and_handler(virq, NULL, NULL); + + /* Make sure it's completed */ + synchronize_irq(virq); + + /* Tell the PIC about it */ + if (host->ops->unmap) + host->ops->unmap(host, virq); + smp_mb(); + + /* Clear reverse map */ + hwirq = irq_map[virq].hwirq; + switch(host->revmap_type) { + case IRQ_HOST_MAP_LINEAR: + if (hwirq < host->revmap_data.linear.size) + host->revmap_data.linear.revmap[hwirq] = IRQ_NONE; + break; + case IRQ_HOST_MAP_TREE: + /* Check if radix tree allocated yet */ + if (host->revmap_data.tree.gfp_mask == 0) + break; + /* XXX radix tree not safe ! 
remove lock whem it becomes safe + * and use some RCU sync to make sure everything is ok before we + * can re-use that map entry + */ + spin_lock_irqsave(&irq_big_lock, flags); + radix_tree_delete(&host->revmap_data.tree, hwirq); + spin_unlock_irqrestore(&irq_big_lock, flags); + break; + } - } while (first_virq != virq); + /* Destroy map */ + smp_mb(); + irq_map[virq].hwirq = host->inval_irq; - return NO_IRQ; + /* Set some flags */ + get_irq_desc(virq)->status |= IRQ_NOREQUEST; + /* Free it */ + irq_free_virt(virq, 1); } -#endif /* CONFIG_PPC64 */ +EXPORT_SYMBOL_GPL(irq_dispose_mapping); -#ifdef CONFIG_IRQSTACKS -struct thread_info *softirq_ctx[NR_CPUS] __read_mostly; -struct thread_info *hardirq_ctx[NR_CPUS] __read_mostly; +unsigned int irq_find_mapping(struct irq_host *host, + irq_hw_number_t hwirq) +{ + unsigned int i; + unsigned int hint = hwirq % irq_virq_count; + + /* Look for default host if nececssary */ + if (host == NULL) + host = irq_default_host; + if (host == NULL) + return NO_IRQ; + + /* legacy -> bail early */ + if (host->revmap_type == IRQ_HOST_MAP_LEGACY) + return hwirq; + + /* Slow path does a linear search of the map */ + if (hint < NUM_ISA_INTERRUPTS) + hint = NUM_ISA_INTERRUPTS; + i = hint; + do { + if (irq_map[i].host == host && + irq_map[i].hwirq == hwirq) + return i; + i++; + if (i >= irq_virq_count) + i = NUM_ISA_INTERRUPTS; + } while(i != hint); + return NO_IRQ; +} +EXPORT_SYMBOL_GPL(irq_find_mapping); -void irq_ctx_init(void) + +unsigned int irq_radix_revmap(struct irq_host *host, + irq_hw_number_t hwirq) { - struct thread_info *tp; - int i; + struct radix_tree_root *tree; + struct irq_map_entry *ptr; + unsigned int virq; + unsigned long flags; - for_each_possible_cpu(i) { - memset((void *)softirq_ctx[i], 0, THREAD_SIZE); - tp = softirq_ctx[i]; - tp->cpu = i; - tp->preempt_count = SOFTIRQ_OFFSET; + WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE); - memset((void *)hardirq_ctx[i], 0, THREAD_SIZE); - tp = hardirq_ctx[i]; - tp->cpu = i; - 
tp->preempt_count = HARDIRQ_OFFSET; + /* Check if the radix tree exist yet. We test the value of + * the gfp_mask for that. Sneaky but saves another int in the + * structure. If not, we fallback to slow mode + */ + tree = &host->revmap_data.tree; + if (tree->gfp_mask == 0) + return irq_find_mapping(host, hwirq); + + /* XXX Current radix trees are NOT SMP safe !!! Remove that lock + * when that is fixed (when Nick's patch gets in + */ + spin_lock_irqsave(&irq_big_lock, flags); + + /* Now try to resolve */ + ptr = radix_tree_lookup(tree, hwirq); + /* Found it, return */ + if (ptr) { + virq = ptr - irq_map; + goto bail; } + + /* If not there, try to insert it */ + virq = irq_find_mapping(host, hwirq); + if (virq != NO_IRQ) + radix_tree_insert(tree, virq, &irq_map[virq]); + bail: + spin_unlock_irqrestore(&irq_big_lock, flags); + return virq; } -static inline void do_softirq_onstack(void) +unsigned int irq_linear_revmap(struct irq_host *host, + irq_hw_number_t hwirq) { - struct thread_info *curtp, *irqtp; + unsigned int *revmap; - curtp = current_thread_info(); - irqtp = softirq_ctx[smp_processor_id()]; - irqtp->task = curtp->task; - call_do_softirq(irqtp); - irqtp->task = NULL; + WARN_ON(host->revmap_type != IRQ_HOST_MAP_LINEAR); + + /* Check revmap bounds */ + if (unlikely(hwirq >= host->revmap_data.linear.size)) + return irq_find_mapping(host, hwirq); + + /* Check if revmap was allocated */ + revmap = host->revmap_data.linear.revmap; + if (unlikely(revmap == NULL)) + return irq_find_mapping(host, hwirq); + + /* Fill up revmap with slow path if no mapping found */ + if (unlikely(revmap[hwirq] == NO_IRQ)) + revmap[hwirq] = irq_find_mapping(host, hwirq); + + return revmap[hwirq]; } -#else -#define do_softirq_onstack() __do_softirq() -#endif /* CONFIG_IRQSTACKS */ +unsigned int irq_alloc_virt(struct irq_host *host, + unsigned int count, + unsigned int hint) +{ + unsigned long flags; + unsigned int i, j, found = NO_IRQ; + unsigned int limit = irq_virq_count - count; -void 
do_softirq(void) + if (count == 0 || count > (irq_virq_count - NUM_ISA_INTERRUPTS)) + return NO_IRQ; + + spin_lock_irqsave(&irq_big_lock, flags); + + /* Use hint for 1 interrupt if any */ + if (count == 1 && hint >= NUM_ISA_INTERRUPTS && + hint < irq_virq_count && irq_map[hint].host == NULL) { + found = hint; + goto hint_found; + } + + /* Look for count consecutive numbers in the allocatable + * (non-legacy) space + */ + for (i = NUM_ISA_INTERRUPTS; i <= limit; ) { + for (j = i; j < (i + count); j++) + if (irq_map[j].host != NULL) { + i = j + 1; + continue; + } + found = i; + break; + } + if (found == NO_IRQ) { + spin_unlock_irqrestore(&irq_big_lock, flags); + return NO_IRQ; + } + hint_found: + for (i = found; i < (found + count); i++) { + irq_map[i].hwirq = host->inval_irq; + smp_wmb(); + irq_map[i].host = host; + } + spin_unlock_irqrestore(&irq_big_lock, flags); + return found; +} + +void irq_free_virt(unsigned int virq, unsigned int count) { unsigned long flags; + unsigned int i; - if (in_interrupt()) - return; + WARN_ON (virq < NUM_ISA_INTERRUPTS); + WARN_ON (count == 0 || (virq + count) > irq_virq_count); - local_irq_save(flags); + spin_lock_irqsave(&irq_big_lock, flags); + for (i = virq; i < (virq + count); i++) { + struct irq_host *host; - if (local_softirq_pending()) { - account_system_vtime(current); - local_bh_disable(); - do_softirq_onstack(); - account_system_vtime(current); - __local_bh_enable(); + if (i < NUM_ISA_INTERRUPTS || + (virq + count) > irq_virq_count) + continue; + + host = irq_map[i].host; + irq_map[i].hwirq = host->inval_irq; + smp_wmb(); + irq_map[i].host = NULL; } + spin_unlock_irqrestore(&irq_big_lock, flags); +} - local_irq_restore(flags); +void irq_early_init(void) +{ + unsigned int i; + + for (i = 0; i < NR_IRQS; i++) + get_irq_desc(i)->status |= IRQ_NOREQUEST; } -EXPORT_SYMBOL(do_softirq); + +/* We need to create the radix trees late */ +static int irq_late_init(void) +{ + struct irq_host *h; + unsigned long flags; + + 
spin_lock_irqsave(&irq_big_lock, flags); + list_for_each_entry(h, &irq_hosts, link) { + if (h->revmap_type == IRQ_HOST_MAP_TREE) + INIT_RADIX_TREE(&h->revmap_data.tree, GFP_ATOMIC); + } + spin_unlock_irqrestore(&irq_big_lock, flags); + + return 0; +} +arch_initcall(irq_late_init); + +#endif /* CONFIG_PPC_MERGE */ #ifdef CONFIG_PCI_MSI int pci_enable_msi(struct pci_dev * pdev) diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index 4cf0b971976b..7e98e778b52f 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -28,6 +28,7 @@ static struct legacy_serial_info { struct device_node *np; unsigned int speed; unsigned int clock; + int irq_check_parent; phys_addr_t taddr; } legacy_serial_infos[MAX_LEGACY_SERIAL_PORTS]; static unsigned int legacy_serial_count; @@ -36,7 +37,7 @@ static int legacy_serial_console = -1; static int __init add_legacy_port(struct device_node *np, int want_index, int iotype, phys_addr_t base, phys_addr_t taddr, unsigned long irq, - upf_t flags) + upf_t flags, int irq_check_parent) { u32 *clk, *spd, clock = BASE_BAUD * 16; int index; @@ -68,7 +69,7 @@ static int __init add_legacy_port(struct device_node *np, int want_index, if (legacy_serial_infos[index].np != 0) { /* if we still have some room, move it, else override */ if (legacy_serial_count < MAX_LEGACY_SERIAL_PORTS) { - printk(KERN_INFO "Moved legacy port %d -> %d\n", + printk(KERN_DEBUG "Moved legacy port %d -> %d\n", index, legacy_serial_count); legacy_serial_ports[legacy_serial_count] = legacy_serial_ports[index]; @@ -76,7 +77,7 @@ static int __init add_legacy_port(struct device_node *np, int want_index, legacy_serial_infos[index]; legacy_serial_count++; } else { - printk(KERN_INFO "Replacing legacy port %d\n", index); + printk(KERN_DEBUG "Replacing legacy port %d\n", index); } } @@ -95,10 +96,11 @@ static int __init add_legacy_port(struct device_node *np, int want_index, legacy_serial_infos[index].np = 
of_node_get(np); legacy_serial_infos[index].clock = clock; legacy_serial_infos[index].speed = spd ? *spd : 0; + legacy_serial_infos[index].irq_check_parent = irq_check_parent; - printk(KERN_INFO "Found legacy serial port %d for %s\n", + printk(KERN_DEBUG "Found legacy serial port %d for %s\n", index, np->full_name); - printk(KERN_INFO " %s=%llx, taddr=%llx, irq=%lx, clk=%d, speed=%d\n", + printk(KERN_DEBUG " %s=%llx, taddr=%llx, irq=%lx, clk=%d, speed=%d\n", (iotype == UPIO_PORT) ? "port" : "mem", (unsigned long long)base, (unsigned long long)taddr, irq, legacy_serial_ports[index].uartclk, @@ -126,11 +128,13 @@ static int __init add_legacy_soc_port(struct device_node *np, return -1; addr = of_translate_address(soc_dev, addrp); + if (addr == OF_BAD_ADDR) + return -1; /* Add port, irq will be dealt with later. We passed a translated * IO port value. It will be fixed up later along with the irq */ - return add_legacy_port(np, -1, UPIO_MEM, addr, addr, NO_IRQ, flags); + return add_legacy_port(np, -1, UPIO_MEM, addr, addr, NO_IRQ, flags, 0); } static int __init add_legacy_isa_port(struct device_node *np, @@ -141,6 +145,8 @@ static int __init add_legacy_isa_port(struct device_node *np, int index = -1; phys_addr_t taddr; + DBG(" -> add_legacy_isa_port(%s)\n", np->full_name); + /* Get the ISA port number */ reg = (u32 *)get_property(np, "reg", NULL); if (reg == NULL) @@ -161,9 +167,12 @@ static int __init add_legacy_isa_port(struct device_node *np, /* Translate ISA address */ taddr = of_translate_address(np, reg); + if (taddr == OF_BAD_ADDR) + return -1; /* Add port, irq will be dealt with later */ - return add_legacy_port(np, index, UPIO_PORT, reg[1], taddr, NO_IRQ, UPF_BOOT_AUTOCONF); + return add_legacy_port(np, index, UPIO_PORT, reg[1], taddr, + NO_IRQ, UPF_BOOT_AUTOCONF, 0); } @@ -176,6 +185,8 @@ static int __init add_legacy_pci_port(struct device_node *np, unsigned int flags; int iotype, index = -1, lindex = 0; + DBG(" -> add_legacy_pci_port(%s)\n", np->full_name); + 
/* We only support ports that have a clock frequency properly * encoded in the device-tree (that is have an fcode). Anything * else can't be used that early and will be normally probed by @@ -194,6 +205,8 @@ static int __init add_legacy_pci_port(struct device_node *np, /* We only support BAR 0 for now */ iotype = (flags & IORESOURCE_MEM) ? UPIO_MEM : UPIO_PORT; addr = of_translate_address(pci_dev, addrp); + if (addr == OF_BAD_ADDR) + return -1; /* Set the IO base to the same as the translated address for MMIO, * or to the domain local IO base for PIO (it will be fixed up later) @@ -231,7 +244,8 @@ static int __init add_legacy_pci_port(struct device_node *np, /* Add port, irq will be dealt with later. We passed a translated * IO port value. It will be fixed up later along with the irq */ - return add_legacy_port(np, index, iotype, base, addr, NO_IRQ, UPF_BOOT_AUTOCONF); + return add_legacy_port(np, index, iotype, base, addr, NO_IRQ, + UPF_BOOT_AUTOCONF, np != pci_dev); } #endif @@ -362,27 +376,22 @@ static void __init fixup_port_irq(int index, struct device_node *np, struct plat_serial8250_port *port) { + unsigned int virq; + DBG("fixup_port_irq(%d)\n", index); - /* Check for interrupts in that node */ - if (np->n_intrs > 0) { - port->irq = np->intrs[0].line; - DBG(" port %d (%s), irq=%d\n", - index, np->full_name, port->irq); - return; + virq = irq_of_parse_and_map(np, 0); + if (virq == NO_IRQ && legacy_serial_infos[index].irq_check_parent) { + np = of_get_parent(np); + if (np == NULL) + return; + virq = irq_of_parse_and_map(np, 0); + of_node_put(np); } - - /* Check for interrupts in the parent */ - np = of_get_parent(np); - if (np == NULL) + if (virq == NO_IRQ) return; - if (np->n_intrs > 0) { - port->irq = np->intrs[0].line; - DBG(" port %d (%s), irq=%d\n", - index, np->full_name, port->irq); - } - of_node_put(np); + port->irq = virq; } static void __init fixup_port_pio(int index, diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 
0c3c70d115c6..bfb407fc1aa1 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -51,12 +51,14 @@ _GLOBAL(call_do_softirq) mtlr r0 blr -_GLOBAL(call___do_IRQ) +_GLOBAL(call_handle_irq) + ld r8,0(r7) mflr r0 std r0,16(r1) - stdu r1,THREAD_SIZE-112(r5) - mr r1,r5 - bl .__do_IRQ + mtctr r8 + stdu r1,THREAD_SIZE-112(r6) + mr r1,r6 + bctrl ld r1,0(r1) ld r0,16(r1) mtlr r0 diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 1333335c474e..898dae8ab6d9 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -1404,6 +1404,43 @@ pcibios_update_irq(struct pci_dev *dev, int irq) /* XXX FIXME - update OF device tree node interrupt property */ } +#ifdef CONFIG_PPC_MERGE +/* XXX This is a copy of the ppc64 version. This is temporary until we start + * merging the 2 PCI layers + */ +/* + * Reads the interrupt pin to determine if interrupt is use by card. + * If the interrupt is used, then gets the interrupt line from the + * openfirmware and sets it in the pci_dev and pci_config line. + */ +int pci_read_irq_line(struct pci_dev *pci_dev) +{ + struct of_irq oirq; + unsigned int virq; + + DBG("Try to map irq for %s...\n", pci_name(pci_dev)); + + if (of_irq_map_pci(pci_dev, &oirq)) { + DBG(" -> failed !\n"); + return -1; + } + + DBG(" -> got one, spec %d cells (0x%08x...) 
on %s\n", + oirq.size, oirq.specifier[0], oirq.controller->full_name); + + virq = irq_create_of_mapping(oirq.controller, oirq.specifier, oirq.size); + if(virq == NO_IRQ) { + DBG(" -> failed to map !\n"); + return -1; + } + pci_dev->irq = virq; + pci_write_config_byte(pci_dev, PCI_INTERRUPT_LINE, virq); + + return 0; +} +EXPORT_SYMBOL(pci_read_irq_line); +#endif /* CONFIG_PPC_MERGE */ + int pcibios_enable_device(struct pci_dev *dev, int mask) { u16 cmd, old_cmd; diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index bea8451fb57b..efc0b5559ee0 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -398,12 +398,8 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, } else { dev->hdr_type = PCI_HEADER_TYPE_NORMAL; dev->rom_base_reg = PCI_ROM_ADDRESS; + /* Maybe do a default OF mapping here */ dev->irq = NO_IRQ; - if (node->n_intrs > 0) { - dev->irq = node->intrs[0].line; - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, - dev->irq); - } } pci_parse_of_addrs(node, dev); @@ -1288,23 +1284,26 @@ EXPORT_SYMBOL(pcibios_fixup_bus); */ int pci_read_irq_line(struct pci_dev *pci_dev) { - u8 intpin; - struct device_node *node; - - pci_read_config_byte(pci_dev, PCI_INTERRUPT_PIN, &intpin); - if (intpin == 0) - return 0; + struct of_irq oirq; + unsigned int virq; - node = pci_device_to_OF_node(pci_dev); - if (node == NULL) - return -1; + DBG("Try to map irq for %s...\n", pci_name(pci_dev)); - if (node->n_intrs == 0) + if (of_irq_map_pci(pci_dev, &oirq)) { + DBG(" -> failed !\n"); return -1; + } - pci_dev->irq = node->intrs[0].line; + DBG(" -> got one, spec %d cells (0x%08x...) 
on %s\n", + oirq.size, oirq.specifier[0], oirq.controller->full_name); - pci_write_config_byte(pci_dev, PCI_INTERRUPT_LINE, pci_dev->irq); + virq = irq_create_of_mapping(oirq.controller, oirq.specifier, oirq.size); + if(virq == NO_IRQ) { + DBG(" -> failed to map !\n"); + return -1; + } + pci_dev->irq = virq; + pci_write_config_byte(pci_dev, PCI_INTERRUPT_LINE, virq); return 0; } diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 4c524cb52184..a1787ffb6319 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -30,6 +30,7 @@ #include <linux/module.h> #include <linux/kexec.h> #include <linux/debugfs.h> +#include <linux/irq.h> #include <asm/prom.h> #include <asm/rtas.h> @@ -86,424 +87,6 @@ static DEFINE_RWLOCK(devtree_lock); /* export that to outside world */ struct device_node *of_chosen; -struct device_node *dflt_interrupt_controller; -int num_interrupt_controllers; - -/* - * Wrapper for allocating memory for various data that needs to be - * attached to device nodes as they are processed at boot or when - * added to the device tree later (e.g. DLPAR). At boot there is - * already a region reserved so we just increment *mem_start by size; - * otherwise we call kmalloc. - */ -static void * prom_alloc(unsigned long size, unsigned long *mem_start) -{ - unsigned long tmp; - - if (!mem_start) - return kmalloc(size, GFP_KERNEL); - - tmp = *mem_start; - *mem_start += size; - return (void *)tmp; -} - -/* - * Find the device_node with a given phandle. - */ -static struct device_node * find_phandle(phandle ph) -{ - struct device_node *np; - - for (np = allnodes; np != 0; np = np->allnext) - if (np->linux_phandle == ph) - return np; - return NULL; -} - -/* - * Find the interrupt parent of a node. 
- */ -static struct device_node * __devinit intr_parent(struct device_node *p) -{ - phandle *parp; - - parp = (phandle *) get_property(p, "interrupt-parent", NULL); - if (parp == NULL) - return p->parent; - p = find_phandle(*parp); - if (p != NULL) - return p; - /* - * On a powermac booted with BootX, we don't get to know the - * phandles for any nodes, so find_phandle will return NULL. - * Fortunately these machines only have one interrupt controller - * so there isn't in fact any ambiguity. -- paulus - */ - if (num_interrupt_controllers == 1) - p = dflt_interrupt_controller; - return p; -} - -/* - * Find out the size of each entry of the interrupts property - * for a node. - */ -int __devinit prom_n_intr_cells(struct device_node *np) -{ - struct device_node *p; - unsigned int *icp; - - for (p = np; (p = intr_parent(p)) != NULL; ) { - icp = (unsigned int *) - get_property(p, "#interrupt-cells", NULL); - if (icp != NULL) - return *icp; - if (get_property(p, "interrupt-controller", NULL) != NULL - || get_property(p, "interrupt-map", NULL) != NULL) { - printk("oops, node %s doesn't have #interrupt-cells\n", - p->full_name); - return 1; - } - } -#ifdef DEBUG_IRQ - printk("prom_n_intr_cells failed for %s\n", np->full_name); -#endif - return 1; -} - -/* - * Map an interrupt from a device up to the platform interrupt - * descriptor. 
- */ -static int __devinit map_interrupt(unsigned int **irq, struct device_node **ictrler, - struct device_node *np, unsigned int *ints, - int nintrc) -{ - struct device_node *p, *ipar; - unsigned int *imap, *imask, *ip; - int i, imaplen, match; - int newintrc = 0, newaddrc = 0; - unsigned int *reg; - int naddrc; - - reg = (unsigned int *) get_property(np, "reg", NULL); - naddrc = prom_n_addr_cells(np); - p = intr_parent(np); - while (p != NULL) { - if (get_property(p, "interrupt-controller", NULL) != NULL) - /* this node is an interrupt controller, stop here */ - break; - imap = (unsigned int *) - get_property(p, "interrupt-map", &imaplen); - if (imap == NULL) { - p = intr_parent(p); - continue; - } - imask = (unsigned int *) - get_property(p, "interrupt-map-mask", NULL); - if (imask == NULL) { - printk("oops, %s has interrupt-map but no mask\n", - p->full_name); - return 0; - } - imaplen /= sizeof(unsigned int); - match = 0; - ipar = NULL; - while (imaplen > 0 && !match) { - /* check the child-interrupt field */ - match = 1; - for (i = 0; i < naddrc && match; ++i) - match = ((reg[i] ^ imap[i]) & imask[i]) == 0; - for (; i < naddrc + nintrc && match; ++i) - match = ((ints[i-naddrc] ^ imap[i]) & imask[i]) == 0; - imap += naddrc + nintrc; - imaplen -= naddrc + nintrc; - /* grab the interrupt parent */ - ipar = find_phandle((phandle) *imap++); - --imaplen; - if (ipar == NULL && num_interrupt_controllers == 1) - /* cope with BootX not giving us phandles */ - ipar = dflt_interrupt_controller; - if (ipar == NULL) { - printk("oops, no int parent %x in map of %s\n", - imap[-1], p->full_name); - return 0; - } - /* find the parent's # addr and intr cells */ - ip = (unsigned int *) - get_property(ipar, "#interrupt-cells", NULL); - if (ip == NULL) { - printk("oops, no #interrupt-cells on %s\n", - ipar->full_name); - return 0; - } - newintrc = *ip; - ip = (unsigned int *) - get_property(ipar, "#address-cells", NULL); - newaddrc = (ip == NULL)? 
0: *ip; - imap += newaddrc + newintrc; - imaplen -= newaddrc + newintrc; - } - if (imaplen < 0) { - printk("oops, error decoding int-map on %s, len=%d\n", - p->full_name, imaplen); - return 0; - } - if (!match) { -#ifdef DEBUG_IRQ - printk("oops, no match in %s int-map for %s\n", - p->full_name, np->full_name); -#endif - return 0; - } - p = ipar; - naddrc = newaddrc; - nintrc = newintrc; - ints = imap - nintrc; - reg = ints - naddrc; - } - if (p == NULL) { -#ifdef DEBUG_IRQ - printk("hmmm, int tree for %s doesn't have ctrler\n", - np->full_name); -#endif - return 0; - } - *irq = ints; - *ictrler = p; - return nintrc; -} - -static unsigned char map_isa_senses[4] = { - IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE, - IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE, - IRQ_SENSE_EDGE | IRQ_POLARITY_NEGATIVE, - IRQ_SENSE_EDGE | IRQ_POLARITY_POSITIVE -}; - -static unsigned char map_mpic_senses[4] = { - IRQ_SENSE_EDGE | IRQ_POLARITY_POSITIVE, - IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE, - /* 2 seems to be used for the 8259 cascade... */ - IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE, - IRQ_SENSE_EDGE | IRQ_POLARITY_NEGATIVE, -}; - -static int __devinit finish_node_interrupts(struct device_node *np, - unsigned long *mem_start, - int measure_only) -{ - unsigned int *ints; - int intlen, intrcells, intrcount; - int i, j, n, sense; - unsigned int *irq, virq; - struct device_node *ic; - int trace = 0; - - //#define TRACE(fmt...) do { if (trace) { printk(fmt); mdelay(1000); } } while(0) -#define TRACE(fmt...) - - if (!strcmp(np->name, "smu-doorbell")) - trace = 1; - - TRACE("Finishing SMU doorbell ! num_interrupt_controllers = %d\n", - num_interrupt_controllers); - - if (num_interrupt_controllers == 0) { - /* - * Old machines just have a list of interrupt numbers - * and no interrupt-controller nodes. 
- */ - ints = (unsigned int *) get_property(np, "AAPL,interrupts", - &intlen); - /* XXX old interpret_pci_props looked in parent too */ - /* XXX old interpret_macio_props looked for interrupts - before AAPL,interrupts */ - if (ints == NULL) - ints = (unsigned int *) get_property(np, "interrupts", - &intlen); - if (ints == NULL) - return 0; - - np->n_intrs = intlen / sizeof(unsigned int); - np->intrs = prom_alloc(np->n_intrs * sizeof(np->intrs[0]), - mem_start); - if (!np->intrs) - return -ENOMEM; - if (measure_only) - return 0; - - for (i = 0; i < np->n_intrs; ++i) { - np->intrs[i].line = *ints++; - np->intrs[i].sense = IRQ_SENSE_LEVEL - | IRQ_POLARITY_NEGATIVE; - } - return 0; - } - - ints = (unsigned int *) get_property(np, "interrupts", &intlen); - TRACE("ints=%p, intlen=%d\n", ints, intlen); - if (ints == NULL) - return 0; - intrcells = prom_n_intr_cells(np); - intlen /= intrcells * sizeof(unsigned int); - TRACE("intrcells=%d, new intlen=%d\n", intrcells, intlen); - np->intrs = prom_alloc(intlen * sizeof(*(np->intrs)), mem_start); - if (!np->intrs) - return -ENOMEM; - - if (measure_only) - return 0; - - intrcount = 0; - for (i = 0; i < intlen; ++i, ints += intrcells) { - n = map_interrupt(&irq, &ic, np, ints, intrcells); - TRACE("map, irq=%d, ic=%p, n=%d\n", irq, ic, n); - if (n <= 0) - continue; - - /* don't map IRQ numbers under a cascaded 8259 controller */ - if (ic && device_is_compatible(ic, "chrp,iic")) { - np->intrs[intrcount].line = irq[0]; - sense = (n > 1)? (irq[1] & 3): 3; - np->intrs[intrcount].sense = map_isa_senses[sense]; - } else { - virq = virt_irq_create_mapping(irq[0]); - TRACE("virq=%d\n", virq); -#ifdef CONFIG_PPC64 - if (virq == NO_IRQ) { - printk(KERN_CRIT "Could not allocate interrupt" - " number for %s\n", np->full_name); - continue; - } -#endif - np->intrs[intrcount].line = irq_offset_up(virq); - sense = (n > 1)? 
(irq[1] & 3): 1; - - /* Apple uses bits in there in a different way, let's - * only keep the real sense bit on macs - */ - if (machine_is(powermac)) - sense &= 0x1; - np->intrs[intrcount].sense = map_mpic_senses[sense]; - } - -#ifdef CONFIG_PPC64 - /* We offset irq numbers for the u3 MPIC by 128 in PowerMac */ - if (machine_is(powermac) && ic && ic->parent) { - char *name = get_property(ic->parent, "name", NULL); - if (name && !strcmp(name, "u3")) - np->intrs[intrcount].line += 128; - else if (!(name && (!strcmp(name, "mac-io") || - !strcmp(name, "u4")))) - /* ignore other cascaded controllers, such as - the k2-sata-root */ - break; - } -#endif /* CONFIG_PPC64 */ - if (n > 2) { - printk("hmmm, got %d intr cells for %s:", n, - np->full_name); - for (j = 0; j < n; ++j) - printk(" %d", irq[j]); - printk("\n"); - } - ++intrcount; - } - np->n_intrs = intrcount; - - return 0; -} - -static int __devinit finish_node(struct device_node *np, - unsigned long *mem_start, - int measure_only) -{ - struct device_node *child; - int rc = 0; - - rc = finish_node_interrupts(np, mem_start, measure_only); - if (rc) - goto out; - - for (child = np->child; child != NULL; child = child->sibling) { - rc = finish_node(child, mem_start, measure_only); - if (rc) - goto out; - } -out: - return rc; -} - -static void __init scan_interrupt_controllers(void) -{ - struct device_node *np; - int n = 0; - char *name, *ic; - int iclen; - - for (np = allnodes; np != NULL; np = np->allnext) { - ic = get_property(np, "interrupt-controller", &iclen); - name = get_property(np, "name", NULL); - /* checking iclen makes sure we don't get a false - match on /chosen.interrupt_controller */ - if ((name != NULL - && strcmp(name, "interrupt-controller") == 0) - || (ic != NULL && iclen == 0 - && strcmp(name, "AppleKiwi"))) { - if (n == 0) - dflt_interrupt_controller = np; - ++n; - } - } - num_interrupt_controllers = n; -} - -/** - * finish_device_tree is called once things are running normally - * (i.e. 
with text and data mapped to the address they were linked at). - * It traverses the device tree and fills in some of the additional, - * fields in each node like {n_}addrs and {n_}intrs, the virt interrupt - * mapping is also initialized at this point. - */ -void __init finish_device_tree(void) -{ - unsigned long start, end, size = 0; - - DBG(" -> finish_device_tree\n"); - -#ifdef CONFIG_PPC64 - /* Initialize virtual IRQ map */ - virt_irq_init(); -#endif - scan_interrupt_controllers(); - - /* - * Finish device-tree (pre-parsing some properties etc...) - * We do this in 2 passes. One with "measure_only" set, which - * will only measure the amount of memory needed, then we can - * allocate that memory, and call finish_node again. However, - * we must be careful as most routines will fail nowadays when - * prom_alloc() returns 0, so we must make sure our first pass - * doesn't start at 0. We pre-initialize size to 16 for that - * reason and then remove those additional 16 bytes - */ - size = 16; - finish_node(allnodes, &size, 1); - size -= 16; - - if (0 == size) - end = start = 0; - else - end = start = (unsigned long)__va(lmb_alloc(size, 128)); - - finish_node(allnodes, &end, 0); - BUG_ON(end != start + size); - - DBG(" <- finish_device_tree\n"); -} - static inline char *find_flat_dt_string(u32 offset) { return ((char *)initial_boot_params) + @@ -1389,27 +972,6 @@ prom_n_size_cells(struct device_node* np) EXPORT_SYMBOL(prom_n_size_cells); /** - * Work out the sense (active-low level / active-high edge) - * of each interrupt from the device tree. 
- */ -void __init prom_get_irq_senses(unsigned char *senses, int off, int max) -{ - struct device_node *np; - int i, j; - - /* default to level-triggered */ - memset(senses, IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE, max - off); - - for (np = allnodes; np != 0; np = np->allnext) { - for (j = 0; j < np->n_intrs; j++) { - i = np->intrs[j].line; - if (i >= off && i < max) - senses[i-off] = np->intrs[j].sense; - } - } -} - -/** * Construct and return a list of the device_nodes with a given name. */ struct device_node *find_devices(const char *name) @@ -1808,7 +1370,6 @@ static void of_node_release(struct kref *kref) node->deadprops = NULL; } } - kfree(node->intrs); kfree(node->full_name); kfree(node->data); kfree(node); @@ -1881,13 +1442,7 @@ void of_detach_node(const struct device_node *np) #ifdef CONFIG_PPC_PSERIES /* * Fix up the uninitialized fields in a new device node: - * name, type, n_addrs, addrs, n_intrs, intrs, and pci-specific fields - * - * A lot of boot-time code is duplicated here, because functions such - * as finish_node_interrupts, interpret_pci_props, etc. cannot use the - * slab allocator. - * - * This should probably be split up into smaller chunks. + * name, type and pci-specific fields */ static int of_finish_dynamic_node(struct device_node *node) @@ -1928,8 +1483,6 @@ static int prom_reconfig_notifier(struct notifier_block *nb, switch (action) { case PSERIES_RECONFIG_ADD: err = of_finish_dynamic_node(node); - if (!err) - finish_node(node, NULL, 0); if (err < 0) { printk(KERN_ERR "finish_node returned %d\n", err); err = NOTIFY_BAD; @@ -1975,8 +1528,7 @@ struct property *of_find_property(struct device_node *np, const char *name, * Find a property with a given name for a given node * and return the value. */ -unsigned char *get_property(struct device_node *np, const char *name, - int *lenp) +void *get_property(struct device_node *np, const char *name, int *lenp) { struct property *pp = of_find_property(np,name,lenp); return pp ? 
pp->value : NULL; diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 1e95a9f8cda1..ebd501a59abd 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -1990,12 +1990,22 @@ static void __init flatten_device_tree(void) static void __init fixup_device_tree_maple(void) { phandle isa; + u32 rloc = 0x01002000; /* IO space; PCI device = 4 */ u32 isa_ranges[6]; - - isa = call_prom("finddevice", 1, 1, ADDR("/ht@0/isa@4")); + char *name; + + name = "/ht@0/isa@4"; + isa = call_prom("finddevice", 1, 1, ADDR(name)); + if (!PHANDLE_VALID(isa)) { + name = "/ht@0/isa@6"; + isa = call_prom("finddevice", 1, 1, ADDR(name)); + rloc = 0x01003000; /* IO space; PCI device = 6 */ + } if (!PHANDLE_VALID(isa)) return; + if (prom_getproplen(isa, "ranges") != 12) + return; if (prom_getprop(isa, "ranges", isa_ranges, sizeof(isa_ranges)) == PROM_ERROR) return; @@ -2005,15 +2015,15 @@ static void __init fixup_device_tree_maple(void) isa_ranges[2] != 0x00010000) return; - prom_printf("fixing up bogus ISA range on Maple...\n"); + prom_printf("Fixing up bogus ISA range on Maple/Apache...\n"); isa_ranges[0] = 0x1; isa_ranges[1] = 0x0; - isa_ranges[2] = 0x01002000; /* IO space; PCI device = 4 */ + isa_ranges[2] = rloc; isa_ranges[3] = 0x0; isa_ranges[4] = 0x0; isa_ranges[5] = 0x00010000; - prom_setprop(isa, "/ht@0/isa@4", "ranges", + prom_setprop(isa, name, "ranges", isa_ranges, sizeof(isa_ranges)); } #else diff --git a/arch/powerpc/kernel/prom_parse.c b/arch/powerpc/kernel/prom_parse.c index 45df420383cc..21009b1f7869 100644 --- a/arch/powerpc/kernel/prom_parse.c +++ b/arch/powerpc/kernel/prom_parse.c @@ -38,14 +38,6 @@ static void of_dump_addr(const char *s, u32 *addr, int na) static void of_dump_addr(const char *s, u32 *addr, int na) { } #endif -/* Read a big address */ -static inline u64 of_read_addr(u32 *cell, int size) -{ - u64 r = 0; - while (size--) - r = (r << 32) | *(cell++); - return r; -} /* Callbacks for bus specific 
translators */ struct of_bus { @@ -77,9 +69,9 @@ static u64 of_bus_default_map(u32 *addr, u32 *range, int na, int ns, int pna) { u64 cp, s, da; - cp = of_read_addr(range, na); - s = of_read_addr(range + na + pna, ns); - da = of_read_addr(addr, na); + cp = of_read_number(range, na); + s = of_read_number(range + na + pna, ns); + da = of_read_number(addr, na); DBG("OF: default map, cp="PRu64", s="PRu64", da="PRu64"\n", cp, s, da); @@ -91,7 +83,7 @@ static u64 of_bus_default_map(u32 *addr, u32 *range, int na, int ns, int pna) static int of_bus_default_translate(u32 *addr, u64 offset, int na) { - u64 a = of_read_addr(addr, na); + u64 a = of_read_number(addr, na); memset(addr, 0, na * 4); a += offset; if (na > 1) @@ -135,9 +127,9 @@ static u64 of_bus_pci_map(u32 *addr, u32 *range, int na, int ns, int pna) return OF_BAD_ADDR; /* Read address values, skipping high cell */ - cp = of_read_addr(range + 1, na - 1); - s = of_read_addr(range + na + pna, ns); - da = of_read_addr(addr + 1, na - 1); + cp = of_read_number(range + 1, na - 1); + s = of_read_number(range + na + pna, ns); + da = of_read_number(addr + 1, na - 1); DBG("OF: PCI map, cp="PRu64", s="PRu64", da="PRu64"\n", cp, s, da); @@ -195,9 +187,9 @@ static u64 of_bus_isa_map(u32 *addr, u32 *range, int na, int ns, int pna) return OF_BAD_ADDR; /* Read address values, skipping high cell */ - cp = of_read_addr(range + 1, na - 1); - s = of_read_addr(range + na + pna, ns); - da = of_read_addr(addr + 1, na - 1); + cp = of_read_number(range + 1, na - 1); + s = of_read_number(range + na + pna, ns); + da = of_read_number(addr + 1, na - 1); DBG("OF: ISA map, cp="PRu64", s="PRu64", da="PRu64"\n", cp, s, da); @@ -295,7 +287,7 @@ static int of_translate_one(struct device_node *parent, struct of_bus *bus, */ ranges = (u32 *)get_property(parent, "ranges", &rlen); if (ranges == NULL || rlen == 0) { - offset = of_read_addr(addr, na); + offset = of_read_number(addr, na); memset(addr, 0, pna * 4); DBG("OF: no ranges, 1:1 translation\n"); 
goto finish; @@ -378,7 +370,7 @@ u64 of_translate_address(struct device_node *dev, u32 *in_addr) /* If root, we have finished */ if (parent == NULL) { DBG("OF: reached root node\n"); - result = of_read_addr(addr, na); + result = of_read_number(addr, na); break; } @@ -442,7 +434,7 @@ u32 *of_get_address(struct device_node *dev, int index, u64 *size, for (i = 0; psize >= onesize; psize -= onesize, prop += onesize, i++) if (i == index) { if (size) - *size = of_read_addr(prop + na, ns); + *size = of_read_number(prop + na, ns); if (flags) *flags = bus->get_flags(prop); return prop; @@ -484,7 +476,7 @@ u32 *of_get_pci_address(struct device_node *dev, int bar_no, u64 *size, for (i = 0; psize >= onesize; psize -= onesize, prop += onesize, i++) if ((prop[0] & 0xff) == ((bar_no * 4) + PCI_BASE_ADDRESS_0)) { if (size) - *size = of_read_addr(prop + na, ns); + *size = of_read_number(prop + na, ns); if (flags) *flags = bus->get_flags(prop); return prop; @@ -565,11 +557,414 @@ void of_parse_dma_window(struct device_node *dn, unsigned char *dma_window_prop, prop = get_property(dn, "#address-cells", NULL); cells = prop ? *(u32 *)prop : prom_n_addr_cells(dn); - *phys = of_read_addr(dma_window, cells); + *phys = of_read_number(dma_window, cells); dma_window += cells; prop = get_property(dn, "ibm,#dma-size-cells", NULL); cells = prop ? 
*(u32 *)prop : prom_n_size_cells(dn); - *size = of_read_addr(dma_window, cells); + *size = of_read_number(dma_window, cells); +} + +/* + * Interrupt remapper + */ + +static unsigned int of_irq_workarounds; +static struct device_node *of_irq_dflt_pic; + +static struct device_node *of_irq_find_parent(struct device_node *child) +{ + struct device_node *p; + phandle *parp; + + if (!of_node_get(child)) + return NULL; + + do { + parp = (phandle *)get_property(child, "interrupt-parent", NULL); + if (parp == NULL) + p = of_get_parent(child); + else { + if (of_irq_workarounds & OF_IMAP_NO_PHANDLE) + p = of_node_get(of_irq_dflt_pic); + else + p = of_find_node_by_phandle(*parp); + } + of_node_put(child); + child = p; + } while (p && get_property(p, "#interrupt-cells", NULL) == NULL); + + return p; +} + +static u8 of_irq_pci_swizzle(u8 slot, u8 pin) +{ + return (((pin - 1) + slot) % 4) + 1; } + +/* This doesn't need to be called if you don't have any special workaround + * flags to pass + */ +void of_irq_map_init(unsigned int flags) +{ + of_irq_workarounds = flags; + + /* OldWorld, don't bother looking at other things */ + if (flags & OF_IMAP_OLDWORLD_MAC) + return; + + /* If we don't have phandles, let's try to locate a default interrupt + * controller (happens when booting with BootX). We do a first match + * here, hopefully, that only ever happens on machines with one + * controller. + */ + if (flags & OF_IMAP_NO_PHANDLE) { + struct device_node *np; + + for(np = NULL; (np = of_find_all_nodes(np)) != NULL;) { + if (get_property(np, "interrupt-controller", NULL) + == NULL) + continue; + /* Skip /chosen/interrupt-controller */ + if (strcmp(np->name, "chosen") == 0) + continue; + /* It seems like at least one person on this planet wants + * to use BootX on a machine with an AppleKiwi controller + * which happens to pretend to be an interrupt + * controller too. + */ + if (strcmp(np->name, "AppleKiwi") == 0) + continue; + /* I think we found one ! 
*/ + of_irq_dflt_pic = np; + break; + } + } + +} + +int of_irq_map_raw(struct device_node *parent, u32 *intspec, u32 *addr, + struct of_irq *out_irq) +{ + struct device_node *ipar, *tnode, *old = NULL, *newpar = NULL; + u32 *tmp, *imap, *imask; + u32 intsize = 1, addrsize, newintsize = 0, newaddrsize = 0; + int imaplen, match, i; + + ipar = of_node_get(parent); + + /* First get the #interrupt-cells property of the current cursor + * that tells us how to interpret the passed-in intspec. If there + * is none, we are nice and just walk up the tree + */ + do { + tmp = (u32 *)get_property(ipar, "#interrupt-cells", NULL); + if (tmp != NULL) { + intsize = *tmp; + break; + } + tnode = ipar; + ipar = of_irq_find_parent(ipar); + of_node_put(tnode); + } while (ipar); + if (ipar == NULL) { + DBG(" -> no parent found !\n"); + goto fail; + } + + DBG("of_irq_map_raw: ipar=%s, size=%d\n", ipar->full_name, intsize); + + /* Look for this #address-cells. We have to implement the old linux + * trick of looking for the parent here as some device-trees rely on it + */ + old = of_node_get(ipar); + do { + tmp = (u32 *)get_property(old, "#address-cells", NULL); + tnode = of_get_parent(old); + of_node_put(old); + old = tnode; + } while(old && tmp == NULL); + of_node_put(old); + old = NULL; + addrsize = (tmp == NULL) ? 
2 : *tmp; + + DBG(" -> addrsize=%d\n", addrsize); + + /* Now start the actual "proper" walk of the interrupt tree */ + while (ipar != NULL) { + /* Now check if cursor is an interrupt-controller and if it is + * then we are done + */ + if (get_property(ipar, "interrupt-controller", NULL) != NULL) { + DBG(" -> got it !\n"); + memcpy(out_irq->specifier, intspec, + intsize * sizeof(u32)); + out_irq->size = intsize; + out_irq->controller = ipar; + of_node_put(old); + return 0; + } + + /* Now look for an interrupt-map */ + imap = (u32 *)get_property(ipar, "interrupt-map", &imaplen); + /* No interrupt map, check for an interrupt parent */ + if (imap == NULL) { + DBG(" -> no map, getting parent\n"); + newpar = of_irq_find_parent(ipar); + goto skiplevel; + } + imaplen /= sizeof(u32); + + /* Look for a mask */ + imask = (u32 *)get_property(ipar, "interrupt-map-mask", NULL); + + /* If we were passed no "reg" property and we attempt to parse + * an interrupt-map, then #address-cells must be 0. + * Fail if it's not. + */ + if (addr == NULL && addrsize != 0) { + DBG(" -> no reg passed in when needed !\n"); + goto fail; + } + + /* Parse interrupt-map */ + match = 0; + while (imaplen > (addrsize + intsize + 1) && !match) { + /* Compare specifiers */ + match = 1; + for (i = 0; i < addrsize && match; ++i) { + u32 mask = imask ? imask[i] : 0xffffffffu; + match = ((addr[i] ^ imap[i]) & mask) == 0; + } + for (; i < (addrsize + intsize) && match; ++i) { + u32 mask = imask ? 
imask[i] : 0xffffffffu; + match = + ((intspec[i-addrsize] ^ imap[i]) & mask) == 0; + } + imap += addrsize + intsize; + imaplen -= addrsize + intsize; + + DBG(" -> match=%d (imaplen=%d)\n", match, imaplen); + + /* Get the interrupt parent */ + if (of_irq_workarounds & OF_IMAP_NO_PHANDLE) + newpar = of_node_get(of_irq_dflt_pic); + else + newpar = of_find_node_by_phandle((phandle)*imap); + imap++; + --imaplen; + + /* Check if not found */ + if (newpar == NULL) { + DBG(" -> imap parent not found !\n"); + goto fail; + } + + /* Get #interrupt-cells and #address-cells of new + * parent + */ + tmp = (u32 *)get_property(newpar, "#interrupt-cells", + NULL); + if (tmp == NULL) { + DBG(" -> parent lacks #interrupt-cells !\n"); + goto fail; + } + newintsize = *tmp; + tmp = (u32 *)get_property(newpar, "#address-cells", + NULL); + newaddrsize = (tmp == NULL) ? 0 : *tmp; + + DBG(" -> newintsize=%d, newaddrsize=%d\n", + newintsize, newaddrsize); + + /* Check for malformed properties */ + if (imaplen < (newaddrsize + newintsize)) + goto fail; + + imap += newaddrsize + newintsize; + imaplen -= newaddrsize + newintsize; + + DBG(" -> imaplen=%d\n", imaplen); + } + if (!match) + goto fail; + + of_node_put(old); + old = of_node_get(newpar); + addrsize = newaddrsize; + intsize = newintsize; + intspec = imap - intsize; + addr = intspec - addrsize; + + skiplevel: + /* Iterate again with new parent */ + DBG(" -> new parent: %s\n", newpar ? newpar->full_name : "<>"); + of_node_put(ipar); + ipar = newpar; + newpar = NULL; + } + fail: + of_node_put(ipar); + of_node_put(old); + of_node_put(newpar); + + return -EINVAL; +} +EXPORT_SYMBOL_GPL(of_irq_map_raw); + +#if defined(CONFIG_PPC_PMAC) && defined(CONFIG_PPC32) +static int of_irq_map_oldworld(struct device_node *device, int index, + struct of_irq *out_irq) +{ + u32 *ints; + int intlen; + + /* + * Old machines just have a list of interrupt numbers + * and no interrupt-controller nodes. 
+ */ + ints = (u32 *) get_property(device, "AAPL,interrupts", &intlen); + if (ints == NULL) + return -EINVAL; + intlen /= sizeof(u32); + + if (index >= intlen) + return -EINVAL; + + out_irq->controller = NULL; + out_irq->specifier[0] = ints[index]; + out_irq->size = 1; + + return 0; +} +#else /* defined(CONFIG_PPC_PMAC) && defined(CONFIG_PPC32) */ +static int of_irq_map_oldworld(struct device_node *device, int index, + struct of_irq *out_irq) +{ + return -EINVAL; +} +#endif /* !(defined(CONFIG_PPC_PMAC) && defined(CONFIG_PPC32)) */ + +int of_irq_map_one(struct device_node *device, int index, struct of_irq *out_irq) +{ + struct device_node *p; + u32 *intspec, *tmp, intsize, intlen, *addr; + int res; + + DBG("of_irq_map_one: dev=%s, index=%d\n", device->full_name, index); + + /* OldWorld mac stuff is "special", handle out of line */ + if (of_irq_workarounds & OF_IMAP_OLDWORLD_MAC) + return of_irq_map_oldworld(device, index, out_irq); + + /* Get the interrupts property */ + intspec = (u32 *)get_property(device, "interrupts", &intlen); + if (intspec == NULL) + return -EINVAL; + intlen /= sizeof(u32); + + /* Get the reg property (if any) */ + addr = (u32 *)get_property(device, "reg", NULL); + + /* Look for the interrupt parent. 
*/ + p = of_irq_find_parent(device); + if (p == NULL) + return -EINVAL; + + /* Get size of interrupt specifier */ + tmp = (u32 *)get_property(p, "#interrupt-cells", NULL); + if (tmp == NULL) { + of_node_put(p); + return -EINVAL; + } + intsize = *tmp; + + /* Check index */ + if (index * intsize >= intlen) + return -EINVAL; + + /* Get new specifier and map it */ + res = of_irq_map_raw(p, intspec + index * intsize, addr, out_irq); + of_node_put(p); + return res; +} +EXPORT_SYMBOL_GPL(of_irq_map_one); + +int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq) +{ + struct device_node *dn, *ppnode; + struct pci_dev *ppdev; + u32 lspec; + u32 laddr[3]; + u8 pin; + int rc; + + /* Check if we have a device node, if yes, fallback to standard OF + * parsing + */ + dn = pci_device_to_OF_node(pdev); + if (dn) + return of_irq_map_one(dn, 0, out_irq); + + /* Ok, we don't, time to have fun. Let's start by building up an + * interrupt spec. we assume #interrupt-cells is 1, which is standard + * for PCI. If you do different, then don't use that routine. + */ + rc = pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, &pin); + if (rc != 0) + return rc; + /* No pin, exit */ + if (pin == 0) + return -ENODEV; + + /* Now we walk up the PCI tree */ + lspec = pin; + for (;;) { + /* Get the pci_dev of our parent */ + ppdev = pdev->bus->self; + + /* Ouch, it's a host bridge... */ + if (ppdev == NULL) { +#ifdef CONFIG_PPC64 + ppnode = pci_bus_to_OF_node(pdev->bus); +#else + struct pci_controller *host; + host = pci_bus_to_host(pdev->bus); + ppnode = host ? host->arch_data : NULL; +#endif + /* No node for host bridge ? give up */ + if (ppnode == NULL) + return -EINVAL; + } else + /* We found a P2P bridge, check if it has a node */ + ppnode = pci_device_to_OF_node(ppdev); + + /* Ok, we have found a parent with a device-node, hand over to + * the OF parsing code. + * We build a unit address from the linux device to be used for + * resolution. 
Note that we use the linux bus number which may + * not match your firmware bus numbering. + * Fortunately, in most cases, interrupt-map-mask doesn't include + * the bus number as part of the matching. + * You should still be careful about that though if you intend + * to rely on this function (you ship a firmware that doesn't + * create device nodes for all PCI devices). + */ + if (ppnode) + break; + + /* We can only get here if we hit a P2P bridge with no node, + * let's do standard swizzling and try again + */ + lspec = of_irq_pci_swizzle(PCI_SLOT(pdev->devfn), lspec); + pdev = ppdev; + } + + laddr[0] = (pdev->bus->number << 16) + | (pdev->devfn << 8); + laddr[1] = laddr[2] = 0; + return of_irq_map_raw(ppnode, &lspec, laddr, out_irq); +} +EXPORT_SYMBOL_GPL(of_irq_map_pci); + diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c index 6eb7e49b394a..cda022657324 100644 --- a/arch/powerpc/kernel/rtas_pci.c +++ b/arch/powerpc/kernel/rtas_pci.c @@ -297,19 +297,9 @@ unsigned long __init find_and_init_phbs(void) struct device_node *node; struct pci_controller *phb; unsigned int index; - unsigned int root_size_cells = 0; - unsigned int *opprop = NULL; struct device_node *root = of_find_node_by_path("/"); - if (ppc64_interrupt_controller == IC_OPEN_PIC) { - opprop = (unsigned int *)get_property(root, - "platform-open-pic", NULL); - } - - root_size_cells = prom_n_size_cells(root); - index = 0; - for (node = of_get_next_child(root, NULL); node != NULL; node = of_get_next_child(root, node)) { @@ -324,13 +314,6 @@ unsigned long __init find_and_init_phbs(void) setup_phb(node, phb); pci_process_bridge_OF_ranges(phb, node, 0); pci_setup_phb_io(phb, index == 0); -#ifdef CONFIG_PPC_PSERIES - /* XXX This code need serious fixing ... 
--BenH */ - if (ppc64_interrupt_controller == IC_OPEN_PIC && pSeries_mpic) { - int addr = root_size_cells * (index + 2) - 1; - mpic_assign_isu(pSeries_mpic, index, opprop[addr]); - } -#endif index++; } diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index ba7cd50d820d..e0df2ba1ab9f 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -51,7 +51,6 @@ extern void bootx_init(unsigned long r4, unsigned long phys); -boot_infos_t *boot_infos; struct ide_machdep_calls ppc_ide_md; int boot_cpuid; @@ -240,7 +239,6 @@ void __init setup_arch(char **cmdline_p) ppc_md.init_early(); find_legacy_serial_ports(); - finish_device_tree(); smp_setup_cpu_maps(); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index ac7276c40685..fd1785e4c9bb 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -361,12 +361,15 @@ void __init setup_system(void) /* * Fill the ppc64_caches & systemcfg structures with informations - * retrieved from the device-tree. Need to be called before - * finish_device_tree() since the later requires some of the - * informations filled up here to properly parse the interrupt tree. + * retrieved from the device-tree. 
*/ initialize_cache_info(); + /* + * Initialize irq remapping subsystem + */ + irq_early_init(); + #ifdef CONFIG_PPC_RTAS /* * Initialize RTAS if available @@ -394,12 +397,6 @@ void __init setup_system(void) find_legacy_serial_ports(); /* - * "Finish" the device-tree, that is do the actual parsing of - * some of the properties like the interrupt map - */ - finish_device_tree(); - - /* * Initialize xmon */ #ifdef CONFIG_XMON_DEFAULT @@ -427,8 +424,6 @@ void __init setup_system(void) printk("-----------------------------------------------------\n"); printk("ppc64_pft_size = 0x%lx\n", ppc64_pft_size); - printk("ppc64_interrupt_controller = 0x%ld\n", - ppc64_interrupt_controller); printk("physicalMemorySize = 0x%lx\n", lmb_phys_mem_size()); printk("ppc64_caches.dcache_line_size = 0x%x\n", ppc64_caches.dline_size); diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index cdf5867838a6..fad8580f9081 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -218,7 +218,6 @@ struct vio_dev * __devinit vio_register_device_node(struct device_node *of_node) { struct vio_dev *viodev; unsigned int *unit_address; - unsigned int *irq_p; /* we need the 'device_type' property, in order to match with drivers */ if (of_node->type == NULL) { @@ -243,16 +242,7 @@ struct vio_dev * __devinit vio_register_device_node(struct device_node *of_node) viodev->dev.platform_data = of_node_get(of_node); - viodev->irq = NO_IRQ; - irq_p = (unsigned int *)get_property(of_node, "interrupts", NULL); - if (irq_p) { - int virq = virt_irq_create_mapping(*irq_p); - if (virq == NO_IRQ) { - printk(KERN_ERR "Unable to allocate interrupt " - "number for %s\n", of_node->full_name); - } else - viodev->irq = irq_offset_up(virq); - } + viodev->irq = irq_of_parse_and_map(of_node, 0); snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%x", *unit_address); viodev->name = of_node->name; diff --git a/arch/powerpc/platforms/83xx/Kconfig b/arch/powerpc/platforms/83xx/Kconfig index 
7675e675dce1..5fe7b7faf45f 100644 --- a/arch/powerpc/platforms/83xx/Kconfig +++ b/arch/powerpc/platforms/83xx/Kconfig @@ -16,12 +16,21 @@ config MPC834x_SYS 3 PCI slots. The PIBs PCI initialization is the bootloader's responsiblilty. +config MPC834x_ITX + bool "Freescale MPC834x ITX" + select DEFAULT_UIMAGE + help + This option enables support for the MPC 834x ITX evaluation board. + + Be aware that PCI initialization is the bootloader's + responsiblilty. + endchoice config MPC834x bool select PPC_UDBG_16550 select PPC_INDIRECT_PCI - default y if MPC834x_SYS + default y if MPC834x_SYS || MPC834x_ITX endmenu diff --git a/arch/powerpc/platforms/83xx/Makefile b/arch/powerpc/platforms/83xx/Makefile index 5c72367441a8..9387a110d28a 100644 --- a/arch/powerpc/platforms/83xx/Makefile +++ b/arch/powerpc/platforms/83xx/Makefile @@ -4,3 +4,4 @@ obj-y := misc.o obj-$(CONFIG_PCI) += pci.o obj-$(CONFIG_MPC834x_SYS) += mpc834x_sys.o +obj-$(CONFIG_MPC834x_ITX) += mpc834x_itx.o diff --git a/arch/powerpc/platforms/83xx/mpc834x_itx.c b/arch/powerpc/platforms/83xx/mpc834x_itx.c new file mode 100644 index 000000000000..b46305645d38 --- /dev/null +++ b/arch/powerpc/platforms/83xx/mpc834x_itx.c @@ -0,0 +1,156 @@ +/* + * arch/powerpc/platforms/83xx/mpc834x_itx.c + * + * MPC834x ITX board specific routines + * + * Maintainer: Kumar Gala <galak@kernel.crashing.org> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ + +#include <linux/config.h> +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/reboot.h> +#include <linux/pci.h> +#include <linux/kdev_t.h> +#include <linux/major.h> +#include <linux/console.h> +#include <linux/delay.h> +#include <linux/seq_file.h> +#include <linux/root_dev.h> + +#include <asm/system.h> +#include <asm/atomic.h> +#include <asm/time.h> +#include <asm/io.h> +#include <asm/machdep.h> +#include <asm/ipic.h> +#include <asm/bootinfo.h> +#include <asm/irq.h> +#include <asm/prom.h> +#include <asm/udbg.h> +#include <sysdev/fsl_soc.h> + +#include "mpc83xx.h" + +#include <platforms/83xx/mpc834x_sys.h> + +#ifndef CONFIG_PCI +unsigned long isa_io_base = 0; +unsigned long isa_mem_base = 0; +#endif + +#ifdef CONFIG_PCI +static int +mpc83xx_map_irq(struct pci_dev *dev, unsigned char idsel, unsigned char pin) +{ + static char pci_irq_table[][4] = + /* + * PCI IDSEL/INTPIN->INTLINE + * A B C D + */ + { + {PIRQB, PIRQC, PIRQD, PIRQA}, /* idsel 0x0e */ + {PIRQA, PIRQB, PIRQC, PIRQD}, /* idsel 0x0f */ + {PIRQC, PIRQD, PIRQA, PIRQB}, /* idsel 0x10 */ + }; + + const long min_idsel = 0x0e, max_idsel = 0x10, irqs_per_slot = 4; + return PCI_IRQ_TABLE_LOOKUP; +} +#endif /* CONFIG_PCI */ + +/* ************************************************************************ + * + * Setup the architecture + * + */ +static void __init mpc834x_itx_setup_arch(void) +{ + struct device_node *np; + + if (ppc_md.progress) + ppc_md.progress("mpc834x_itx_setup_arch()", 0); + + np = of_find_node_by_type(NULL, "cpu"); + if (np != 0) { + unsigned int *fp = + (int *)get_property(np, "clock-frequency", NULL); + if (fp != 0) + loops_per_jiffy = *fp / HZ; + else + loops_per_jiffy = 50000000 / HZ; + of_node_put(np); + } +#ifdef CONFIG_PCI + for (np = NULL; (np = of_find_node_by_type(np, "pci")) != NULL;) + add_bridge(np); + + ppc_md.pci_swizzle = common_swizzle; + ppc_md.pci_map_irq = mpc83xx_map_irq; + 
ppc_md.pci_exclude_device = mpc83xx_exclude_device; +#endif + +#ifdef CONFIG_ROOT_NFS + ROOT_DEV = Root_NFS; +#else + ROOT_DEV = Root_HDA1; +#endif +} + +void __init mpc834x_itx_init_IRQ(void) +{ + u8 senses[8] = { + 0, /* EXT 0 */ + IRQ_SENSE_LEVEL, /* EXT 1 */ + IRQ_SENSE_LEVEL, /* EXT 2 */ + 0, /* EXT 3 */ +#ifdef CONFIG_PCI + IRQ_SENSE_LEVEL, /* EXT 4 */ + IRQ_SENSE_LEVEL, /* EXT 5 */ + IRQ_SENSE_LEVEL, /* EXT 6 */ + IRQ_SENSE_LEVEL, /* EXT 7 */ +#else + 0, /* EXT 4 */ + 0, /* EXT 5 */ + 0, /* EXT 6 */ + 0, /* EXT 7 */ +#endif + }; + + ipic_init(get_immrbase() + 0x00700, 0, 0, senses, 8); + + /* Initialize the default interrupt mapping priorities, + * in case the boot rom changed something on us. + */ + ipic_set_default_priority(); +} + +/* + * Called very early, MMU is off, device-tree isn't unflattened + */ +static int __init mpc834x_itx_probe(void) +{ + /* We always match for now, eventually we should look at the flat + dev tree to ensure this is the board we are suppose to run on + */ + return 1; +} + +define_machine(mpc834x_itx) { + .name = "MPC834x ITX", + .probe = mpc834x_itx_probe, + .setup_arch = mpc834x_itx_setup_arch, + .init_IRQ = mpc834x_itx_init_IRQ, + .get_irq = ipic_get_irq, + .restart = mpc83xx_restart, + .time_init = mpc83xx_time_init, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/83xx/mpc834x_itx.h b/arch/powerpc/platforms/83xx/mpc834x_itx.h new file mode 100644 index 000000000000..174ca4ef55f3 --- /dev/null +++ b/arch/powerpc/platforms/83xx/mpc834x_itx.h @@ -0,0 +1,23 @@ +/* + * arch/powerpc/platforms/83xx/mpc834x_itx.h + * + * MPC834X ITX common board definitions + * + * Maintainer: Kumar Gala <galak@kernel.crashing.org> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later 
version. + * + */ + +#ifndef __MACH_MPC83XX_ITX_H__ +#define __MACH_MPC83XX_ITX_H__ + +#define PIRQA MPC83xx_IRQ_EXT4 +#define PIRQB MPC83xx_IRQ_EXT5 +#define PIRQC MPC83xx_IRQ_EXT6 +#define PIRQD MPC83xx_IRQ_EXT7 + +#endif /* __MACH_MPC83XX_ITX_H__ */ diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index 22da1335445a..9d5da7896892 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -1,6 +1,9 @@ /* * Cell Internal Interrupt Controller * + * Copyright (C) 2006 Benjamin Herrenschmidt (benh@kernel.crashing.org) + * IBM, Corp. + * * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 * * Author: Arnd Bergmann <arndb@de.ibm.com> @@ -25,11 +28,13 @@ #include <linux/module.h> #include <linux/percpu.h> #include <linux/types.h> +#include <linux/ioport.h> #include <asm/io.h> #include <asm/pgtable.h> #include <asm/prom.h> #include <asm/ptrace.h> +#include <asm/machdep.h> #include "interrupt.h" #include "cbe_regs.h" @@ -37,231 +42,65 @@ struct iic { struct cbe_iic_thread_regs __iomem *regs; u8 target_id; + u8 eoi_stack[16]; + int eoi_ptr; + struct irq_host *host; }; static DEFINE_PER_CPU(struct iic, iic); +#define IIC_NODE_COUNT 2 +static struct irq_host *iic_hosts[IIC_NODE_COUNT]; -void iic_local_enable(void) +/* Convert between "pending" bits and hw irq number */ +static irq_hw_number_t iic_pending_to_hwnum(struct cbe_iic_pending_bits bits) { - struct iic *iic = &__get_cpu_var(iic); - u64 tmp; - - /* - * There seems to be a bug that is present in DD2.x CPUs - * and still only partially fixed in DD3.1. - * This bug causes a value written to the priority register - * not to make it there, resulting in a system hang unless we - * write it again. - * Masking with 0xf0 is done because the Cell BE does not - * implement the lower four bits of the interrupt priority, - * they always read back as zeroes, although future CPUs - * might implement different bits. 
- */ - do { - out_be64(&iic->regs->prio, 0xff); - tmp = in_be64(&iic->regs->prio); - } while ((tmp & 0xf0) != 0xf0); -} - -void iic_local_disable(void) -{ - out_be64(&__get_cpu_var(iic).regs->prio, 0x0); -} + unsigned char unit = bits.source & 0xf; -static unsigned int iic_startup(unsigned int irq) -{ - return 0; + if (bits.flags & CBE_IIC_IRQ_IPI) + return IIC_IRQ_IPI0 | (bits.prio >> 4); + else if (bits.class <= 3) + return (bits.class << 4) | unit; + else + return IIC_IRQ_INVALID; } -static void iic_enable(unsigned int irq) +static void iic_mask(unsigned int irq) { - iic_local_enable(); } -static void iic_disable(unsigned int irq) +static void iic_unmask(unsigned int irq) { } -static void iic_end(unsigned int irq) +static void iic_eoi(unsigned int irq) { - iic_local_enable(); + struct iic *iic = &__get_cpu_var(iic); + out_be64(&iic->regs->prio, iic->eoi_stack[--iic->eoi_ptr]); + BUG_ON(iic->eoi_ptr < 0); } -static struct hw_interrupt_type iic_pic = { +static struct irq_chip iic_chip = { .typename = " CELL-IIC ", - .startup = iic_startup, - .enable = iic_enable, - .disable = iic_disable, - .end = iic_end, + .mask = iic_mask, + .unmask = iic_unmask, + .eoi = iic_eoi, }; -static int iic_external_get_irq(struct cbe_iic_pending_bits pending) -{ - int irq; - unsigned char node, unit; - - node = pending.source >> 4; - unit = pending.source & 0xf; - irq = -1; - - /* - * This mapping is specific to the Cell Broadband - * Engine. We might need to get the numbers - * from the device tree to support future CPUs. - */ - switch (unit) { - case 0x00: - case 0x0b: - /* - * One of these units can be connected - * to an external interrupt controller. - */ - if (pending.class != 2) - break; - irq = IIC_EXT_OFFSET - + spider_get_irq(node) - + node * IIC_NODE_STRIDE; - break; - case 0x01 ... 0x04: - case 0x07 ... 
0x0a: - /* - * These units are connected to the SPEs - */ - if (pending.class > 2) - break; - irq = IIC_SPE_OFFSET - + pending.class * IIC_CLASS_STRIDE - + node * IIC_NODE_STRIDE - + unit; - break; - } - if (irq == -1) - printk(KERN_WARNING "Unexpected interrupt class %02x, " - "source %02x, prio %02x, cpu %02x\n", pending.class, - pending.source, pending.prio, smp_processor_id()); - return irq; -} - /* Get an IRQ number from the pending state register of the IIC */ -int iic_get_irq(struct pt_regs *regs) +static unsigned int iic_get_irq(struct pt_regs *regs) { - struct iic *iic; - int irq; - struct cbe_iic_pending_bits pending; - - iic = &__get_cpu_var(iic); - *(unsigned long *) &pending = - in_be64((unsigned long __iomem *) &iic->regs->pending_destr); - - irq = -1; - if (pending.flags & CBE_IIC_IRQ_VALID) { - if (pending.flags & CBE_IIC_IRQ_IPI) { - irq = IIC_IPI_OFFSET + (pending.prio >> 4); -/* - if (irq > 0x80) - printk(KERN_WARNING "Unexpected IPI prio %02x" - "on CPU %02x\n", pending.prio, - smp_processor_id()); -*/ - } else { - irq = iic_external_get_irq(pending); - } - } - return irq; -} - -/* hardcoded part to be compatible with older firmware */ - -static int setup_iic_hardcoded(void) -{ - struct device_node *np; - int nodeid, cpu; - unsigned long regs; - struct iic *iic; - - for_each_possible_cpu(cpu) { - iic = &per_cpu(iic, cpu); - nodeid = cpu/2; - - for (np = of_find_node_by_type(NULL, "cpu"); - np; - np = of_find_node_by_type(np, "cpu")) { - if (nodeid == *(int *)get_property(np, "node-id", NULL)) - break; - } - - if (!np) { - printk(KERN_WARNING "IIC: CPU %d not found\n", cpu); - iic->regs = NULL; - iic->target_id = 0xff; - return -ENODEV; - } - - regs = *(long *)get_property(np, "iic", NULL); - - /* hack until we have decided on the devtree info */ - regs += 0x400; - if (cpu & 1) - regs += 0x20; - - printk(KERN_INFO "IIC for CPU %d at %lx\n", cpu, regs); - iic->regs = ioremap(regs, sizeof(struct cbe_iic_thread_regs)); - iic->target_id = (nodeid << 
4) + ((cpu & 1) ? 0xf : 0xe); - } - - return 0; -} - -static int setup_iic(void) -{ - struct device_node *dn; - unsigned long *regs; - char *compatible; - unsigned *np, found = 0; - struct iic *iic = NULL; - - for (dn = NULL; (dn = of_find_node_by_name(dn, "interrupt-controller"));) { - compatible = (char *)get_property(dn, "compatible", NULL); - - if (!compatible) { - printk(KERN_WARNING "no compatible property found !\n"); - continue; - } - - if (strstr(compatible, "IBM,CBEA-Internal-Interrupt-Controller")) - regs = (unsigned long *)get_property(dn,"reg", NULL); - else - continue; - - if (!regs) - printk(KERN_WARNING "IIC: no reg property\n"); - - np = (unsigned int *)get_property(dn, "ibm,interrupt-server-ranges", NULL); - - if (!np) { - printk(KERN_WARNING "IIC: CPU association not found\n"); - iic->regs = NULL; - iic->target_id = 0xff; - return -ENODEV; - } - - iic = &per_cpu(iic, np[0]); - iic->regs = ioremap(regs[0], sizeof(struct cbe_iic_thread_regs)); - iic->target_id = ((np[0] & 2) << 3) + ((np[0] & 1) ? 0xf : 0xe); - printk("IIC for CPU %d at %lx mapped to %p\n", np[0], regs[0], iic->regs); - - iic = &per_cpu(iic, np[1]); - iic->regs = ioremap(regs[2], sizeof(struct cbe_iic_thread_regs)); - iic->target_id = ((np[1] & 2) << 3) + ((np[1] & 1) ? 
0xf : 0xe); - printk("IIC for CPU %d at %lx mapped to %p\n", np[1], regs[2], iic->regs); - - found++; - } - - if (found) - return 0; - else - return -ENODEV; + struct cbe_iic_pending_bits pending; + struct iic *iic; + + iic = &__get_cpu_var(iic); + *(unsigned long *) &pending = + in_be64((unsigned long __iomem *) &iic->regs->pending_destr); + iic->eoi_stack[++iic->eoi_ptr] = pending.prio; + BUG_ON(iic->eoi_ptr > 15); + if (pending.flags & CBE_IIC_IRQ_VALID) + return irq_linear_revmap(iic->host, + iic_pending_to_hwnum(pending)); + return NO_IRQ; } #ifdef CONFIG_SMP @@ -269,12 +108,12 @@ static int setup_iic(void) /* Use the highest interrupt priorities for IPI */ static inline int iic_ipi_to_irq(int ipi) { - return IIC_IPI_OFFSET + IIC_NUM_IPIS - 1 - ipi; + return IIC_IRQ_IPI0 + IIC_NUM_IPIS - 1 - ipi; } static inline int iic_irq_to_ipi(int irq) { - return IIC_NUM_IPIS - 1 - (irq - IIC_IPI_OFFSET); + return IIC_NUM_IPIS - 1 - (irq - IIC_IRQ_IPI0); } void iic_setup_cpu(void) @@ -293,22 +132,51 @@ u8 iic_get_target_id(int cpu) } EXPORT_SYMBOL_GPL(iic_get_target_id); +struct irq_host *iic_get_irq_host(int node) +{ + if (node < 0 || node >= IIC_NODE_COUNT) + return NULL; + return iic_hosts[node]; +} +EXPORT_SYMBOL_GPL(iic_get_irq_host); + + static irqreturn_t iic_ipi_action(int irq, void *dev_id, struct pt_regs *regs) { - smp_message_recv(iic_irq_to_ipi(irq), regs); + int ipi = (int)(long)dev_id; + + smp_message_recv(ipi, regs); + return IRQ_HANDLED; } static void iic_request_ipi(int ipi, const char *name) { - int irq; - - irq = iic_ipi_to_irq(ipi); - /* IPIs are marked IRQF_DISABLED as they must run with irqs - * disabled */ - get_irq_desc(irq)->chip = &iic_pic; - get_irq_desc(irq)->status |= IRQ_PER_CPU; - request_irq(irq, iic_ipi_action, IRQF_DISABLED, name, NULL); + int node, virq; + + for (node = 0; node < IIC_NODE_COUNT; node++) { + char *rname; + if (iic_hosts[node] == NULL) + continue; + virq = irq_create_mapping(iic_hosts[node], + iic_ipi_to_irq(ipi), 0); + if 
(virq == NO_IRQ) { + printk(KERN_ERR + "iic: failed to map IPI %s on node %d\n", + name, node); + continue; + } + rname = kzalloc(strlen(name) + 16, GFP_KERNEL); + if (rname) + sprintf(rname, "%s node %d", name, node); + else + rname = (char *)name; + if (request_irq(virq, iic_ipi_action, IRQF_DISABLED, + rname, (void *)(long)ipi)) + printk(KERN_ERR + "iic: failed to request IPI %s on node %d\n", + name, node); + } } void iic_request_IPIs(void) @@ -319,34 +187,119 @@ void iic_request_IPIs(void) iic_request_ipi(PPC_MSG_DEBUGGER_BREAK, "IPI-debug"); #endif /* CONFIG_DEBUGGER */ } + #endif /* CONFIG_SMP */ -static void iic_setup_spe_handlers(void) + +static int iic_host_match(struct irq_host *h, struct device_node *node) +{ + return h->host_data != NULL && node == h->host_data; +} + +static int iic_host_map(struct irq_host *h, unsigned int virq, + irq_hw_number_t hw, unsigned int flags) +{ + if (hw < IIC_IRQ_IPI0) + set_irq_chip_and_handler(virq, &iic_chip, handle_fasteoi_irq); + else + set_irq_chip_and_handler(virq, &iic_chip, handle_percpu_irq); + return 0; +} + +static int iic_host_xlate(struct irq_host *h, struct device_node *ct, + u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_flags) + +{ + /* Currently, we don't translate anything. That needs to be fixed as + * we get better defined device-trees. iic interrupts have to be + * explicitly mapped by whoever needs them + */ + return -ENODEV; +} + +static struct irq_host_ops iic_host_ops = { + .match = iic_host_match, + .map = iic_host_map, + .xlate = iic_host_xlate, +}; + +static void __init init_one_iic(unsigned int hw_cpu, unsigned long addr, + struct irq_host *host) { - int be, isrc; + /* XXX FIXME: should locate the linux CPU number from the HW cpu + * number properly. 
We are lucky for now + */ + struct iic *iic = &per_cpu(iic, hw_cpu); + + iic->regs = ioremap(addr, sizeof(struct cbe_iic_thread_regs)); + BUG_ON(iic->regs == NULL); - /* Assume two threads per BE are present */ - for (be=0; be < num_present_cpus() / 2; be++) { - for (isrc = 0; isrc < IIC_CLASS_STRIDE * 3; isrc++) { - int irq = IIC_NODE_STRIDE * be + IIC_SPE_OFFSET + isrc; - get_irq_desc(irq)->chip = &iic_pic; + iic->target_id = ((hw_cpu & 2) << 3) | ((hw_cpu & 1) ? 0xf : 0xe); + iic->eoi_stack[0] = 0xff; + iic->host = host; + out_be64(&iic->regs->prio, 0); + + printk(KERN_INFO "IIC for CPU %d at %lx mapped to %p, target id 0x%x\n", + hw_cpu, addr, iic->regs, iic->target_id); +} + +static int __init setup_iic(void) +{ + struct device_node *dn; + struct resource r0, r1; + struct irq_host *host; + int found = 0; + u32 *np; + + for (dn = NULL; + (dn = of_find_node_by_name(dn,"interrupt-controller")) != NULL;) { + if (!device_is_compatible(dn, + "IBM,CBEA-Internal-Interrupt-Controller")) + continue; + np = (u32 *)get_property(dn, "ibm,interrupt-server-ranges", + NULL); + if (np == NULL) { + printk(KERN_WARNING "IIC: CPU association not found\n"); + of_node_put(dn); + return -ENODEV; + } + if (of_address_to_resource(dn, 0, &r0) || + of_address_to_resource(dn, 1, &r1)) { + printk(KERN_WARNING "IIC: Can't resolve addresses\n"); + of_node_put(dn); + return -ENODEV; } + host = NULL; + if (found < IIC_NODE_COUNT) { + host = irq_alloc_host(IRQ_HOST_MAP_LINEAR, + IIC_SOURCE_COUNT, + &iic_host_ops, + IIC_IRQ_INVALID); + iic_hosts[found] = host; + BUG_ON(iic_hosts[found] == NULL); + iic_hosts[found]->host_data = of_node_get(dn); + found++; + } + init_one_iic(np[0], r0.start, host); + init_one_iic(np[1], r1.start, host); } + + if (found) + return 0; + else + return -ENODEV; } -void iic_init_IRQ(void) +void __init iic_init_IRQ(void) { - int cpu, irq_offset; - struct iic *iic; - + /* Discover and initialize iics */ if (setup_iic() < 0) - setup_iic_hardcoded(); + panic("IIC: Failed 
to initialize !\n"); - irq_offset = 0; - for_each_possible_cpu(cpu) { - iic = &per_cpu(iic, cpu); - if (iic->regs) - out_be64(&iic->regs->prio, 0xff); - } - iic_setup_spe_handlers(); + /* Set master interrupt handling function */ + ppc_md.get_irq = iic_get_irq; + + /* Enable on current CPU */ + iic_setup_cpu(); } diff --git a/arch/powerpc/platforms/cell/interrupt.h b/arch/powerpc/platforms/cell/interrupt.h index 799f77d98f96..5560a92ec3ab 100644 --- a/arch/powerpc/platforms/cell/interrupt.h +++ b/arch/powerpc/platforms/cell/interrupt.h @@ -37,27 +37,24 @@ */ enum { - IIC_EXT_OFFSET = 0x00, /* Start of south bridge IRQs */ - IIC_NUM_EXT = 0x40, /* Number of south bridge IRQs */ - IIC_SPE_OFFSET = 0x40, /* Start of SPE interrupts */ - IIC_CLASS_STRIDE = 0x10, /* SPE IRQs per class */ - IIC_IPI_OFFSET = 0x70, /* Start of IPI IRQs */ - IIC_NUM_IPIS = 0x10, /* IRQs reserved for IPI */ - IIC_NODE_STRIDE = 0x80, /* Total IRQs per node */ + IIC_IRQ_INVALID = 0xff, + IIC_IRQ_MAX = 0x3f, + IIC_IRQ_EXT_IOIF0 = 0x20, + IIC_IRQ_EXT_IOIF1 = 0x2b, + IIC_IRQ_IPI0 = 0x40, + IIC_NUM_IPIS = 0x10, /* IRQs reserved for IPI */ + IIC_SOURCE_COUNT = 0x50, }; extern void iic_init_IRQ(void); -extern int iic_get_irq(struct pt_regs *regs); extern void iic_cause_IPI(int cpu, int mesg); extern void iic_request_IPIs(void); extern void iic_setup_cpu(void); -extern void iic_local_enable(void); -extern void iic_local_disable(void); extern u8 iic_get_target_id(int cpu); +extern struct irq_host *iic_get_irq_host(int node); extern void spider_init_IRQ(void); -extern int spider_get_irq(int node); #endif #endif /* ASM_CELL_PIC_H */ diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c index d8c2a29b3c15..282987d6d4a2 100644 --- a/arch/powerpc/platforms/cell/setup.c +++ b/arch/powerpc/platforms/cell/setup.c @@ -49,6 +49,7 @@ #include <asm/irq.h> #include <asm/spu.h> #include <asm/spu_priv1.h> +#include <asm/udbg.h> #include "interrupt.h" #include "iommu.h" @@ -79,10 
+80,22 @@ static void cell_progress(char *s, unsigned short hex) printk("*** %04x : %s\n", hex, s ? s : ""); } +static void __init cell_pcibios_fixup(void) +{ + struct pci_dev *dev = NULL; + + for_each_pci_dev(dev) + pci_read_irq_line(dev); +} + +static void __init cell_init_irq(void) +{ + iic_init_IRQ(); + spider_init_IRQ(); +} + static void __init cell_setup_arch(void) { - ppc_md.init_IRQ = iic_init_IRQ; - ppc_md.get_irq = iic_get_irq; #ifdef CONFIG_SPU_BASE spu_priv1_ops = &spu_priv1_mmio_ops; #endif @@ -108,7 +121,6 @@ static void __init cell_setup_arch(void) /* Find and initialize PCI host bridges */ init_pci_config_tokens(); find_and_init_phbs(); - spider_init_IRQ(); cbe_pervasive_init(); #ifdef CONFIG_DUMMY_CONSOLE conswitchp = &dummy_con; @@ -126,8 +138,6 @@ static void __init cell_init_early(void) cell_init_iommu(); - ppc64_interrupt_controller = IC_CELL_PIC; - DBG(" <- cell_init_early()\n"); } @@ -173,6 +183,8 @@ define_machine(cell) { .calibrate_decr = generic_calibrate_decr, .check_legacy_ioport = cell_check_legacy_ioport, .progress = cell_progress, + .init_IRQ = cell_init_irq, + .pcibios_fixup = cell_pcibios_fixup, #ifdef CONFIG_KEXEC .machine_kexec = default_machine_kexec, .machine_kexec_prepare = default_machine_kexec_prepare, diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c index 7c3a0b6d34fd..ae7ef88f1a37 100644 --- a/arch/powerpc/platforms/cell/spider-pic.c +++ b/arch/powerpc/platforms/cell/spider-pic.c @@ -22,6 +22,7 @@ #include <linux/interrupt.h> #include <linux/irq.h> +#include <linux/ioport.h> #include <asm/pgtable.h> #include <asm/prom.h> @@ -56,184 +57,313 @@ enum { REISWAITEN = 0x508, /* Reissue Wait Control*/ }; -static void __iomem *spider_pics[4]; +#define SPIDER_CHIP_COUNT 4 +#define SPIDER_SRC_COUNT 64 +#define SPIDER_IRQ_INVALID 63 -static void __iomem *spider_get_pic(int irq) -{ - int node = irq / IIC_NODE_STRIDE; - irq %= IIC_NODE_STRIDE; - - if (irq >= IIC_EXT_OFFSET && - irq < 
IIC_EXT_OFFSET + IIC_NUM_EXT && - spider_pics) - return spider_pics[node]; - return NULL; -} +struct spider_pic { + struct irq_host *host; + struct device_node *of_node; + void __iomem *regs; + unsigned int node_id; +}; +static struct spider_pic spider_pics[SPIDER_CHIP_COUNT]; -static int spider_get_nr(unsigned int irq) +static struct spider_pic *spider_virq_to_pic(unsigned int virq) { - return (irq % IIC_NODE_STRIDE) - IIC_EXT_OFFSET; + return irq_map[virq].host->host_data; } -static void __iomem *spider_get_irq_config(int irq) +static void __iomem *spider_get_irq_config(struct spider_pic *pic, + unsigned int src) { - void __iomem *pic; - pic = spider_get_pic(irq); - return pic + TIR_CFGA + 8 * spider_get_nr(irq); + return pic->regs + TIR_CFGA + 8 * src; } -static void spider_enable_irq(unsigned int irq) +static void spider_unmask_irq(unsigned int virq) { - int nodeid = (irq / IIC_NODE_STRIDE) * 0x10; - void __iomem *cfg = spider_get_irq_config(irq); - irq = spider_get_nr(irq); + struct spider_pic *pic = spider_virq_to_pic(virq); + void __iomem *cfg = spider_get_irq_config(pic, irq_map[virq].hwirq); - out_be32(cfg, (in_be32(cfg) & ~0xf0)| 0x3107000eu | nodeid); - out_be32(cfg + 4, in_be32(cfg + 4) | 0x00020000u | irq); + /* We use no locking as we should be covered by the descriptor lock + * for access to individual source configuration registers + */ + out_be32(cfg, in_be32(cfg) | 0x30000000u); } -static void spider_disable_irq(unsigned int irq) +static void spider_mask_irq(unsigned int virq) { - void __iomem *cfg = spider_get_irq_config(irq); - irq = spider_get_nr(irq); + struct spider_pic *pic = spider_virq_to_pic(virq); + void __iomem *cfg = spider_get_irq_config(pic, irq_map[virq].hwirq); + /* We use no locking as we should be covered by the descriptor lock + * for access to individual source configuration registers + */ out_be32(cfg, in_be32(cfg) & ~0x30000000u); } -static unsigned int spider_startup_irq(unsigned int irq) +static void spider_ack_irq(unsigned int 
virq) { - spider_enable_irq(irq); - return 0; -} + struct spider_pic *pic = spider_virq_to_pic(virq); + unsigned int src = irq_map[virq].hwirq; -static void spider_shutdown_irq(unsigned int irq) -{ - spider_disable_irq(irq); -} + /* Reset edge detection logic if necessary + */ + if (get_irq_desc(virq)->status & IRQ_LEVEL) + return; -static void spider_end_irq(unsigned int irq) -{ - spider_enable_irq(irq); -} + /* Only interrupts 47 to 50 can be set to edge */ + if (src < 47 || src > 50) + return; -static void spider_ack_irq(unsigned int irq) -{ - spider_disable_irq(irq); - iic_local_enable(); + /* Perform the clear of the edge logic */ + out_be32(pic->regs + TIR_EDC, 0x100 | (src & 0xf)); } -static struct hw_interrupt_type spider_pic = { +static struct irq_chip spider_pic = { .typename = " SPIDER ", - .startup = spider_startup_irq, - .shutdown = spider_shutdown_irq, - .enable = spider_enable_irq, - .disable = spider_disable_irq, + .unmask = spider_unmask_irq, + .mask = spider_mask_irq, .ack = spider_ack_irq, - .end = spider_end_irq, }; -int spider_get_irq(int node) +static int spider_host_match(struct irq_host *h, struct device_node *node) { - unsigned long cs; - void __iomem *regs = spider_pics[node]; - - cs = in_be32(regs + TIR_CS) >> 24; - - if (cs == 63) - return -1; - else - return cs; + struct spider_pic *pic = h->host_data; + return node == pic->of_node; } -/* hardcoded part to be compatible with older firmware */ - -void spider_init_IRQ_hardcoded(void) +static int spider_host_map(struct irq_host *h, unsigned int virq, + irq_hw_number_t hw, unsigned int flags) { - int node; - long spiderpic; - long pics[] = { 0x24000008000, 0x34000008000 }; - int n; - - pr_debug("%s(%d): Using hardcoded defaults\n", __FUNCTION__, __LINE__); - - for (node = 0; node < num_present_cpus()/2; node++) { - spiderpic = pics[node]; - printk(KERN_DEBUG "SPIDER addr: %lx\n", spiderpic); - spider_pics[node] = ioremap(spiderpic, 0x800); - for (n = 0; n < IIC_NUM_EXT; n++) { - int irq = n 
+ IIC_EXT_OFFSET + node * IIC_NODE_STRIDE; - get_irq_desc(irq)->chip = &spider_pic; - } - - /* do not mask any interrupts because of level */ - out_be32(spider_pics[node] + TIR_MSK, 0x0); - - /* disable edge detection clear */ - /* out_be32(spider_pics[node] + TIR_EDC, 0x0); */ - - /* enable interrupt packets to be output */ - out_be32(spider_pics[node] + TIR_PIEN, - in_be32(spider_pics[node] + TIR_PIEN) | 0x1); - - /* Enable the interrupt detection enable bit. Do this last! */ - out_be32(spider_pics[node] + TIR_DEN, - in_be32(spider_pics[node] + TIR_DEN) | 0x1); + unsigned int sense = flags & IRQ_TYPE_SENSE_MASK; + struct spider_pic *pic = h->host_data; + void __iomem *cfg = spider_get_irq_config(pic, hw); + int level = 0; + u32 ic; + + /* Note that only level high is supported for most interrupts */ + if (sense != IRQ_TYPE_NONE && sense != IRQ_TYPE_LEVEL_HIGH && + (hw < 47 || hw > 50)) + return -EINVAL; + + /* Decode sense type */ + switch(sense) { + case IRQ_TYPE_EDGE_RISING: + ic = 0x3; + break; + case IRQ_TYPE_EDGE_FALLING: + ic = 0x2; + break; + case IRQ_TYPE_LEVEL_LOW: + ic = 0x0; + level = 1; + break; + case IRQ_TYPE_LEVEL_HIGH: + case IRQ_TYPE_NONE: + ic = 0x1; + level = 1; + break; + default: + return -EINVAL; } -} -void spider_init_IRQ(void) -{ - long spider_reg; - struct device_node *dn; - char *compatible; - int n, node = 0; + /* Configure the source. One gross hack that was there before and + * that I've kept around is the priority to the BE which I set to + * be the same as the interrupt source number. I don't know whether + * that's supposed to make any kind of sense however, we'll have to + * decide that, but for now, I'm not changing the behaviour. 
+ */ + out_be32(cfg, (ic << 24) | (0x7 << 16) | (pic->node_id << 4) | 0xe); + out_be32(cfg + 4, (0x2 << 16) | (hw & 0xff)); + + if (level) + get_irq_desc(virq)->status |= IRQ_LEVEL; + set_irq_chip_and_handler(virq, &spider_pic, handle_level_irq); + return 0; +} - for (dn = NULL; (dn = of_find_node_by_name(dn, "interrupt-controller"));) { - compatible = (char *)get_property(dn, "compatible", NULL); +static int spider_host_xlate(struct irq_host *h, struct device_node *ct, + u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_flags) - if (!compatible) - continue; +{ + /* Spider interrupts have 2 cells, first is the interrupt source, + * second, well, I don't know for sure yet ... We mask the top bits + * because old device-trees encode a node number in there + */ + *out_hwirq = intspec[0] & 0x3f; + *out_flags = IRQ_TYPE_LEVEL_HIGH; + return 0; +} - if (strstr(compatible, "CBEA,platform-spider-pic")) - spider_reg = *(long *)get_property(dn,"reg", NULL); - else if (strstr(compatible, "sti,platform-spider-pic")) { - spider_init_IRQ_hardcoded(); - return; - } else - continue; +static struct irq_host_ops spider_host_ops = { + .match = spider_host_match, + .map = spider_host_map, + .xlate = spider_host_xlate, +}; - if (!spider_reg) - printk("interrupt controller does not have reg property !\n"); +static void spider_irq_cascade(unsigned int irq, struct irq_desc *desc, + struct pt_regs *regs) +{ + struct spider_pic *pic = desc->handler_data; + unsigned int cs, virq; - n = prom_n_addr_cells(dn); + cs = in_be32(pic->regs + TIR_CS) >> 24; + if (cs == SPIDER_IRQ_INVALID) + virq = NO_IRQ; + else + virq = irq_linear_revmap(pic->host, cs); + if (virq != NO_IRQ) + generic_handle_irq(virq, regs); + desc->chip->eoi(irq); +} - if ( n != 2) - printk("reg property with invalid number of elements \n"); +/* For hooking up the cascade we have a problem. 
Our device-tree is + * crap and we don't know on which BE iic interrupt we are hooked on at + * least not the "standard" way. We can reconstitute it based on two + * pieces of information though: which BE node we are connected to and whether + * we are connected to IOIF0 or IOIF1. Right now, we really only care + * about the IBM cell blade and we know that its firmware gives us an + * interrupt-map property which is pretty strange. + */ +static unsigned int __init spider_find_cascade_and_node(struct spider_pic *pic) +{ + unsigned int virq; + u32 *imap, *tmp; + int imaplen, intsize, unit; + struct device_node *iic; + struct irq_host *iic_host; + +#if 0 /* Enable that when we have a way to retrieve the node as well */ + /* First, we check whether we have a real "interrupts" in the device + * tree in case the device-tree is ever fixed + */ + struct of_irq oirq; + if (of_irq_map_one(pic->of_node, 0, &oirq) == 0) { + virq = irq_create_of_mapping(oirq.controller, oirq.specifier, + oirq.size); + goto bail; + } +#endif + + /* Now do the horrible hacks */ + tmp = (u32 *)get_property(pic->of_node, "#interrupt-cells", NULL); + if (tmp == NULL) + return NO_IRQ; + intsize = *tmp; + imap = (u32 *)get_property(pic->of_node, "interrupt-map", &imaplen); + if (imap == NULL || imaplen < (intsize + 1)) + return NO_IRQ; + iic = of_find_node_by_phandle(imap[intsize]); + if (iic == NULL) + return NO_IRQ; + imap += intsize + 1; + tmp = (u32 *)get_property(iic, "#interrupt-cells", NULL); + if (tmp == NULL) + return NO_IRQ; + intsize = *tmp; + /* Assume unit is last entry of interrupt specifier */ + unit = imap[intsize - 1]; + /* Ok, we have a unit, now let's try to get the node */ + tmp = (u32 *)get_property(iic, "ibm,interrupt-server-ranges", NULL); + if (tmp == NULL) { + of_node_put(iic); + return NO_IRQ; + } + /* ugly as hell but works for now */ + pic->node_id = (*tmp) >> 1; + of_node_put(iic); + + /* Ok, now let's get cracking. 
You may ask me why I just didn't match + * the iic host from the iic OF node, but that way I'm still compatible + * with really really old old firmwares for which we don't have a node + */ + iic_host = iic_get_irq_host(pic->node_id); + if (iic_host == NULL) + return NO_IRQ; + /* Manufacture an IIC interrupt number of class 2 */ + virq = irq_create_mapping(iic_host, 0x20 | unit, 0); + if (virq == NO_IRQ) + printk(KERN_ERR "spider_pic: failed to map cascade !"); + return virq; +} - spider_pics[node] = ioremap(spider_reg, 0x800); - printk("SPIDER addr: %lx with %i addr_cells mapped to %p\n", - spider_reg, n, spider_pics[node]); +static void __init spider_init_one(struct device_node *of_node, int chip, + unsigned long addr) +{ + struct spider_pic *pic = &spider_pics[chip]; + int i, virq; + + /* Map registers */ + pic->regs = ioremap(addr, 0x1000); + if (pic->regs == NULL) + panic("spider_pic: can't map registers !"); + + /* Allocate a host */ + pic->host = irq_alloc_host(IRQ_HOST_MAP_LINEAR, SPIDER_SRC_COUNT, + &spider_host_ops, SPIDER_IRQ_INVALID); + if (pic->host == NULL) + panic("spider_pic: can't allocate irq host !"); + pic->host->host_data = pic; + + /* Fill out other bits */ + pic->of_node = of_node_get(of_node); + + /* Go through all sources and disable them */ + for (i = 0; i < SPIDER_SRC_COUNT; i++) { + void __iomem *cfg = pic->regs + TIR_CFGA + 8 * i; + out_be32(cfg, in_be32(cfg) & ~0x30000000u); + } - for (n = 0; n < IIC_NUM_EXT; n++) { - int irq = n + IIC_EXT_OFFSET + node * IIC_NODE_STRIDE; - get_irq_desc(irq)->chip = &spider_pic; - } + /* do not mask any interrupts because of level */ + out_be32(pic->regs + TIR_MSK, 0x0); - /* do not mask any interrupts because of level */ - out_be32(spider_pics[node] + TIR_MSK, 0x0); + /* enable interrupt packets to be output */ + out_be32(pic->regs + TIR_PIEN, in_be32(pic->regs + TIR_PIEN) | 0x1); - /* disable edge detection clear */ - /* out_be32(spider_pics[node] + TIR_EDC, 0x0); */ + /* Hook up the cascade interrupt 
to the iic and nodeid */ + virq = spider_find_cascade_and_node(pic); + if (virq == NO_IRQ) + return; + set_irq_data(virq, pic); + set_irq_chained_handler(virq, spider_irq_cascade); - /* enable interrupt packets to be output */ - out_be32(spider_pics[node] + TIR_PIEN, - in_be32(spider_pics[node] + TIR_PIEN) | 0x1); + printk(KERN_INFO "spider_pic: node %d, addr: 0x%lx %s\n", + pic->node_id, addr, of_node->full_name); - /* Enable the interrupt detection enable bit. Do this last! */ - out_be32(spider_pics[node] + TIR_DEN, - in_be32(spider_pics[node] + TIR_DEN) | 0x1); + /* Enable the interrupt detection enable bit. Do this last! */ + out_be32(pic->regs + TIR_DEN, in_be32(pic->regs + TIR_DEN) | 0x1); +} - node++; +void __init spider_init_IRQ(void) +{ + struct resource r; + struct device_node *dn; + int chip = 0; + + /* XXX node numbers are totally bogus. We _hope_ we get the device + * nodes in the right order here but that's definitely not guaranteed, + * we need to get the node from the device tree instead. 
+ * There is currently no proper property for it (but our whole + * device-tree is bogus anyway) so all we can do is pray or maybe test + * the address and deduce the node-id + */ + for (dn = NULL; + (dn = of_find_node_by_name(dn, "interrupt-controller"));) { + if (device_is_compatible(dn, "CBEA,platform-spider-pic")) { + if (of_address_to_resource(dn, 0, &r)) { + printk(KERN_WARNING "spider-pic: Failed\n"); + continue; + } + } else if (device_is_compatible(dn, "sti,platform-spider-pic") + && (chip < 2)) { + static long hard_coded_pics[] = + { 0x24000008000, 0x34000008000 }; + r.start = hard_coded_pics[chip]; + } else + continue; + spider_init_one(dn, chip++, r.start); } } diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index 656c1ef5f4ad..5d2313a6c82b 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -264,51 +264,57 @@ spu_irq_class_2(int irq, void *data, struct pt_regs *regs) return stat ? IRQ_HANDLED : IRQ_NONE; } -static int -spu_request_irqs(struct spu *spu) +static int spu_request_irqs(struct spu *spu) { - int ret; - int irq_base; - - irq_base = IIC_NODE_STRIDE * spu->node + IIC_SPE_OFFSET; - - snprintf(spu->irq_c0, sizeof (spu->irq_c0), "spe%02d.0", spu->number); - ret = request_irq(irq_base + spu->isrc, - spu_irq_class_0, IRQF_DISABLED, spu->irq_c0, spu); - if (ret) - goto out; - - snprintf(spu->irq_c1, sizeof (spu->irq_c1), "spe%02d.1", spu->number); - ret = request_irq(irq_base + IIC_CLASS_STRIDE + spu->isrc, - spu_irq_class_1, IRQF_DISABLED, spu->irq_c1, spu); - if (ret) - goto out1; + int ret = 0; - snprintf(spu->irq_c2, sizeof (spu->irq_c2), "spe%02d.2", spu->number); - ret = request_irq(irq_base + 2*IIC_CLASS_STRIDE + spu->isrc, - spu_irq_class_2, IRQF_DISABLED, spu->irq_c2, spu); - if (ret) - goto out2; - goto out; + if (spu->irqs[0] != NO_IRQ) { + snprintf(spu->irq_c0, sizeof (spu->irq_c0), "spe%02d.0", + spu->number); + ret = request_irq(spu->irqs[0], 
spu_irq_class_0, + IRQF_DISABLED, + spu->irq_c0, spu); + if (ret) + goto bail0; + } + if (spu->irqs[1] != NO_IRQ) { + snprintf(spu->irq_c1, sizeof (spu->irq_c1), "spe%02d.1", + spu->number); + ret = request_irq(spu->irqs[1], spu_irq_class_1, + IRQF_DISABLED, + spu->irq_c1, spu); + if (ret) + goto bail1; + } + if (spu->irqs[2] != NO_IRQ) { + snprintf(spu->irq_c2, sizeof (spu->irq_c2), "spe%02d.2", + spu->number); + ret = request_irq(spu->irqs[2], spu_irq_class_2, + IRQF_DISABLED, + spu->irq_c2, spu); + if (ret) + goto bail2; + } + return 0; -out2: - free_irq(irq_base + IIC_CLASS_STRIDE + spu->isrc, spu); -out1: - free_irq(irq_base + spu->isrc, spu); -out: +bail2: + if (spu->irqs[1] != NO_IRQ) + free_irq(spu->irqs[1], spu); +bail1: + if (spu->irqs[0] != NO_IRQ) + free_irq(spu->irqs[0], spu); +bail0: return ret; } -static void -spu_free_irqs(struct spu *spu) +static void spu_free_irqs(struct spu *spu) { - int irq_base; - - irq_base = IIC_NODE_STRIDE * spu->node + IIC_SPE_OFFSET; - - free_irq(irq_base + spu->isrc, spu); - free_irq(irq_base + IIC_CLASS_STRIDE + spu->isrc, spu); - free_irq(irq_base + 2*IIC_CLASS_STRIDE + spu->isrc, spu); + if (spu->irqs[0] != NO_IRQ) + free_irq(spu->irqs[0], spu); + if (spu->irqs[1] != NO_IRQ) + free_irq(spu->irqs[1], spu); + if (spu->irqs[2] != NO_IRQ) + free_irq(spu->irqs[2], spu); } static LIST_HEAD(spu_list); @@ -559,17 +565,38 @@ static void spu_unmap(struct spu *spu) iounmap((u8 __iomem *)spu->local_store); } +/* This function shall be abstracted for HV platforms */ +static int __init spu_map_interrupts(struct spu *spu, struct device_node *np) +{ + struct irq_host *host; + unsigned int isrc; + u32 *tmp; + + host = iic_get_irq_host(spu->node); + if (host == NULL) + return -ENODEV; + + /* Get the interrupt source from the device-tree */ + tmp = (u32 *)get_property(np, "isrc", NULL); + if (!tmp) + return -ENODEV; + spu->isrc = isrc = tmp[0]; + + /* Now map interrupts of all 3 classes */ + spu->irqs[0] = irq_create_mapping(host, 0x00 | 
isrc, 0); + spu->irqs[1] = irq_create_mapping(host, 0x10 | isrc, 0); + spu->irqs[2] = irq_create_mapping(host, 0x20 | isrc, 0); + + /* Right now, we only fail if class 2 failed */ + return spu->irqs[2] == NO_IRQ ? -EINVAL : 0; +} + static int __init spu_map_device(struct spu *spu, struct device_node *node) { char *prop; int ret; ret = -ENODEV; - prop = get_property(node, "isrc", NULL); - if (!prop) - goto out; - spu->isrc = *(unsigned int *)prop; - spu->name = get_property(node, "name", NULL); if (!spu->name) goto out; @@ -636,7 +663,8 @@ static int spu_create_sysdev(struct spu *spu) return ret; } - sysdev_create_file(&spu->sysdev, &attr_isrc); + if (spu->isrc != 0) + sysdev_create_file(&spu->sysdev, &attr_isrc); sysfs_add_device_to_node(&spu->sysdev, spu->nid); return 0; @@ -668,6 +696,9 @@ static int __init create_spu(struct device_node *spe) spu->nid = of_node_to_nid(spe); if (spu->nid == -1) spu->nid = 0; + ret = spu_map_interrupts(spu, spe); + if (ret) + goto out_unmap; spin_lock_init(&spu->register_lock); spu_mfc_sdr_set(spu, mfspr(SPRN_SDR1)); spu_mfc_sr1_set(spu, 0x33); diff --git a/arch/powerpc/platforms/chrp/pci.c b/arch/powerpc/platforms/chrp/pci.c index 66c253498803..6802cdc3168a 100644 --- a/arch/powerpc/platforms/chrp/pci.c +++ b/arch/powerpc/platforms/chrp/pci.c @@ -18,7 +18,6 @@ #include <asm/machdep.h> #include <asm/sections.h> #include <asm/pci-bridge.h> -#include <asm/open_pic.h> #include <asm/grackle.h> #include <asm/rtas.h> @@ -161,15 +160,9 @@ void __init chrp_pcibios_fixup(void) { struct pci_dev *dev = NULL; - struct device_node *np; - /* PCI interrupts are controlled by the OpenPIC */ - for_each_pci_dev(dev) { - np = pci_device_to_OF_node(dev); - if ((np != 0) && (np->n_intrs > 0) && (np->intrs[0].line != 0)) - dev->irq = np->intrs[0].line; - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq); - } + for_each_pci_dev(dev) + pci_read_irq_line(dev); } #define PRG_CL_RESET_VALID 0x00010000 diff --git a/arch/powerpc/platforms/chrp/setup.c 
b/arch/powerpc/platforms/chrp/setup.c index 9df9f2079e9b..538e337d63e2 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -59,7 +59,7 @@ void rtas_indicator_progress(char *, unsigned short); int _chrp_type; EXPORT_SYMBOL(_chrp_type); -struct mpic *chrp_mpic; +static struct mpic *chrp_mpic; /* Used for doing CHRP event-scans */ DEFINE_PER_CPU(struct timer_list, heartbeat_timer); @@ -315,24 +315,32 @@ chrp_event_scan(unsigned long unused) jiffies + event_scan_interval); } +static void chrp_8259_cascade(unsigned int irq, struct irq_desc *desc, + struct pt_regs *regs) +{ + unsigned int cascade_irq = i8259_irq(regs); + if (cascade_irq != NO_IRQ) + generic_handle_irq(cascade_irq, regs); + desc->chip->eoi(irq); +} + /* * Finds the open-pic node and sets up the mpic driver. */ static void __init chrp_find_openpic(void) { struct device_node *np, *root; - int len, i, j, irq_count; + int len, i, j; int isu_size, idu_size; unsigned int *iranges, *opprop = NULL; int oplen = 0; unsigned long opaddr; int na = 1; - unsigned char init_senses[NR_IRQS - NUM_8259_INTERRUPTS]; - np = find_type_devices("open-pic"); + np = of_find_node_by_type(NULL, "open-pic"); if (np == NULL) return; - root = find_path_device("/"); + root = of_find_node_by_path("/"); if (root) { opprop = (unsigned int *) get_property (root, "platform-open-pic", &oplen); @@ -343,19 +351,15 @@ static void __init chrp_find_openpic(void) oplen /= na * sizeof(unsigned int); } else { struct resource r; - if (of_address_to_resource(np, 0, &r)) - return; + if (of_address_to_resource(np, 0, &r)) { + goto bail; + } opaddr = r.start; oplen = 0; } printk(KERN_INFO "OpenPIC at %lx\n", opaddr); - irq_count = NR_IRQS - NUM_ISA_INTERRUPTS - 4; /* leave room for IPIs */ - prom_get_irq_senses(init_senses, NUM_ISA_INTERRUPTS, NR_IRQS - 4); - /* i8259 cascade is always positive level */ - init_senses[0] = IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE; - iranges = (unsigned int *) get_property(np, 
"interrupt-ranges", &len); if (iranges == NULL) len = 0; /* non-distributed mpic */ @@ -382,15 +386,12 @@ static void __init chrp_find_openpic(void) if (len > 1) isu_size = iranges[3]; - chrp_mpic = mpic_alloc(opaddr, MPIC_PRIMARY, - isu_size, NUM_ISA_INTERRUPTS, irq_count, - NR_IRQS - 4, init_senses, irq_count, - " MPIC "); + chrp_mpic = mpic_alloc(np, opaddr, MPIC_PRIMARY, + isu_size, 0, " MPIC "); if (chrp_mpic == NULL) { printk(KERN_ERR "Failed to allocate MPIC structure\n"); - return; + goto bail; } - j = na - 1; for (i = 1; i < len; ++i) { iranges += 2; @@ -402,7 +403,10 @@ static void __init chrp_find_openpic(void) } mpic_init(chrp_mpic); - mpic_setup_cascade(NUM_ISA_INTERRUPTS, i8259_irq_cascade, NULL); + ppc_md.get_irq = mpic_get_irq; + bail: + of_node_put(root); + of_node_put(np); } #if defined(CONFIG_VT) && defined(CONFIG_INPUT_ADBHID) && defined(XMON) @@ -413,14 +417,34 @@ static struct irqaction xmon_irqaction = { }; #endif -void __init chrp_init_IRQ(void) +static void __init chrp_find_8259(void) { - struct device_node *np; + struct device_node *np, *pic = NULL; unsigned long chrp_int_ack = 0; -#if defined(CONFIG_VT) && defined(CONFIG_INPUT_ADBHID) && defined(XMON) - struct device_node *kbd; -#endif + unsigned int cascade_irq; + + /* Look for cascade */ + for_each_node_by_type(np, "interrupt-controller") + if (device_is_compatible(np, "chrp,iic")) { + pic = np; + break; + } + /* Ok, 8259 wasn't found. We need to handle the case where + * we have a pegasos that claims to be chrp but doesn't have + * a proper interrupt tree + */ + if (pic == NULL && chrp_mpic != NULL) { + printk(KERN_ERR "i8259: Not found in device-tree" + " assuming no legacy interrupts\n"); + return; + } + /* Look for intack. In a perfect world, we would look for it on + * the ISA bus that holds the 8259 but heh... Works that way. If + * we ever see a problem, we can try to re-use the pSeries code here. 
+ * Also, Pegasos-type platforms don't have a proper node to start + * from anyway + */ for (np = find_devices("pci"); np != NULL; np = np->next) { unsigned int *addrp = (unsigned int *) get_property(np, "8259-interrupt-acknowledge", NULL); @@ -431,11 +455,29 @@ void __init chrp_init_IRQ(void) break; } if (np == NULL) - printk(KERN_ERR "Cannot find PCI interrupt acknowledge address\n"); + printk(KERN_WARNING "Cannot find PCI interrupt acknowledge" + " address, polling\n"); + + i8259_init(pic, chrp_int_ack); + if (ppc_md.get_irq == NULL) + ppc_md.get_irq = i8259_irq; + if (chrp_mpic != NULL) { + cascade_irq = irq_of_parse_and_map(pic, 0); + if (cascade_irq == NO_IRQ) + printk(KERN_ERR "i8259: failed to map cascade irq\n"); + else + set_irq_chained_handler(cascade_irq, + chrp_8259_cascade); + } +} +void __init chrp_init_IRQ(void) +{ +#if defined(CONFIG_VT) && defined(CONFIG_INPUT_ADBHID) && defined(XMON) + struct device_node *kbd; +#endif chrp_find_openpic(); - - i8259_init(chrp_int_ack, 0); + chrp_find_8259(); if (_chrp_type == _CHRP_Pegasos) ppc_md.get_irq = i8259_irq; @@ -520,10 +562,6 @@ static int __init chrp_probe(void) DMA_MODE_READ = 0x44; DMA_MODE_WRITE = 0x48; isa_io_base = CHRP_ISA_IO_BASE; /* default value */ - ppc_do_canonicalize_irqs = 1; - - /* Assume we have an 8259... 
*/ - __irq_offset_value = NUM_ISA_INTERRUPTS; return 1; } @@ -535,7 +573,6 @@ define_machine(chrp) { .init = chrp_init2, .show_cpuinfo = chrp_show_cpuinfo, .init_IRQ = chrp_init_IRQ, - .get_irq = mpic_get_irq, .pcibios_fixup = chrp_pcibios_fixup, .restart = rtas_restart, .power_off = rtas_power_off, diff --git a/arch/powerpc/platforms/chrp/smp.c b/arch/powerpc/platforms/chrp/smp.c index c298ca1ea680..1d2307e87c30 100644 --- a/arch/powerpc/platforms/chrp/smp.c +++ b/arch/powerpc/platforms/chrp/smp.c @@ -29,7 +29,6 @@ #include <asm/smp.h> #include <asm/residual.h> #include <asm/time.h> -#include <asm/open_pic.h> #include <asm/machdep.h> #include <asm/smp.h> #include <asm/mpic.h> diff --git a/arch/powerpc/platforms/iseries/irq.c b/arch/powerpc/platforms/iseries/irq.c index f70e820e7304..2275e64f3152 100644 --- a/arch/powerpc/platforms/iseries/irq.c +++ b/arch/powerpc/platforms/iseries/irq.c @@ -162,27 +162,6 @@ static void pci_event_handler(struct HvLpEvent *event, struct pt_regs *regs) printk(KERN_ERR "pci_event_handler: NULL event received\n"); } -/* - * This is called by init_IRQ. set in ppc_md.init_IRQ by iSeries_setup.c - * It must be called before the bus walk. 
- */ -void __init iSeries_init_IRQ(void) -{ - /* Register PCI event handler and open an event path */ - int ret; - - ret = HvLpEvent_registerHandler(HvLpEvent_Type_PciIo, - &pci_event_handler); - if (ret == 0) { - ret = HvLpEvent_openPath(HvLpEvent_Type_PciIo, 0); - if (ret != 0) - printk(KERN_ERR "iseries_init_IRQ: open event path " - "failed with rc 0x%x\n", ret); - } else - printk(KERN_ERR "iseries_init_IRQ: register handler " - "failed with rc 0x%x\n", ret); -} - #define REAL_IRQ_TO_SUBBUS(irq) (((irq) >> 14) & 0xff) #define REAL_IRQ_TO_BUS(irq) ((((irq) >> 6) & 0xff) + 1) #define REAL_IRQ_TO_IDSEL(irq) ((((irq) >> 3) & 7) + 1) @@ -196,7 +175,7 @@ static void iseries_enable_IRQ(unsigned int irq) { u32 bus, dev_id, function, mask; const u32 sub_bus = 0; - unsigned int rirq = virt_irq_to_real_map[irq]; + unsigned int rirq = (unsigned int)irq_map[irq].hwirq; /* The IRQ has already been locked by the caller */ bus = REAL_IRQ_TO_BUS(rirq); @@ -213,7 +192,7 @@ static unsigned int iseries_startup_IRQ(unsigned int irq) { u32 bus, dev_id, function, mask; const u32 sub_bus = 0; - unsigned int rirq = virt_irq_to_real_map[irq]; + unsigned int rirq = (unsigned int)irq_map[irq].hwirq; bus = REAL_IRQ_TO_BUS(rirq); function = REAL_IRQ_TO_FUNC(rirq); @@ -254,7 +233,7 @@ static void iseries_shutdown_IRQ(unsigned int irq) { u32 bus, dev_id, function, mask; const u32 sub_bus = 0; - unsigned int rirq = virt_irq_to_real_map[irq]; + unsigned int rirq = (unsigned int)irq_map[irq].hwirq; /* irq should be locked by the caller */ bus = REAL_IRQ_TO_BUS(rirq); @@ -277,7 +256,7 @@ static void iseries_disable_IRQ(unsigned int irq) { u32 bus, dev_id, function, mask; const u32 sub_bus = 0; - unsigned int rirq = virt_irq_to_real_map[irq]; + unsigned int rirq = (unsigned int)irq_map[irq].hwirq; /* The IRQ has already been locked by the caller */ bus = REAL_IRQ_TO_BUS(rirq); @@ -291,19 +270,19 @@ static void iseries_disable_IRQ(unsigned int irq) static void iseries_end_IRQ(unsigned int irq) { - 
unsigned int rirq = virt_irq_to_real_map[irq]; + unsigned int rirq = (unsigned int)irq_map[irq].hwirq; HvCallPci_eoi(REAL_IRQ_TO_BUS(rirq), REAL_IRQ_TO_SUBBUS(rirq), (REAL_IRQ_TO_IDSEL(rirq) << 4) + REAL_IRQ_TO_FUNC(rirq)); } -static hw_irq_controller iSeries_IRQ_handler = { - .typename = "iSeries irq controller", - .startup = iseries_startup_IRQ, - .shutdown = iseries_shutdown_IRQ, - .enable = iseries_enable_IRQ, - .disable = iseries_disable_IRQ, - .end = iseries_end_IRQ +static struct irq_chip iseries_pic = { + .typename = "iSeries irq controller", + .startup = iseries_startup_IRQ, + .shutdown = iseries_shutdown_IRQ, + .unmask = iseries_enable_IRQ, + .mask = iseries_disable_IRQ, + .eoi = iseries_end_IRQ }; /* @@ -314,17 +293,14 @@ static hw_irq_controller iSeries_IRQ_handler = { int __init iSeries_allocate_IRQ(HvBusNumber bus, HvSubBusNumber sub_bus, u32 bsubbus) { - int virtirq; unsigned int realirq; u8 idsel = ISERIES_GET_DEVICE_FROM_SUBBUS(bsubbus); u8 function = ISERIES_GET_FUNCTION_FROM_SUBBUS(bsubbus); realirq = (((((sub_bus << 8) + (bus - 1)) << 3) + (idsel - 1)) << 3) + function; - virtirq = virt_irq_create_mapping(realirq); - irq_desc[virtirq].chip = &iSeries_IRQ_handler; - return virtirq; + return irq_create_mapping(NULL, realirq, IRQ_TYPE_NONE); } #endif /* CONFIG_PCI */ @@ -332,10 +308,9 @@ int __init iSeries_allocate_IRQ(HvBusNumber bus, /* * Get the next pending IRQ. 
*/ -int iSeries_get_irq(struct pt_regs *regs) +unsigned int iSeries_get_irq(struct pt_regs *regs) { - /* -2 means ignore this interrupt */ - int irq = -2; + int irq = NO_IRQ_IGNORE; #ifdef CONFIG_SMP if (get_lppaca()->int_dword.fields.ipi_cnt) { @@ -358,9 +333,57 @@ int iSeries_get_irq(struct pt_regs *regs) } spin_unlock(&pending_irqs_lock); if (irq >= NR_IRQS) - irq = -2; + irq = NO_IRQ_IGNORE; } #endif return irq; } + +static int iseries_irq_host_map(struct irq_host *h, unsigned int virq, + irq_hw_number_t hw, unsigned int flags) +{ + set_irq_chip_and_handler(virq, &iseries_pic, handle_fasteoi_irq); + + return 0; +} + +static struct irq_host_ops iseries_irq_host_ops = { + .map = iseries_irq_host_map, +}; + +/* + * This is called by init_IRQ. set in ppc_md.init_IRQ by iSeries_setup.c + * It must be called before the bus walk. + */ +void __init iSeries_init_IRQ(void) +{ + /* Register PCI event handler and open an event path */ + struct irq_host *host; + int ret; + + /* + * The Hypervisor only allows us up to 256 interrupt + * sources (the irq number is passed in a u8). + */ + irq_set_virq_count(256); + + /* Create irq host. 
No need for a revmap since HV will give us + * back our virtual irq number + */ + host = irq_alloc_host(IRQ_HOST_MAP_NOMAP, 0, &iseries_irq_host_ops, 0); + BUG_ON(host == NULL); + irq_set_default_host(host); + + ret = HvLpEvent_registerHandler(HvLpEvent_Type_PciIo, + &pci_event_handler); + if (ret == 0) { + ret = HvLpEvent_openPath(HvLpEvent_Type_PciIo, 0); + if (ret != 0) + printk(KERN_ERR "iseries_init_IRQ: open event path " + "failed with rc 0x%x\n", ret); + } else + printk(KERN_ERR "iseries_init_IRQ: register handler " + "failed with rc 0x%x\n", ret); +} + diff --git a/arch/powerpc/platforms/iseries/irq.h b/arch/powerpc/platforms/iseries/irq.h index 188aa808abd7..1ee8985140e5 100644 --- a/arch/powerpc/platforms/iseries/irq.h +++ b/arch/powerpc/platforms/iseries/irq.h @@ -4,6 +4,6 @@ extern void iSeries_init_IRQ(void); extern int iSeries_allocate_IRQ(HvBusNumber, HvSubBusNumber, u32); extern void iSeries_activate_IRQs(void); -extern int iSeries_get_irq(struct pt_regs *); +extern unsigned int iSeries_get_irq(struct pt_regs *); #endif /* _ISERIES_IRQ_H */ diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c index c877074745b2..c9605d773a77 100644 --- a/arch/powerpc/platforms/iseries/setup.c +++ b/arch/powerpc/platforms/iseries/setup.c @@ -294,8 +294,6 @@ static void __init iSeries_init_early(void) { DBG(" -> iSeries_init_early()\n"); - ppc64_interrupt_controller = IC_ISERIES; - #if defined(CONFIG_BLK_DEV_INITRD) /* * If the init RAM disk has been configured and there is @@ -659,12 +657,6 @@ static int __init iseries_probe(void) powerpc_firmware_features |= FW_FEATURE_ISERIES; powerpc_firmware_features |= FW_FEATURE_LPAR; - /* - * The Hypervisor only allows us up to 256 interrupt - * sources (the irq number is passed in a u8). 
- */ - virt_irq_max = 255; - hpte_init_iSeries(); return 1; diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c index f7170ff86dab..63a1670d3bfd 100644 --- a/arch/powerpc/platforms/maple/pci.c +++ b/arch/powerpc/platforms/maple/pci.c @@ -443,18 +443,23 @@ void __init maple_pci_init(void) int maple_pci_get_legacy_ide_irq(struct pci_dev *pdev, int channel) { struct device_node *np; - int irq = channel ? 15 : 14; + unsigned int defirq = channel ? 15 : 14; + unsigned int irq; if (pdev->vendor != PCI_VENDOR_ID_AMD || pdev->device != PCI_DEVICE_ID_AMD_8111_IDE) - return irq; + return defirq; np = pci_device_to_OF_node(pdev); if (np == NULL) - return irq; - if (np->n_intrs < 2) - return irq; - return np->intrs[channel & 0x1].line; + return defirq; + irq = irq_of_parse_and_map(np, channel & 0x1); + if (irq == NO_IRQ) { + printk("Failed to map onboard IDE interrupt for channel %d\n", + channel); + return defirq; + } + return irq; } /* XXX: To remove once all firmwares are ok */ diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index 5cf90c28b141..cb528c9de4c3 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -11,7 +11,7 @@ * */ -#define DEBUG +#undef DEBUG #include <linux/init.h> #include <linux/errno.h> @@ -198,50 +198,81 @@ static void __init maple_init_early(void) { DBG(" -> maple_init_early\n"); - /* Setup interrupt mapping options */ - ppc64_interrupt_controller = IC_OPEN_PIC; - iommu_init_early_dart(); DBG(" <- maple_init_early\n"); } - -static __init void maple_init_IRQ(void) +/* + * This is almost identical to pSeries and CHRP. 
We need to make that + * code generic at one point, with appropriate bits in the device-tree to + * identify the presence of an HT APIC + */ +static void __init maple_init_IRQ(void) { - struct device_node *root; + struct device_node *root, *np, *mpic_node = NULL; unsigned int *opprop; - unsigned long opic_addr; + unsigned long openpic_addr = 0; + int naddr, n, i, opplen, has_isus = 0; struct mpic *mpic; - unsigned char senses[128]; - int n; + unsigned int flags = MPIC_PRIMARY; - DBG(" -> maple_init_IRQ\n"); + /* Locate MPIC in the device-tree. Note that there is a bug + * in Maple device-tree where the type of the controller is + * open-pic and not interrupt-controller + */ + for_each_node_by_type(np, "open-pic") { + mpic_node = np; + break; + } + if (mpic_node == NULL) { + printk(KERN_ERR + "Failed to locate the MPIC interrupt controller\n"); + return; + } - /* XXX: Non standard, replace that with a proper openpic/mpic node - * in the device-tree. Find the Open PIC if present */ + /* Find address list in /platform-open-pic */ root = of_find_node_by_path("/"); - opprop = (unsigned int *) get_property(root, - "platform-open-pic", NULL); - if (opprop == 0) - panic("OpenPIC not found !\n"); - - n = prom_n_addr_cells(root); - for (opic_addr = 0; n > 0; --n) - opic_addr = (opic_addr << 32) + *opprop++; + naddr = prom_n_addr_cells(root); + opprop = (unsigned int *) get_property(root, "platform-open-pic", + &opplen); + if (opprop != 0) { + openpic_addr = of_read_number(opprop, naddr); + has_isus = (opplen > naddr); + printk(KERN_DEBUG "OpenPIC addr: %lx, has ISUs: %d\n", + openpic_addr, has_isus); + } of_node_put(root); - /* Obtain sense values from device-tree */ - prom_get_irq_senses(senses, 0, 128); + BUG_ON(openpic_addr == 0); + + /* Check for a big endian MPIC */ + if (get_property(np, "big-endian", NULL) != NULL) + flags |= MPIC_BIG_ENDIAN; - mpic = mpic_alloc(opic_addr, - MPIC_PRIMARY | MPIC_BIG_ENDIAN | - MPIC_BROKEN_U3 | MPIC_WANTS_RESET, - 0, 0, 128, 128, 
senses, 128, "U3-MPIC"); + /* XXX Maple specific bits */ + flags |= MPIC_BROKEN_U3 | MPIC_WANTS_RESET; + + /* Setup the openpic driver. More device-tree junks, we hard code no + * ISUs for now. I'll have to revisit some stuffs with the folks doing + * the firmware for those + */ + mpic = mpic_alloc(mpic_node, openpic_addr, flags, + /*has_isus ? 16 :*/ 0, 0, " MPIC "); BUG_ON(mpic == NULL); - mpic_init(mpic); - DBG(" <- maple_init_IRQ\n"); + /* Add ISUs */ + opplen /= sizeof(u32); + for (n = 0, i = naddr; i < opplen; i += naddr, n++) { + unsigned long isuaddr = of_read_number(opprop + i, naddr); + mpic_assign_isu(mpic, n, isuaddr); + } + + /* All ISUs are setup, complete initialization */ + mpic_init(mpic); + ppc_md.get_irq = mpic_get_irq; + of_node_put(mpic_node); + of_node_put(root); } static void __init maple_progress(char *s, unsigned short hex) @@ -256,7 +287,9 @@ static void __init maple_progress(char *s, unsigned short hex) static int __init maple_probe(void) { unsigned long root = of_get_flat_dt_root(); - if (!of_flat_dt_is_compatible(root, "Momentum,Maple")) + + if (!of_flat_dt_is_compatible(root, "Momentum,Maple") && + !of_flat_dt_is_compatible(root, "Momentum,Apache")) return 0; /* * On U3, the DART (iommu) must be allocated now since it @@ -277,7 +310,6 @@ define_machine(maple_md) { .setup_arch = maple_setup_arch, .init_early = maple_init_early, .init_IRQ = maple_init_IRQ, - .get_irq = mpic_get_irq, .pcibios_fixup = maple_pcibios_fixup, .pci_get_legacy_ide_irq = maple_pci_get_legacy_ide_irq, .restart = maple_restart, diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c index 24f09e2a5775..871b002c9f90 100644 --- a/arch/powerpc/platforms/powermac/bootx_init.c +++ b/arch/powerpc/platforms/powermac/bootx_init.c @@ -162,6 +162,8 @@ static void __init bootx_add_chosen_props(unsigned long base, { u32 val; + bootx_dt_add_prop("linux,bootx", NULL, 0, mem_end); + if (bootx_info->kernelParamsOffset) { char *args 
= (char *)((unsigned long)bootx_info) + bootx_info->kernelParamsOffset; @@ -181,8 +183,25 @@ static void __init bootx_add_chosen_props(unsigned long base, static void __init bootx_add_display_props(unsigned long base, unsigned long *mem_end) { + boot_infos_t *bi = bootx_info; + u32 tmp; + bootx_dt_add_prop("linux,boot-display", NULL, 0, mem_end); bootx_dt_add_prop("linux,opened", NULL, 0, mem_end); + tmp = bi->dispDeviceDepth; + bootx_dt_add_prop("linux,bootx-depth", &tmp, 4, mem_end); + tmp = bi->dispDeviceRect[2] - bi->dispDeviceRect[0]; + bootx_dt_add_prop("linux,bootx-width", &tmp, 4, mem_end); + tmp = bi->dispDeviceRect[3] - bi->dispDeviceRect[1]; + bootx_dt_add_prop("linux,bootx-height", &tmp, 4, mem_end); + tmp = bi->dispDeviceRowBytes; + bootx_dt_add_prop("linux,bootx-linebytes", &tmp, 4, mem_end); + tmp = (u32)bi->dispDeviceBase; + if (tmp == 0) + tmp = (u32)bi->logicalDisplayBase; + tmp += bi->dispDeviceRect[1] * bi->dispDeviceRowBytes; + tmp += bi->dispDeviceRect[0] * ((bi->dispDeviceDepth + 7) / 8); + bootx_dt_add_prop("linux,bootx-addr", &tmp, 4, mem_end); } static void __init bootx_dt_add_string(char *s, unsigned long *mem_end) @@ -211,7 +230,7 @@ static void __init bootx_scan_dt_build_strings(unsigned long base, if (!strcmp(namep, "/chosen")) { DBG(" detected /chosen ! adding properties names !\n"); - bootx_dt_add_string("linux,platform", mem_end); + bootx_dt_add_string("linux,bootx", mem_end); bootx_dt_add_string("linux,stdout-path", mem_end); bootx_dt_add_string("linux,initrd-start", mem_end); bootx_dt_add_string("linux,initrd-end", mem_end); @@ -222,6 +241,11 @@ static void __init bootx_scan_dt_build_strings(unsigned long base, DBG(" detected display ! 
adding properties names !\n"); bootx_dt_add_string("linux,boot-display", mem_end); bootx_dt_add_string("linux,opened", mem_end); + bootx_dt_add_string("linux,bootx-depth", mem_end); + bootx_dt_add_string("linux,bootx-width", mem_end); + bootx_dt_add_string("linux,bootx-height", mem_end); + bootx_dt_add_string("linux,bootx-linebytes", mem_end); + bootx_dt_add_string("linux,bootx-addr", mem_end); strncpy(bootx_disp_path, namep, 255); } @@ -443,7 +467,14 @@ void __init bootx_init(unsigned long r3, unsigned long r4) if (!BOOT_INFO_IS_V2_COMPATIBLE(bi)) bi->logicalDisplayBase = bi->dispDeviceBase; + /* Fixup depth 16 -> 15 as that's what MacOS calls 16bpp */ + if (bi->dispDeviceDepth == 16) + bi->dispDeviceDepth = 15; + #ifdef CONFIG_BOOTX_TEXT + ptr = (unsigned long)bi->logicalDisplayBase; + ptr += bi->dispDeviceRect[1] * bi->dispDeviceRowBytes; + ptr += bi->dispDeviceRect[0] * ((bi->dispDeviceDepth + 7) / 8); btext_setup_display(bi->dispDeviceRect[2] - bi->dispDeviceRect[0], bi->dispDeviceRect[3] - bi->dispDeviceRect[1], bi->dispDeviceDepth, bi->dispDeviceRowBytes, diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c index ceafaf52a668..8677f50c2586 100644 --- a/arch/powerpc/platforms/powermac/low_i2c.c +++ b/arch/powerpc/platforms/powermac/low_i2c.c @@ -522,10 +522,11 @@ static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np) host->speed = KW_I2C_MODE_25KHZ; break; } - if (np->n_intrs > 0) - host->irq = np->intrs[0].line; - else - host->irq = NO_IRQ; + host->irq = irq_of_parse_and_map(np, 0); + if (host->irq == NO_IRQ) + printk(KERN_WARNING + "low_i2c: Failed to map interrupt for %s\n", + np->full_name); host->base = ioremap((*addrp), 0x1000); if (host->base == NULL) { diff --git a/arch/powerpc/platforms/powermac/nvram.c b/arch/powerpc/platforms/powermac/nvram.c index 41fa2409482a..6a36ea9bf673 100644 --- a/arch/powerpc/platforms/powermac/nvram.c +++ b/arch/powerpc/platforms/powermac/nvram.c @@ 
-29,6 +29,8 @@ #include <asm/machdep.h> #include <asm/nvram.h> +#include "pmac.h" + #define DEBUG #ifdef DEBUG @@ -80,9 +82,6 @@ static int nvram_partitions[3]; // XXX Turn that into a sem static DEFINE_SPINLOCK(nv_lock); -extern int pmac_newworld; -extern int system_running; - static int (*core99_write_bank)(int bank, u8* datas); static int (*core99_erase_bank)(int bank); diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c index d524a915aa86..556b349797e8 100644 --- a/arch/powerpc/platforms/powermac/pci.c +++ b/arch/powerpc/platforms/powermac/pci.c @@ -46,6 +46,9 @@ static int has_uninorth; static struct pci_controller *u3_agp; static struct pci_controller *u4_pcie; static struct pci_controller *u3_ht; +#define has_second_ohare 0 +#else +static int has_second_ohare; #endif /* CONFIG_PPC64 */ extern u8 pci_cache_line_size; @@ -647,6 +650,33 @@ static void __init init_p2pbridge(void) early_write_config_word(hose, bus, devfn, PCI_BRIDGE_CONTROL, val); } +static void __init init_second_ohare(void) +{ + struct device_node *np = of_find_node_by_name(NULL, "pci106b,7"); + unsigned char bus, devfn; + unsigned short cmd; + + if (np == NULL) + return; + + /* This must run before we initialize the PICs since the second + * ohare hosts a PIC that will be accessed there. + */ + if (pci_device_from_OF_node(np, &bus, &devfn) == 0) { + struct pci_controller* hose = + pci_find_hose_for_OF_device(np); + if (!hose) { + printk(KERN_ERR "Can't find PCI hose for OHare2 !\n"); + return; + } + early_read_config_word(hose, bus, devfn, PCI_COMMAND, &cmd); + cmd |= PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER; + cmd &= ~PCI_COMMAND_IO; + early_write_config_word(hose, bus, devfn, PCI_COMMAND, cmd); + } + has_second_ohare = 1; +} + /* * Some Apple desktop machines have a NEC PD720100A USB2 controller * on the motherboard. 
Open Firmware, on these, will disable the @@ -688,9 +718,6 @@ static void __init fixup_nec_usb2(void) " EHCI, fixing up...\n"); data &= ~1UL; early_write_config_dword(hose, bus, devfn, 0xe4, data); - early_write_config_byte(hose, bus, - devfn | 2, PCI_INTERRUPT_LINE, - nec->intrs[0].line); } } } @@ -958,32 +985,28 @@ static int __init add_bridge(struct device_node *dev) return 0; } -static void __init pcibios_fixup_OF_interrupts(void) +void __init pmac_pcibios_fixup(void) { struct pci_dev* dev = NULL; - /* - * Open Firmware often doesn't initialize the - * PCI_INTERRUPT_LINE config register properly, so we - * should find the device node and apply the interrupt - * obtained from the OF device-tree - */ for_each_pci_dev(dev) { - struct device_node *node; - node = pci_device_to_OF_node(dev); - /* this is the node, see if it has interrupts */ - if (node && node->n_intrs > 0) - dev->irq = node->intrs[0].line; - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq); + /* Read interrupt from the device-tree */ + pci_read_irq_line(dev); + + /* Fixup interrupt for the modem/ethernet combo controller. + * on machines with a second ohare chip. + * The number in the device tree (27) is bogus (correct for + * the ethernet-only board but not the combo ethernet/modem + * board). The real interrupt is 28 on the second controller + * -> 28+32 = 60. 
+ */ + if (has_second_ohare && + dev->vendor == PCI_VENDOR_ID_DEC && + dev->device == PCI_DEVICE_ID_DEC_TULIP_PLUS) + dev->irq = irq_create_mapping(NULL, 60, 0); } } -void __init pmac_pcibios_fixup(void) -{ - /* Fixup interrupts according to OF tree */ - pcibios_fixup_OF_interrupts(); -} - #ifdef CONFIG_PPC64 static void __init pmac_fixup_phb_resources(void) { @@ -1071,6 +1094,7 @@ void __init pmac_pci_init(void) #else /* CONFIG_PPC64 */ init_p2pbridge(); + init_second_ohare(); fixup_nec_usb2(); /* We are still having some issues with the Xserve G4, enabling diff --git a/arch/powerpc/platforms/powermac/pfunc_base.c b/arch/powerpc/platforms/powermac/pfunc_base.c index d6eab8b3f7de..6d66359ec8c8 100644 --- a/arch/powerpc/platforms/powermac/pfunc_base.c +++ b/arch/powerpc/platforms/powermac/pfunc_base.c @@ -24,19 +24,18 @@ static irqreturn_t macio_gpio_irq(int irq, void *data, struct pt_regs *regs) static int macio_do_gpio_irq_enable(struct pmf_function *func) { - if (func->node->n_intrs < 1) + unsigned int irq = irq_of_parse_and_map(func->node, 0); + if (irq == NO_IRQ) return -EINVAL; - - return request_irq(func->node->intrs[0].line, macio_gpio_irq, 0, - func->node->name, func); + return request_irq(irq, macio_gpio_irq, 0, func->node->name, func); } static int macio_do_gpio_irq_disable(struct pmf_function *func) { - if (func->node->n_intrs < 1) + unsigned int irq = irq_of_parse_and_map(func->node, 0); + if (irq == NO_IRQ) return -EINVAL; - - free_irq(func->node->intrs[0].line, func); + free_irq(irq, func); return 0; } diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c index c9b09a9e6050..3d328bc1f7e0 100644 --- a/arch/powerpc/platforms/powermac/pic.c +++ b/arch/powerpc/platforms/powermac/pic.c @@ -65,39 +65,36 @@ static u32 level_mask[4]; static DEFINE_SPINLOCK(pmac_pic_lock); -#define GATWICK_IRQ_POOL_SIZE 10 -static struct interrupt_info gatwick_int_pool[GATWICK_IRQ_POOL_SIZE]; - #define NR_MASK_WORDS ((NR_IRQS + 31) / 32) 
static unsigned long ppc_lost_interrupts[NR_MASK_WORDS]; +static unsigned long ppc_cached_irq_mask[NR_MASK_WORDS]; +static int pmac_irq_cascade = -1; +static struct irq_host *pmac_pic_host; -/* - * Mark an irq as "lost". This is only used on the pmac - * since it can lose interrupts (see pmac_set_irq_mask). - * -- Cort - */ -void __set_lost(unsigned long irq_nr, int nokick) +static void __pmac_retrigger(unsigned int irq_nr) { - if (!test_and_set_bit(irq_nr, ppc_lost_interrupts)) { + if (irq_nr >= max_real_irqs && pmac_irq_cascade > 0) { + __set_bit(irq_nr, ppc_lost_interrupts); + irq_nr = pmac_irq_cascade; + mb(); + } + if (!__test_and_set_bit(irq_nr, ppc_lost_interrupts)) { atomic_inc(&ppc_n_lost_interrupts); - if (!nokick) - set_dec(1); + set_dec(1); } } -static void pmac_mask_and_ack_irq(unsigned int irq_nr) +static void pmac_mask_and_ack_irq(unsigned int virq) { - unsigned long bit = 1UL << (irq_nr & 0x1f); - int i = irq_nr >> 5; + unsigned int src = irq_map[virq].hwirq; + unsigned long bit = 1UL << (virq & 0x1f); + int i = virq >> 5; unsigned long flags; - if ((unsigned)irq_nr >= max_irqs) - return; - - clear_bit(irq_nr, ppc_cached_irq_mask); - if (test_and_clear_bit(irq_nr, ppc_lost_interrupts)) - atomic_dec(&ppc_n_lost_interrupts); spin_lock_irqsave(&pmac_pic_lock, flags); + __clear_bit(src, ppc_cached_irq_mask); + if (__test_and_clear_bit(src, ppc_lost_interrupts)) + atomic_dec(&ppc_n_lost_interrupts); out_le32(&pmac_irq_hw[i]->enable, ppc_cached_irq_mask[i]); out_le32(&pmac_irq_hw[i]->ack, bit); do { @@ -109,16 +106,29 @@ static void pmac_mask_and_ack_irq(unsigned int irq_nr) spin_unlock_irqrestore(&pmac_pic_lock, flags); } -static void pmac_set_irq_mask(unsigned int irq_nr, int nokicklost) +static void pmac_ack_irq(unsigned int virq) +{ + unsigned int src = irq_map[virq].hwirq; + unsigned long bit = 1UL << (src & 0x1f); + int i = src >> 5; + unsigned long flags; + + spin_lock_irqsave(&pmac_pic_lock, flags); + if (__test_and_clear_bit(src, 
ppc_lost_interrupts)) + atomic_dec(&ppc_n_lost_interrupts); + out_le32(&pmac_irq_hw[i]->ack, bit); + (void)in_le32(&pmac_irq_hw[i]->ack); + spin_unlock_irqrestore(&pmac_pic_lock, flags); +} + +static void __pmac_set_irq_mask(unsigned int irq_nr, int nokicklost) { unsigned long bit = 1UL << (irq_nr & 0x1f); int i = irq_nr >> 5; - unsigned long flags; if ((unsigned)irq_nr >= max_irqs) return; - spin_lock_irqsave(&pmac_pic_lock, flags); /* enable unmasked interrupts */ out_le32(&pmac_irq_hw[i]->enable, ppc_cached_irq_mask[i]); @@ -135,71 +145,78 @@ static void pmac_set_irq_mask(unsigned int irq_nr, int nokicklost) * the bit in the flag register or request another interrupt. */ if (bit & ppc_cached_irq_mask[i] & in_le32(&pmac_irq_hw[i]->level)) - __set_lost((ulong)irq_nr, nokicklost); - spin_unlock_irqrestore(&pmac_pic_lock, flags); + __pmac_retrigger(irq_nr); } /* When an irq gets requested for the first client, if it's an * edge interrupt, we clear any previous one on the controller */ -static unsigned int pmac_startup_irq(unsigned int irq_nr) +static unsigned int pmac_startup_irq(unsigned int virq) { - unsigned long bit = 1UL << (irq_nr & 0x1f); - int i = irq_nr >> 5; + unsigned long flags; + unsigned int src = irq_map[virq].hwirq; + unsigned long bit = 1UL << (src & 0x1f); + int i = src >> 5; - if ((irq_desc[irq_nr].status & IRQ_LEVEL) == 0) + spin_lock_irqsave(&pmac_pic_lock, flags); + if ((irq_desc[virq].status & IRQ_LEVEL) == 0) out_le32(&pmac_irq_hw[i]->ack, bit); - set_bit(irq_nr, ppc_cached_irq_mask); - pmac_set_irq_mask(irq_nr, 0); + __set_bit(src, ppc_cached_irq_mask); + __pmac_set_irq_mask(src, 0); + spin_unlock_irqrestore(&pmac_pic_lock, flags); return 0; } -static void pmac_mask_irq(unsigned int irq_nr) +static void pmac_mask_irq(unsigned int virq) { - clear_bit(irq_nr, ppc_cached_irq_mask); - pmac_set_irq_mask(irq_nr, 0); - mb(); + unsigned long flags; + unsigned int src = irq_map[virq].hwirq; + + spin_lock_irqsave(&pmac_pic_lock, flags); + 
__clear_bit(src, ppc_cached_irq_mask); + __pmac_set_irq_mask(src, 0); + spin_unlock_irqrestore(&pmac_pic_lock, flags); } -static void pmac_unmask_irq(unsigned int irq_nr) +static void pmac_unmask_irq(unsigned int virq) { - set_bit(irq_nr, ppc_cached_irq_mask); - pmac_set_irq_mask(irq_nr, 0); + unsigned long flags; + unsigned int src = irq_map[virq].hwirq; + + spin_lock_irqsave(&pmac_pic_lock, flags); + __set_bit(src, ppc_cached_irq_mask); + __pmac_set_irq_mask(src, 0); + spin_unlock_irqrestore(&pmac_pic_lock, flags); } -static void pmac_end_irq(unsigned int irq_nr) +static int pmac_retrigger(unsigned int virq) { - if (!(irq_desc[irq_nr].status & (IRQ_DISABLED|IRQ_INPROGRESS)) - && irq_desc[irq_nr].action) { - set_bit(irq_nr, ppc_cached_irq_mask); - pmac_set_irq_mask(irq_nr, 1); - } -} + unsigned long flags; + spin_lock_irqsave(&pmac_pic_lock, flags); + __pmac_retrigger(irq_map[virq].hwirq); + spin_unlock_irqrestore(&pmac_pic_lock, flags); + return 1; +} -struct hw_interrupt_type pmac_pic = { +static struct irq_chip pmac_pic = { .typename = " PMAC-PIC ", .startup = pmac_startup_irq, - .enable = pmac_unmask_irq, - .disable = pmac_mask_irq, - .ack = pmac_mask_and_ack_irq, - .end = pmac_end_irq, -}; - -struct hw_interrupt_type gatwick_pic = { - .typename = " GATWICK ", - .startup = pmac_startup_irq, - .enable = pmac_unmask_irq, - .disable = pmac_mask_irq, - .ack = pmac_mask_and_ack_irq, - .end = pmac_end_irq, + .mask = pmac_mask_irq, + .ack = pmac_ack_irq, + .mask_ack = pmac_mask_and_ack_irq, + .unmask = pmac_unmask_irq, + .retrigger = pmac_retrigger, }; static irqreturn_t gatwick_action(int cpl, void *dev_id, struct pt_regs *regs) { + unsigned long flags; int irq, bits; + int rc = IRQ_NONE; + spin_lock_irqsave(&pmac_pic_lock, flags); for (irq = max_irqs; (irq -= 32) >= max_real_irqs; ) { int i = irq >> 5; bits = in_le32(&pmac_irq_hw[i]->event) | ppc_lost_interrupts[i]; @@ -209,17 +226,20 @@ static irqreturn_t gatwick_action(int cpl, void *dev_id, struct pt_regs *regs) 
if (bits == 0) continue; irq += __ilog2(bits); + spin_unlock_irqrestore(&pmac_pic_lock, flags); __do_IRQ(irq, regs); - return IRQ_HANDLED; + spin_lock_irqsave(&pmac_pic_lock, flags); + rc = IRQ_HANDLED; } - printk("gatwick irq not from gatwick pic\n"); - return IRQ_NONE; + spin_unlock_irqrestore(&pmac_pic_lock, flags); + return rc; } -static int pmac_get_irq(struct pt_regs *regs) +static unsigned int pmac_pic_get_irq(struct pt_regs *regs) { int irq; unsigned long bits = 0; + unsigned long flags; #ifdef CONFIG_SMP void psurge_smp_message_recv(struct pt_regs *); @@ -227,9 +247,10 @@ static int pmac_get_irq(struct pt_regs *regs) /* IPI's are a hack on the powersurge -- Cort */ if ( smp_processor_id() != 0 ) { psurge_smp_message_recv(regs); - return -2; /* ignore, already handled */ + return NO_IRQ_IGNORE; /* ignore, already handled */ } #endif /* CONFIG_SMP */ + spin_lock_irqsave(&pmac_pic_lock, flags); for (irq = max_real_irqs; (irq -= 32) >= 0; ) { int i = irq >> 5; bits = in_le32(&pmac_irq_hw[i]->event) | ppc_lost_interrupts[i]; @@ -241,133 +262,10 @@ static int pmac_get_irq(struct pt_regs *regs) irq += __ilog2(bits); break; } - - return irq; -} - -/* This routine will fix some missing interrupt values in the device tree - * on the gatwick mac-io controller used by some PowerBooks - * - * Walking of OF nodes could use a bit more fixing up here, but it's not - * very important as this is all boot time code on static portions of the - * device-tree. - * - * However, the modifications done to "intrs" will have to be removed and - * replaced with proper updates of the "interrupts" properties or - * AAPL,interrupts, yet to be decided, once the dynamic parsing is there. 
- */ -static void __init pmac_fix_gatwick_interrupts(struct device_node *gw, - int irq_base) -{ - struct device_node *node; - int count; - - memset(gatwick_int_pool, 0, sizeof(gatwick_int_pool)); - count = 0; - for (node = NULL; (node = of_get_next_child(gw, node)) != NULL;) { - /* Fix SCC */ - if ((strcasecmp(node->name, "escc") == 0) && node->child) { - if (node->child->n_intrs < 3) { - node->child->intrs = &gatwick_int_pool[count]; - count += 3; - } - node->child->n_intrs = 3; - node->child->intrs[0].line = 15+irq_base; - node->child->intrs[1].line = 4+irq_base; - node->child->intrs[2].line = 5+irq_base; - printk(KERN_INFO "irq: fixed SCC on gatwick" - " (%d,%d,%d)\n", - node->child->intrs[0].line, - node->child->intrs[1].line, - node->child->intrs[2].line); - } - /* Fix media-bay & left SWIM */ - if (strcasecmp(node->name, "media-bay") == 0) { - struct device_node* ya_node; - - if (node->n_intrs == 0) - node->intrs = &gatwick_int_pool[count++]; - node->n_intrs = 1; - node->intrs[0].line = 29+irq_base; - printk(KERN_INFO "irq: fixed media-bay on gatwick" - " (%d)\n", node->intrs[0].line); - - ya_node = node->child; - while(ya_node) { - if (strcasecmp(ya_node->name, "floppy") == 0) { - if (ya_node->n_intrs < 2) { - ya_node->intrs = &gatwick_int_pool[count]; - count += 2; - } - ya_node->n_intrs = 2; - ya_node->intrs[0].line = 19+irq_base; - ya_node->intrs[1].line = 1+irq_base; - printk(KERN_INFO "irq: fixed floppy on second controller (%d,%d)\n", - ya_node->intrs[0].line, ya_node->intrs[1].line); - } - if (strcasecmp(ya_node->name, "ata4") == 0) { - if (ya_node->n_intrs < 2) { - ya_node->intrs = &gatwick_int_pool[count]; - count += 2; - } - ya_node->n_intrs = 2; - ya_node->intrs[0].line = 14+irq_base; - ya_node->intrs[1].line = 3+irq_base; - printk(KERN_INFO "irq: fixed ide on second controller (%d,%d)\n", - ya_node->intrs[0].line, ya_node->intrs[1].line); - } - ya_node = ya_node->sibling; - } - } - } - if (count > 10) { - printk("WARNING !! 
Gatwick interrupt pool overflow\n"); - printk(" GATWICK_IRQ_POOL_SIZE = %d\n", GATWICK_IRQ_POOL_SIZE); - printk(" requested = %d\n", count); - } -} - -/* - * The PowerBook 3400/2400/3500 can have a combo ethernet/modem - * card which includes an ohare chip that acts as a second interrupt - * controller. If we find this second ohare, set it up and fix the - * interrupt value in the device tree for the ethernet chip. - */ -static void __init enable_second_ohare(struct device_node *np) -{ - unsigned char bus, devfn; - unsigned short cmd; - struct device_node *ether; - - /* This code doesn't strictly belong here, it could be part of - * either the PCI initialisation or the feature code. It's kept - * here for historical reasons. - */ - if (pci_device_from_OF_node(np, &bus, &devfn) == 0) { - struct pci_controller* hose = - pci_find_hose_for_OF_device(np); - if (!hose) { - printk(KERN_ERR "Can't find PCI hose for OHare2 !\n"); - return; - } - early_read_config_word(hose, bus, devfn, PCI_COMMAND, &cmd); - cmd |= PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER; - cmd &= ~PCI_COMMAND_IO; - early_write_config_word(hose, bus, devfn, PCI_COMMAND, cmd); - } - - /* Fix interrupt for the modem/ethernet combo controller. The number - * in the device tree (27) is bogus (correct for the ethernet-only - * board but not the combo ethernet/modem board). - * The real interrupt is 28 on the second controller -> 28+32 = 60. 
- */ - ether = of_find_node_by_name(NULL, "pci1011,14"); - if (ether && ether->n_intrs > 0) { - ether->intrs[0].line = 60; - printk(KERN_INFO "irq: Fixed ethernet IRQ to %d\n", - ether->intrs[0].line); - } - of_node_put(ether); + spin_unlock_irqrestore(&pmac_pic_lock, flags); + if (unlikely(irq < 0)) + return NO_IRQ; + return irq_linear_revmap(pmac_pic_host, irq); } #ifdef CONFIG_XMON @@ -386,17 +284,60 @@ static struct irqaction gatwick_cascade_action = { .name = "cascade", }; +static int pmac_pic_host_match(struct irq_host *h, struct device_node *node) +{ + /* We match all, we don't always have a node anyway */ + return 1; +} + +static int pmac_pic_host_map(struct irq_host *h, unsigned int virq, + irq_hw_number_t hw, unsigned int flags) +{ + struct irq_desc *desc = get_irq_desc(virq); + int level; + + if (hw >= max_irqs) + return -EINVAL; + + /* Mark level interrupts, set delayed disable for edge ones and set + * handlers + */ + level = !!(level_mask[hw >> 5] & (1UL << (hw & 0x1f))); + if (level) + desc->status |= IRQ_LEVEL; + else + desc->status |= IRQ_DELAYED_DISABLE; + set_irq_chip_and_handler(virq, &pmac_pic, level ? 
+ handle_level_irq : handle_edge_irq); + return 0; +} + +static int pmac_pic_host_xlate(struct irq_host *h, struct device_node *ct, + u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, + unsigned int *out_flags) + +{ + *out_hwirq = *intspec; + return 0; +} + +static struct irq_host_ops pmac_pic_host_ops = { + .match = pmac_pic_host_match, + .map = pmac_pic_host_map, + .xlate = pmac_pic_host_xlate, +}; + static void __init pmac_pic_probe_oldstyle(void) { int i; - int irq_cascade = -1; struct device_node *master = NULL; struct device_node *slave = NULL; u8 __iomem *addr; struct resource r; /* Set our get_irq function */ - ppc_md.get_irq = pmac_get_irq; + ppc_md.get_irq = pmac_pic_get_irq; /* * Find the interrupt controller type & node @@ -414,7 +355,6 @@ static void __init pmac_pic_probe_oldstyle(void) if (slave) { max_irqs = 64; level_mask[1] = OHARE_LEVEL_MASK; - enable_second_ohare(slave); } } else if ((master = of_find_node_by_name(NULL, "mac-io")) != NULL) { max_irqs = max_real_irqs = 64; @@ -438,14 +378,18 @@ static void __init pmac_pic_probe_oldstyle(void) max_irqs = 128; level_mask[2] = HEATHROW_LEVEL_MASK; level_mask[3] = 0; - pmac_fix_gatwick_interrupts(slave, max_real_irqs); } } BUG_ON(master == NULL); - /* Set the handler for the main PIC */ - for ( i = 0; i < max_real_irqs ; i++ ) - irq_desc[i].chip = &pmac_pic; + /* + * Allocate an irq host + */ + pmac_pic_host = irq_alloc_host(IRQ_HOST_MAP_LINEAR, max_irqs, + &pmac_pic_host_ops, + max_irqs); + BUG_ON(pmac_pic_host == NULL); + irq_set_default_host(pmac_pic_host); /* Get addresses of first controller if we have a node for it */ BUG_ON(of_address_to_resource(master, 0, &r)); @@ -472,39 +416,38 @@ static void __init pmac_pic_probe_oldstyle(void) pmac_irq_hw[i++] = (volatile struct pmac_irq_hw __iomem *) (addr + 0x10); - irq_cascade = slave->intrs[0].line; + pmac_irq_cascade = irq_of_parse_and_map(slave, 0); printk(KERN_INFO "irq: Found slave Apple PIC %s for %d irqs" " cascade: %d\n", 
slave->full_name, - max_irqs - max_real_irqs, irq_cascade); + max_irqs - max_real_irqs, pmac_irq_cascade); } of_node_put(slave); - /* disable all interrupts in all controllers */ + /* Disable all interrupts in all controllers */ for (i = 0; i * 32 < max_irqs; ++i) out_le32(&pmac_irq_hw[i]->enable, 0); - /* mark level interrupts */ - for (i = 0; i < max_irqs; i++) - if (level_mask[i >> 5] & (1UL << (i & 0x1f))) - irq_desc[i].status = IRQ_LEVEL; + /* Hookup cascade irq */ + if (slave && pmac_irq_cascade != NO_IRQ) + setup_irq(pmac_irq_cascade, &gatwick_cascade_action); - /* Setup handlers for secondary controller and hook cascade irq*/ - if (slave) { - for ( i = max_real_irqs ; i < max_irqs ; i++ ) - irq_desc[i].chip = &gatwick_pic; - setup_irq(irq_cascade, &gatwick_cascade_action); - } printk(KERN_INFO "irq: System has %d possible interrupts\n", max_irqs); #ifdef CONFIG_XMON - setup_irq(20, &xmon_action); + setup_irq(irq_create_mapping(NULL, 20, 0), &xmon_action); #endif } #endif /* CONFIG_PPC32 */ -static int pmac_u3_cascade(struct pt_regs *regs, void *data) +static void pmac_u3_cascade(unsigned int irq, struct irq_desc *desc, + struct pt_regs *regs) { - return mpic_get_one_irq((struct mpic *)data, regs); + struct mpic *mpic = desc->handler_data; + + unsigned int cascade_irq = mpic_get_one_irq(mpic, regs); + if (cascade_irq != NO_IRQ) + generic_handle_irq(cascade_irq, regs); + desc->chip->eoi(irq); } static void __init pmac_pic_setup_mpic_nmi(struct mpic *mpic) @@ -514,21 +457,20 @@ static void __init pmac_pic_setup_mpic_nmi(struct mpic *mpic) int nmi_irq; pswitch = of_find_node_by_name(NULL, "programmer-switch"); - if (pswitch && pswitch->n_intrs) { - nmi_irq = pswitch->intrs[0].line; - mpic_irq_set_priority(nmi_irq, 9); - setup_irq(nmi_irq, &xmon_action); + if (pswitch) { + nmi_irq = irq_of_parse_and_map(pswitch, 0); + if (nmi_irq != NO_IRQ) { + mpic_irq_set_priority(nmi_irq, 9); + setup_irq(nmi_irq, &xmon_action); + } + of_node_put(pswitch); } - 
of_node_put(pswitch); #endif /* defined(CONFIG_XMON) && defined(CONFIG_PPC32) */ } static struct mpic * __init pmac_setup_one_mpic(struct device_node *np, int master) { - unsigned char senses[128]; - int offset = master ? 0 : 128; - int count = master ? 128 : 124; const char *name = master ? " MPIC 1 " : " MPIC 2 "; struct resource r; struct mpic *mpic; @@ -541,8 +483,6 @@ static struct mpic * __init pmac_setup_one_mpic(struct device_node *np, pmac_call_feature(PMAC_FTR_ENABLE_MPIC, np, 0, 0); - prom_get_irq_senses(senses, offset, offset + count); - flags |= MPIC_WANTS_RESET; if (get_property(np, "big-endian", NULL)) flags |= MPIC_BIG_ENDIAN; @@ -553,8 +493,7 @@ static struct mpic * __init pmac_setup_one_mpic(struct device_node *np, if (master && (flags & MPIC_BIG_ENDIAN)) flags |= MPIC_BROKEN_U3; - mpic = mpic_alloc(r.start, flags, 0, offset, count, master ? 252 : 0, - senses, count, name); + mpic = mpic_alloc(np, r.start, flags, 0, 0, name); if (mpic == NULL) return NULL; @@ -567,6 +506,7 @@ static int __init pmac_pic_probe_mpic(void) { struct mpic *mpic1, *mpic2; struct device_node *np, *master = NULL, *slave = NULL; + unsigned int cascade; /* We can have up to 2 MPICs cascaded */ for (np = NULL; (np = of_find_node_by_type(np, "open-pic")) @@ -603,8 +543,15 @@ static int __init pmac_pic_probe_mpic(void) of_node_put(master); /* No slave, let's go out */ - if (slave == NULL || slave->n_intrs < 1) + if (slave == NULL) + return 0; + + /* Get/Map slave interrupt */ + cascade = irq_of_parse_and_map(slave, 0); + if (cascade == NO_IRQ) { + printk(KERN_ERR "Failed to map cascade IRQ\n"); return 0; + } mpic2 = pmac_setup_one_mpic(slave, 0); if (mpic2 == NULL) { @@ -612,7 +559,8 @@ static int __init pmac_pic_probe_mpic(void) of_node_put(slave); return 0; } - mpic_setup_cascade(slave->intrs[0].line, pmac_u3_cascade, mpic2); + set_irq_data(cascade, mpic2); + set_irq_chained_handler(cascade, pmac_u3_cascade); of_node_put(slave); return 0; @@ -621,6 +569,19 @@ static int 
__init pmac_pic_probe_mpic(void) void __init pmac_pic_init(void) { + unsigned int flags = 0; + + /* We configure the OF parsing based on our oldworld vs. newworld + * platform type and whether we were booted by BootX. + */ +#ifdef CONFIG_PPC32 + if (!pmac_newworld) + flags |= OF_IMAP_OLDWORLD_MAC; + if (get_property(of_chosen, "linux,bootx", NULL) != NULL) + flags |= OF_IMAP_NO_PHANDLE; + of_irq_map_init(flags); +#endif /* CONFIG_PPC32 */ + /* We first try to detect Apple's new Core99 chipset, since mac-io * is quite different on those machines and contains an IBM MPIC2. */ @@ -643,6 +604,7 @@ unsigned long sleep_save_mask[2]; /* This used to be passed by the PMU driver but that link got * broken with the new driver model. We use this tweak for now... + * We really want to do things differently though... */ static int pmacpic_find_viaint(void) { @@ -656,7 +618,7 @@ static int pmacpic_find_viaint(void) np = of_find_node_by_name(NULL, "via-pmu"); if (np == NULL) goto not_found; - viaint = np->intrs[0].line; + viaint = irq_of_parse_and_map(np, 0);; #endif /* CONFIG_ADB_PMU */ not_found: diff --git a/arch/powerpc/platforms/powermac/pmac.h b/arch/powerpc/platforms/powermac/pmac.h index 21c7b0f8f329..94e7b24b840b 100644 --- a/arch/powerpc/platforms/powermac/pmac.h +++ b/arch/powerpc/platforms/powermac/pmac.h @@ -12,6 +12,8 @@ struct rtc_time; +extern int pmac_newworld; + extern long pmac_time_init(void); extern unsigned long pmac_get_boot_time(void); extern void pmac_get_rtc_time(struct rtc_time *); diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 8654b5f07836..31a9da769fa2 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -613,9 +613,6 @@ static void __init pmac_init_early(void) udbg_adb_init(!!strstr(cmd_line, "btextdbg")); #ifdef CONFIG_PPC64 - /* Setup interrupt mapping options */ - ppc64_interrupt_controller = IC_OPEN_PIC; - iommu_init_early_dart(); #endif } diff
--git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 9639c66b453d..9df783088b61 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -72,32 +72,62 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id, /* #define DEBUG */ -static void request_ras_irqs(struct device_node *np, char *propname, + +static void request_ras_irqs(struct device_node *np, irqreturn_t (*handler)(int, void *, struct pt_regs *), const char *name) { - unsigned int *ireg, len, i; - int virq, n_intr; - - ireg = (unsigned int *)get_property(np, propname, &len); - if (ireg == NULL) - return; - n_intr = prom_n_intr_cells(np); - len /= n_intr * sizeof(*ireg); - - for (i = 0; i < len; i++) { - virq = virt_irq_create_mapping(*ireg); - if (virq == NO_IRQ) { - printk(KERN_ERR "Unable to allocate interrupt " - "number for %s\n", np->full_name); - return; + int i, index, count = 0; + struct of_irq oirq; + u32 *opicprop; + unsigned int opicplen; + unsigned int virqs[16]; + + /* Check for obsolete "open-pic-interrupt" property. 
If present, then + * map those interrupts using the default interrupt host and default + * trigger + */ + opicprop = (u32 *)get_property(np, "open-pic-interrupt", &opicplen); + if (opicprop) { + opicplen /= sizeof(u32); + for (i = 0; i < opicplen; i++) { + if (count > 15) + break; + virqs[count] = irq_create_mapping(NULL, *(opicprop++), + IRQ_TYPE_NONE); + if (virqs[count] == NO_IRQ) + printk(KERN_ERR "Unable to allocate interrupt " + "number for %s\n", np->full_name); + else + count++; + } - if (request_irq(irq_offset_up(virq), handler, 0, name, NULL)) { + } + /* Else use normal interrupt tree parsing */ + else { + /* First try to do a proper OF tree parsing */ + for (index = 0; of_irq_map_one(np, index, &oirq) == 0; + index++) { + if (count > 15) + break; + virqs[count] = irq_create_of_mapping(oirq.controller, + oirq.specifier, + oirq.size); + if (virqs[count] == NO_IRQ) + printk(KERN_ERR "Unable to allocate interrupt " + "number for %s\n", np->full_name); + else + count++; + } + } + + /* Now request them */ + for (i = 0; i < count; i++) { + if (request_irq(virqs[i], handler, 0, name, NULL)) { printk(KERN_ERR "Unable to request interrupt %d for " - "%s\n", irq_offset_up(virq), np->full_name); + "%s\n", virqs[i], np->full_name); return; } - ireg += n_intr; } } @@ -115,20 +145,14 @@ static int __init init_ras_IRQ(void) /* Internal Errors */ np = of_find_node_by_path("/event-sources/internal-errors"); if (np != NULL) { - request_ras_irqs(np, "open-pic-interrupt", ras_error_interrupt, - "RAS_ERROR"); - request_ras_irqs(np, "interrupts", ras_error_interrupt, - "RAS_ERROR"); + request_ras_irqs(np, ras_error_interrupt, "RAS_ERROR"); of_node_put(np); } /* EPOW Events */ np = of_find_node_by_path("/event-sources/epow-events"); if (np != NULL) { - request_ras_irqs(np, "open-pic-interrupt", ras_epow_interrupt, - "RAS_EPOW"); - request_ras_irqs(np, "interrupts", ras_epow_interrupt, - "RAS_EPOW"); + request_ras_irqs(np, ras_epow_interrupt, "RAS_EPOW"); of_node_put(np); } @@ 
-162,7 +186,7 @@ ras_epow_interrupt(int irq, void *dev_id, struct pt_regs * regs) status = rtas_call(ras_check_exception_token, 6, 1, NULL, RAS_VECTOR_OFFSET, - virt_irq_to_real(irq_offset_down(irq)), + irq_map[irq].hwirq, RTAS_EPOW_WARNING | RTAS_POWERMGM_EVENTS, critical, __pa(&ras_log_buf), rtas_get_error_log_max()); @@ -198,7 +222,7 @@ ras_error_interrupt(int irq, void *dev_id, struct pt_regs * regs) status = rtas_call(ras_check_exception_token, 6, 1, NULL, RAS_VECTOR_OFFSET, - virt_irq_to_real(irq_offset_down(irq)), + irq_map[irq].hwirq, RTAS_INTERNAL_ERROR, 1 /*Time Critical */, __pa(&ras_log_buf), rtas_get_error_log_max()); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 999509d28af8..54a52437265c 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -76,6 +76,9 @@ #define DBG(fmt...) #endif +/* move those away to a .h */ +extern void smp_init_pseries_mpic(void); +extern void smp_init_pseries_xics(void); extern void find_udbg_vterm(void); int fwnmi_active; /* TRUE if an FWNMI handler is present */ @@ -83,7 +86,7 @@ int fwnmi_active; /* TRUE if an FWNMI handler is present */ static void pseries_shared_idle_sleep(void); static void pseries_dedicated_idle_sleep(void); -struct mpic *pSeries_mpic; +static struct device_node *pSeries_mpic_node; static void pSeries_show_cpuinfo(struct seq_file *m) { @@ -118,63 +121,92 @@ static void __init fwnmi_init(void) fwnmi_active = 1; } -static void __init pSeries_init_mpic(void) +void pseries_8259_cascade(unsigned int irq, struct irq_desc *desc, + struct pt_regs *regs) { - unsigned int *addrp; - struct device_node *np; - unsigned long intack = 0; - - /* All ISUs are setup, complete initialization */ - mpic_init(pSeries_mpic); - - /* Check what kind of cascade ACK we have */ - if (!(np = of_find_node_by_name(NULL, "pci")) - || !(addrp = (unsigned int *) - get_property(np, "8259-interrupt-acknowledge", NULL))) - printk(KERN_ERR 
"Cannot find pci to get ack address\n"); - else - intack = addrp[prom_n_addr_cells(np)-1]; - of_node_put(np); - - /* Setup the legacy interrupts & controller */ - i8259_init(intack, 0); - - /* Hook cascade to mpic */ - mpic_setup_cascade(NUM_ISA_INTERRUPTS, i8259_irq_cascade, NULL); + unsigned int cascade_irq = i8259_irq(regs); + if (cascade_irq != NO_IRQ) + generic_handle_irq(cascade_irq, regs); + desc->chip->eoi(irq); } -static void __init pSeries_setup_mpic(void) +static void __init pseries_mpic_init_IRQ(void) { + struct device_node *np, *old, *cascade = NULL; + unsigned int *addrp; + unsigned long intack = 0; unsigned int *opprop; unsigned long openpic_addr = 0; - unsigned char senses[NR_IRQS - NUM_ISA_INTERRUPTS]; - struct device_node *root; - int irq_count; + unsigned int cascade_irq; + int naddr, n, i, opplen; + struct mpic *mpic; - /* Find the Open PIC if present */ - root = of_find_node_by_path("/"); - opprop = (unsigned int *) get_property(root, "platform-open-pic", NULL); + np = of_find_node_by_path("/"); + naddr = prom_n_addr_cells(np); + opprop = (unsigned int *) get_property(np, "platform-open-pic", &opplen); if (opprop != 0) { - int n = prom_n_addr_cells(root); - - for (openpic_addr = 0; n > 0; --n) - openpic_addr = (openpic_addr << 32) + *opprop++; + openpic_addr = of_read_number(opprop, naddr); printk(KERN_DEBUG "OpenPIC addr: %lx\n", openpic_addr); } - of_node_put(root); + of_node_put(np); BUG_ON(openpic_addr == 0); - /* Get the sense values from OF */ - prom_get_irq_senses(senses, NUM_ISA_INTERRUPTS, NR_IRQS); - /* Setup the openpic driver */ - irq_count = NR_IRQS - NUM_ISA_INTERRUPTS - 4; /* leave room for IPIs */ - pSeries_mpic = mpic_alloc(openpic_addr, MPIC_PRIMARY, - 16, 16, irq_count, /* isu size, irq offset, irq count */ - NR_IRQS - 4, /* ipi offset */ - senses, irq_count, /* sense & sense size */ - " MPIC "); + mpic = mpic_alloc(pSeries_mpic_node, openpic_addr, + MPIC_PRIMARY, + 16, 250, /* isu size, irq count */ + " MPIC "); + 
BUG_ON(mpic == NULL); + + /* Add ISUs */ + opplen /= sizeof(u32); + for (n = 0, i = naddr; i < opplen; i += naddr, n++) { + unsigned long isuaddr = of_read_number(opprop + i, naddr); + mpic_assign_isu(mpic, n, isuaddr); + } + + /* All ISUs are setup, complete initialization */ + mpic_init(mpic); + + /* Look for cascade */ + for_each_node_by_type(np, "interrupt-controller") + if (device_is_compatible(np, "chrp,iic")) { + cascade = np; + break; + } + if (cascade == NULL) + return; + + cascade_irq = irq_of_parse_and_map(cascade, 0); + if (cascade == NO_IRQ) { + printk(KERN_ERR "xics: failed to map cascade interrupt"); + return; + } + + /* Check ACK type */ + for (old = of_node_get(cascade); old != NULL ; old = np) { + np = of_get_parent(old); + of_node_put(old); + if (np == NULL) + break; + if (strcmp(np->name, "pci") != 0) + continue; + addrp = (u32 *)get_property(np, "8259-interrupt-acknowledge", + NULL); + if (addrp == NULL) + continue; + naddr = prom_n_addr_cells(np); + intack = addrp[naddr-1]; + if (naddr > 1) + intack |= ((unsigned long)addrp[naddr-2]) << 32; + } + if (intack) + printk(KERN_DEBUG "mpic: PCI 8259 intack at 0x%016lx\n", + intack); + i8259_init(cascade, intack); + of_node_put(cascade); + set_irq_chained_handler(cascade_irq, pseries_8259_cascade); } static void pseries_lpar_enable_pmcs(void) @@ -192,23 +224,67 @@ static void pseries_lpar_enable_pmcs(void) get_lppaca()->pmcregs_in_use = 1; } -static void __init pSeries_setup_arch(void) +#ifdef CONFIG_KEXEC +static void pseries_kexec_cpu_down_mpic(int crash_shutdown, int secondary) { - /* Fixup ppc_md depending on the type of interrupt controller */ - if (ppc64_interrupt_controller == IC_OPEN_PIC) { - ppc_md.init_IRQ = pSeries_init_mpic; - ppc_md.get_irq = mpic_get_irq; - /* Allocate the mpic now, so that find_and_init_phbs() can - * fill the ISUs */ - pSeries_setup_mpic(); - } else { - ppc_md.init_IRQ = xics_init_IRQ; - ppc_md.get_irq = xics_get_irq; + mpic_teardown_this_cpu(secondary); +} + +static 
void pseries_kexec_cpu_down_xics(int crash_shutdown, int secondary) +{ + /* Don't risk a hypervisor call if we're crashing */ + if (firmware_has_feature(FW_FEATURE_SPLPAR) && !crash_shutdown) { + unsigned long vpa = __pa(get_lppaca()); + + if (unregister_vpa(hard_smp_processor_id(), vpa)) { + printk("VPA deregistration of cpu %u (hw_cpu_id %d) " + "failed\n", smp_processor_id(), + hard_smp_processor_id()); + } } + xics_teardown_cpu(secondary); +} +#endif /* CONFIG_KEXEC */ +static void __init pseries_discover_pic(void) +{ + struct device_node *np; + char *typep; + + for (np = NULL; (np = of_find_node_by_name(np, + "interrupt-controller"));) { + typep = (char *)get_property(np, "compatible", NULL); + if (strstr(typep, "open-pic")) { + pSeries_mpic_node = of_node_get(np); + ppc_md.init_IRQ = pseries_mpic_init_IRQ; + ppc_md.get_irq = mpic_get_irq; +#ifdef CONFIG_KEXEC + ppc_md.kexec_cpu_down = pseries_kexec_cpu_down_mpic; +#endif +#ifdef CONFIG_SMP + smp_init_pseries_mpic(); +#endif + return; + } else if (strstr(typep, "ppc-xicp")) { + ppc_md.init_IRQ = xics_init_IRQ; +#ifdef CONFIG_KEXEC + ppc_md.kexec_cpu_down = pseries_kexec_cpu_down_xics; +#endif #ifdef CONFIG_SMP - smp_init_pSeries(); + smp_init_pseries_xics(); #endif + return; + } + } + printk(KERN_ERR "pSeries_discover_pic: failed to recognize" + " interrupt-controller\n"); +} + +static void __init pSeries_setup_arch(void) +{ + /* Discover PIC type and setup ppc_md accordingly */ + pseries_discover_pic(); + /* openpic global configuration register (64-bit format). */ /* openpic Interrupt Source Unit pointer (64-bit format). */ /* python0 facility area (mmio) (64-bit format) REAL address. 
*/ @@ -260,41 +336,11 @@ static int __init pSeries_init_panel(void) } arch_initcall(pSeries_init_panel); -static void __init pSeries_discover_pic(void) -{ - struct device_node *np; - char *typep; - - /* - * Setup interrupt mapping options that are needed for finish_device_tree - * to properly parse the OF interrupt tree & do the virtual irq mapping - */ - __irq_offset_value = NUM_ISA_INTERRUPTS; - ppc64_interrupt_controller = IC_INVALID; - for (np = NULL; (np = of_find_node_by_name(np, "interrupt-controller"));) { - typep = (char *)get_property(np, "compatible", NULL); - if (strstr(typep, "open-pic")) { - ppc64_interrupt_controller = IC_OPEN_PIC; - break; - } else if (strstr(typep, "ppc-xicp")) { - ppc64_interrupt_controller = IC_PPC_XIC; - break; - } - } - if (ppc64_interrupt_controller == IC_INVALID) - printk("pSeries_discover_pic: failed to recognize" - " interrupt-controller\n"); - -} - static void pSeries_mach_cpu_die(void) { local_irq_disable(); idle_task_exit(); - /* Some hardware requires clearing the CPPR, while other hardware does not - * it is safe either way - */ - pSeriesLP_cppr_info(0, 0); + xics_teardown_cpu(0); rtas_stop_self(); /* Should never get here... 
*/ BUG(); @@ -332,8 +378,6 @@ static void __init pSeries_init_early(void) iommu_init_early_pSeries(); - pSeries_discover_pic(); - DBG(" <- pSeries_init_early()\n"); } @@ -505,27 +549,6 @@ static int pSeries_pci_probe_mode(struct pci_bus *bus) return PCI_PROBE_NORMAL; } -#ifdef CONFIG_KEXEC -static void pseries_kexec_cpu_down(int crash_shutdown, int secondary) -{ - /* Don't risk a hypervisor call if we're crashing */ - if (firmware_has_feature(FW_FEATURE_SPLPAR) && !crash_shutdown) { - unsigned long vpa = __pa(get_lppaca()); - - if (unregister_vpa(hard_smp_processor_id(), vpa)) { - printk("VPA deregistration of cpu %u (hw_cpu_id %d) " - "failed\n", smp_processor_id(), - hard_smp_processor_id()); - } - } - - if (ppc64_interrupt_controller == IC_OPEN_PIC) - mpic_teardown_this_cpu(secondary); - else - xics_teardown_cpu(secondary); -} -#endif - define_machine(pseries) { .name = "pSeries", .probe = pSeries_probe, @@ -550,7 +573,6 @@ define_machine(pseries) { .system_reset_exception = pSeries_system_reset_exception, .machine_check_exception = pSeries_machine_check_exception, #ifdef CONFIG_KEXEC - .kexec_cpu_down = pseries_kexec_cpu_down, .machine_kexec = default_machine_kexec, .machine_kexec_prepare = default_machine_kexec_prepare, .machine_crash_shutdown = default_machine_crash_shutdown, diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index 4ad144df49c2..ac61098ff401 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -416,27 +416,12 @@ static struct smp_ops_t pSeries_xics_smp_ops = { #endif /* This is called very early */ -void __init smp_init_pSeries(void) +static void __init smp_init_pseries(void) { int i; DBG(" -> smp_init_pSeries()\n"); - switch (ppc64_interrupt_controller) { -#ifdef CONFIG_MPIC - case IC_OPEN_PIC: - smp_ops = &pSeries_mpic_smp_ops; - break; -#endif -#ifdef CONFIG_XICS - case IC_PPC_XIC: - smp_ops = &pSeries_xics_smp_ops; - break; -#endif - default: - panic("Invalid 
interrupt controller"); - } - #ifdef CONFIG_HOTPLUG_CPU smp_ops->cpu_disable = pSeries_cpu_disable; smp_ops->cpu_die = pSeries_cpu_die; @@ -471,3 +456,18 @@ void __init smp_init_pSeries(void) DBG(" <- smp_init_pSeries()\n"); } +#ifdef CONFIG_MPIC +void __init smp_init_pseries_mpic(void) +{ + smp_ops = &pSeries_mpic_smp_ops; + + smp_init_pseries(); +} +#endif + +void __init smp_init_pseries_xics(void) +{ + smp_ops = &pSeries_xics_smp_ops; + + smp_init_pseries(); +} diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c index 2ffebe31cb2d..716972aa9777 100644 --- a/arch/powerpc/platforms/pseries/xics.c +++ b/arch/powerpc/platforms/pseries/xics.c @@ -8,6 +8,9 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ + +#undef DEBUG + #include <linux/types.h> #include <linux/threads.h> #include <linux/kernel.h> @@ -19,6 +22,7 @@ #include <linux/gfp.h> #include <linux/radix-tree.h> #include <linux/cpu.h> + #include <asm/firmware.h> #include <asm/prom.h> #include <asm/io.h> @@ -31,26 +35,6 @@ #include "xics.h" -static unsigned int xics_startup(unsigned int irq); -static void xics_enable_irq(unsigned int irq); -static void xics_disable_irq(unsigned int irq); -static void xics_mask_and_ack_irq(unsigned int irq); -static void xics_end_irq(unsigned int irq); -static void xics_set_affinity(unsigned int irq_nr, cpumask_t cpumask); - -static struct hw_interrupt_type xics_pic = { - .typename = " XICS ", - .startup = xics_startup, - .enable = xics_enable_irq, - .disable = xics_disable_irq, - .ack = xics_mask_and_ack_irq, - .end = xics_end_irq, - .set_affinity = xics_set_affinity -}; - -/* This is used to map real irq numbers to virtual */ -static struct radix_tree_root irq_map = RADIX_TREE_INIT(GFP_ATOMIC); - #define XICS_IPI 2 #define XICS_IRQ_SPURIOUS 0 @@ -81,12 +65,12 @@ struct xics_ipl { static struct xics_ipl __iomem *xics_per_cpu[NR_CPUS]; -static int 
xics_irq_8259_cascade = 0; -static int xics_irq_8259_cascade_real = 0; static unsigned int default_server = 0xFF; static unsigned int default_distrib_server = 0; static unsigned int interrupt_server_size = 8; +static struct irq_host *xics_host; + /* * XICS only has a single IPI, so encode the messages per CPU */ @@ -98,48 +82,34 @@ static int ibm_set_xive; static int ibm_int_on; static int ibm_int_off; -typedef struct { - int (*xirr_info_get)(int cpu); - void (*xirr_info_set)(int cpu, int val); - void (*cppr_info)(int cpu, u8 val); - void (*qirr_info)(int cpu, u8 val); -} xics_ops; +/* Direct HW low level accessors */ -/* SMP */ -static int pSeries_xirr_info_get(int n_cpu) +static inline unsigned int direct_xirr_info_get(int n_cpu) { return in_be32(&xics_per_cpu[n_cpu]->xirr.word); } -static void pSeries_xirr_info_set(int n_cpu, int value) +static inline void direct_xirr_info_set(int n_cpu, int value) { out_be32(&xics_per_cpu[n_cpu]->xirr.word, value); } -static void pSeries_cppr_info(int n_cpu, u8 value) +static inline void direct_cppr_info(int n_cpu, u8 value) { out_8(&xics_per_cpu[n_cpu]->xirr.bytes[0], value); } -static void pSeries_qirr_info(int n_cpu, u8 value) +static inline void direct_qirr_info(int n_cpu, u8 value) { out_8(&xics_per_cpu[n_cpu]->qirr.bytes[0], value); } -static xics_ops pSeries_ops = { - pSeries_xirr_info_get, - pSeries_xirr_info_set, - pSeries_cppr_info, - pSeries_qirr_info -}; -static xics_ops *ops = &pSeries_ops; +/* LPAR low level accessors */ -/* LPAR */ - static inline long plpar_eoi(unsigned long xirr) { return plpar_hcall_norets(H_EOI, xirr); @@ -161,7 +131,7 @@ static inline long plpar_xirr(unsigned long *xirr_ret) return plpar_hcall(H_XIRR, 0, 0, 0, 0, xirr_ret, &dummy, &dummy); } -static int pSeriesLP_xirr_info_get(int n_cpu) +static inline unsigned int lpar_xirr_info_get(int n_cpu) { unsigned long lpar_rc; unsigned long return_value; @@ -169,10 +139,10 @@ static int pSeriesLP_xirr_info_get(int n_cpu) lpar_rc = 
plpar_xirr(&return_value); if (lpar_rc != H_SUCCESS) panic(" bad return code xirr - rc = %lx \n", lpar_rc); - return (int)return_value; + return (unsigned int)return_value; } -static void pSeriesLP_xirr_info_set(int n_cpu, int value) +static inline void lpar_xirr_info_set(int n_cpu, int value) { unsigned long lpar_rc; unsigned long val64 = value & 0xffffffff; @@ -183,7 +153,7 @@ static void pSeriesLP_xirr_info_set(int n_cpu, int value) val64); } -void pSeriesLP_cppr_info(int n_cpu, u8 value) +static inline void lpar_cppr_info(int n_cpu, u8 value) { unsigned long lpar_rc; @@ -192,7 +162,7 @@ void pSeriesLP_cppr_info(int n_cpu, u8 value) panic("bad return code cppr - rc = %lx\n", lpar_rc); } -static void pSeriesLP_qirr_info(int n_cpu , u8 value) +static inline void lpar_qirr_info(int n_cpu , u8 value) { unsigned long lpar_rc; @@ -201,43 +171,16 @@ static void pSeriesLP_qirr_info(int n_cpu , u8 value) panic("bad return code qirr - rc = %lx\n", lpar_rc); } -xics_ops pSeriesLP_ops = { - pSeriesLP_xirr_info_get, - pSeriesLP_xirr_info_set, - pSeriesLP_cppr_info, - pSeriesLP_qirr_info -}; - -static unsigned int xics_startup(unsigned int virq) -{ - unsigned int irq; - - irq = irq_offset_down(virq); - if (radix_tree_insert(&irq_map, virt_irq_to_real(irq), - &virt_irq_to_real_map[irq]) == -ENOMEM) - printk(KERN_CRIT "Out of memory creating real -> virtual" - " IRQ mapping for irq %u (real 0x%x)\n", - virq, virt_irq_to_real(irq)); - xics_enable_irq(virq); - return 0; /* return value is ignored */ -} -static unsigned int real_irq_to_virt(unsigned int real_irq) -{ - unsigned int *ptr; +/* High level handlers and init code */ - ptr = radix_tree_lookup(&irq_map, real_irq); - if (ptr == NULL) - return NO_IRQ; - return ptr - virt_irq_to_real_map; -} #ifdef CONFIG_SMP -static int get_irq_server(unsigned int irq) +static int get_irq_server(unsigned int virq) { unsigned int server; /* For the moment only implement delivery to all cpus or one cpu */ - cpumask_t cpumask = 
irq_desc[irq].affinity; + cpumask_t cpumask = irq_desc[virq].affinity; cpumask_t tmp = CPU_MASK_NONE; if (!distribute_irqs) @@ -258,23 +201,28 @@ static int get_irq_server(unsigned int irq) } #else -static int get_irq_server(unsigned int irq) +static int get_irq_server(unsigned int virq) { return default_server; } #endif -static void xics_enable_irq(unsigned int virq) + +static void xics_unmask_irq(unsigned int virq) { unsigned int irq; int call_status; unsigned int server; - irq = virt_irq_to_real(irq_offset_down(virq)); - if (irq == XICS_IPI) + pr_debug("xics: unmask virq %d\n", virq); + + irq = (unsigned int)irq_map[virq].hwirq; + pr_debug(" -> map to hwirq 0x%x\n", irq); + if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS) return; server = get_irq_server(virq); + call_status = rtas_call(ibm_set_xive, 3, 1, NULL, irq, server, DEFAULT_PRIORITY); if (call_status != 0) { @@ -293,7 +241,7 @@ static void xics_enable_irq(unsigned int virq) } } -static void xics_disable_real_irq(unsigned int irq) +static void xics_mask_real_irq(unsigned int irq) { int call_status; unsigned int server; @@ -318,75 +266,86 @@ static void xics_disable_real_irq(unsigned int irq) } } -static void xics_disable_irq(unsigned int virq) +static void xics_mask_irq(unsigned int virq) { unsigned int irq; - irq = virt_irq_to_real(irq_offset_down(virq)); - xics_disable_real_irq(irq); + pr_debug("xics: mask virq %d\n", virq); + + irq = (unsigned int)irq_map[virq].hwirq; + if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS) + return; + xics_mask_real_irq(irq); +} + +static unsigned int xics_startup(unsigned int virq) +{ + unsigned int irq; + + /* force a reverse mapping of the interrupt so it gets in the cache */ + irq = (unsigned int)irq_map[virq].hwirq; + irq_radix_revmap(xics_host, irq); + + /* unmask it */ + xics_unmask_irq(virq); + return 0; } -static void xics_end_irq(unsigned int irq) +static void xics_eoi_direct(unsigned int virq) { int cpu = smp_processor_id(); + unsigned int irq = (unsigned 
int)irq_map[virq].hwirq; iosync(); - ops->xirr_info_set(cpu, ((0xff << 24) | - (virt_irq_to_real(irq_offset_down(irq))))); - + direct_xirr_info_set(cpu, (0xff << 24) | irq); } -static void xics_mask_and_ack_irq(unsigned int irq) + +static void xics_eoi_lpar(unsigned int virq) { int cpu = smp_processor_id(); + unsigned int irq = (unsigned int)irq_map[virq].hwirq; - if (irq < irq_offset_value()) { - i8259_pic.ack(irq); - iosync(); - ops->xirr_info_set(cpu, ((0xff<<24) | - xics_irq_8259_cascade_real)); - iosync(); - } + iosync(); + lpar_xirr_info_set(cpu, (0xff << 24) | irq); } -int xics_get_irq(struct pt_regs *regs) +static inline unsigned int xics_remap_irq(unsigned int vec) { - unsigned int cpu = smp_processor_id(); - unsigned int vec; - int irq; + unsigned int irq; - vec = ops->xirr_info_get(cpu); - /* (vec >> 24) == old priority */ vec &= 0x00ffffff; - /* for sanity, this had better be < NR_IRQS - 16 */ - if (vec == xics_irq_8259_cascade_real) { - irq = i8259_irq(regs); - xics_end_irq(irq_offset_up(xics_irq_8259_cascade)); - } else if (vec == XICS_IRQ_SPURIOUS) { - irq = -1; - } else { - irq = real_irq_to_virt(vec); - if (irq == NO_IRQ) - irq = real_irq_to_virt_slowpath(vec); - if (irq == NO_IRQ) { - printk(KERN_ERR "Interrupt %u (real) is invalid," - " disabling it.\n", vec); - xics_disable_real_irq(vec); - } else - irq = irq_offset_up(irq); - } - return irq; + if (vec == XICS_IRQ_SPURIOUS) + return NO_IRQ; + irq = irq_radix_revmap(xics_host, vec); + if (likely(irq != NO_IRQ)) + return irq; + + printk(KERN_ERR "Interrupt %u (real) is invalid," + " disabling it.\n", vec); + xics_mask_real_irq(vec); + return NO_IRQ; } -#ifdef CONFIG_SMP +static unsigned int xics_get_irq_direct(struct pt_regs *regs) +{ + unsigned int cpu = smp_processor_id(); -static irqreturn_t xics_ipi_action(int irq, void *dev_id, struct pt_regs *regs) + return xics_remap_irq(direct_xirr_info_get(cpu)); +} + +static unsigned int xics_get_irq_lpar(struct pt_regs *regs) { - int cpu = 
smp_processor_id(); + unsigned int cpu = smp_processor_id(); + + return xics_remap_irq(lpar_xirr_info_get(cpu)); +} - ops->qirr_info(cpu, 0xff); +#ifdef CONFIG_SMP +static irqreturn_t xics_ipi_dispatch(int cpu, struct pt_regs *regs) +{ WARN_ON(cpu_is_offline(cpu)); while (xics_ipi_message[cpu].value) { @@ -418,18 +377,88 @@ static irqreturn_t xics_ipi_action(int irq, void *dev_id, struct pt_regs *regs) return IRQ_HANDLED; } +static irqreturn_t xics_ipi_action_direct(int irq, void *dev_id, struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + + direct_qirr_info(cpu, 0xff); + + return xics_ipi_dispatch(cpu, regs); +} + +static irqreturn_t xics_ipi_action_lpar(int irq, void *dev_id, struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + + lpar_qirr_info(cpu, 0xff); + + return xics_ipi_dispatch(cpu, regs); +} + void xics_cause_IPI(int cpu) { - ops->qirr_info(cpu, IPI_PRIORITY); + if (firmware_has_feature(FW_FEATURE_LPAR)) + lpar_qirr_info(cpu, IPI_PRIORITY); + else + direct_qirr_info(cpu, IPI_PRIORITY); } + #endif /* CONFIG_SMP */ +static void xics_set_cpu_priority(int cpu, unsigned char cppr) +{ + if (firmware_has_feature(FW_FEATURE_LPAR)) + lpar_cppr_info(cpu, cppr); + else + direct_cppr_info(cpu, cppr); + iosync(); +} + +static void xics_set_affinity(unsigned int virq, cpumask_t cpumask) +{ + unsigned int irq; + int status; + int xics_status[2]; + unsigned long newmask; + cpumask_t tmp = CPU_MASK_NONE; + + irq = (unsigned int)irq_map[virq].hwirq; + if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS) + return; + + status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq); + + if (status) { + printk(KERN_ERR "xics_set_affinity: irq=%u ibm,get-xive " + "returns %d\n", irq, status); + return; + } + + /* For the moment only implement delivery to all cpus or one cpu */ + if (cpus_equal(cpumask, CPU_MASK_ALL)) { + newmask = default_distrib_server; + } else { + cpus_and(tmp, cpu_online_map, cpumask); + if (cpus_empty(tmp)) + return; + newmask = 
get_hard_smp_processor_id(first_cpu(tmp)); + } + + status = rtas_call(ibm_set_xive, 3, 1, NULL, + irq, newmask, xics_status[1]); + + if (status) { + printk(KERN_ERR "xics_set_affinity: irq=%u ibm,set-xive " + "returns %d\n", irq, status); + return; + } +} + void xics_setup_cpu(void) { int cpu = smp_processor_id(); - ops->cppr_info(cpu, 0xff); - iosync(); + xics_set_cpu_priority(cpu, 0xff); /* * Put the calling processor into the GIQ. This is really only @@ -442,72 +471,266 @@ void xics_setup_cpu(void) (1UL << interrupt_server_size) - 1 - default_distrib_server, 1); } -void xics_init_IRQ(void) + +static struct irq_chip xics_pic_direct = { + .typename = " XICS ", + .startup = xics_startup, + .mask = xics_mask_irq, + .unmask = xics_unmask_irq, + .eoi = xics_eoi_direct, + .set_affinity = xics_set_affinity +}; + + +static struct irq_chip xics_pic_lpar = { + .typename = " XICS ", + .startup = xics_startup, + .mask = xics_mask_irq, + .unmask = xics_unmask_irq, + .eoi = xics_eoi_lpar, + .set_affinity = xics_set_affinity +}; + + +static int xics_host_match(struct irq_host *h, struct device_node *node) +{ + /* IBM machines have interrupt parents of various funky types for things + * like vdevices, events, etc... 
The trick we use here is to match + * everything here except the legacy 8259 which is compatible "chrp,iic" + */ + return !device_is_compatible(node, "chrp,iic"); +} + +static int xics_host_map_direct(struct irq_host *h, unsigned int virq, + irq_hw_number_t hw, unsigned int flags) +{ + unsigned int sense = flags & IRQ_TYPE_SENSE_MASK; + + pr_debug("xics: map_direct virq %d, hwirq 0x%lx, flags: 0x%x\n", + virq, hw, flags); + + if (sense && sense != IRQ_TYPE_LEVEL_LOW) + printk(KERN_WARNING "xics: using unsupported sense 0x%x" + " for irq %d (h: 0x%lx)\n", flags, virq, hw); + + get_irq_desc(virq)->status |= IRQ_LEVEL; + set_irq_chip_and_handler(virq, &xics_pic_direct, handle_fasteoi_irq); + return 0; +} + +static int xics_host_map_lpar(struct irq_host *h, unsigned int virq, + irq_hw_number_t hw, unsigned int flags) +{ + unsigned int sense = flags & IRQ_TYPE_SENSE_MASK; + + pr_debug("xics: map_lpar virq %d, hwirq 0x%lx, flags: 0x%x\n", + virq, hw, flags); + + if (sense && sense != IRQ_TYPE_LEVEL_LOW) + printk(KERN_WARNING "xics: using unsupported sense 0x%x" + " for irq %d (h: 0x%lx)\n", flags, virq, hw); + + get_irq_desc(virq)->status |= IRQ_LEVEL; + set_irq_chip_and_handler(virq, &xics_pic_lpar, handle_fasteoi_irq); + return 0; +} + +static int xics_host_xlate(struct irq_host *h, struct device_node *ct, + u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_flags) + +{ + /* Current xics implementation translates everything + * to level. It is not technically right for MSIs but this + * is irrelevant at this point. 
We might get smarter in the future + */ + *out_hwirq = intspec[0]; + *out_flags = IRQ_TYPE_LEVEL_LOW; + + return 0; +} + +static struct irq_host_ops xics_host_direct_ops = { + .match = xics_host_match, + .map = xics_host_map_direct, + .xlate = xics_host_xlate, +}; + +static struct irq_host_ops xics_host_lpar_ops = { + .match = xics_host_match, + .map = xics_host_map_lpar, + .xlate = xics_host_xlate, +}; + +static void __init xics_init_host(void) +{ + struct irq_host_ops *ops; + + if (firmware_has_feature(FW_FEATURE_LPAR)) + ops = &xics_host_lpar_ops; + else + ops = &xics_host_direct_ops; + xics_host = irq_alloc_host(IRQ_HOST_MAP_TREE, 0, ops, + XICS_IRQ_SPURIOUS); + BUG_ON(xics_host == NULL); + irq_set_default_host(xics_host); +} + +static void __init xics_map_one_cpu(int hw_id, unsigned long addr, + unsigned long size) { +#ifdef CONFIG_SMP int i; - unsigned long intr_size = 0; - struct device_node *np; - uint *ireg, ilen, indx = 0; - unsigned long intr_base = 0; - struct xics_interrupt_node { - unsigned long addr; - unsigned long size; - } intnodes[NR_CPUS]; - ppc64_boot_msg(0x20, "XICS Init"); + /* This may look gross but it's good enough for now, we don't quite + * have a hard -> linux processor id matching. 
+ */ + for_each_possible_cpu(i) { + if (!cpu_present(i)) + continue; + if (hw_id == get_hard_smp_processor_id(i)) { + xics_per_cpu[i] = ioremap(addr, size); + return; + } + } +#else + if (hw_id != 0) + return; + xics_per_cpu[0] = ioremap(addr, size); +#endif /* CONFIG_SMP */ +} - ibm_get_xive = rtas_token("ibm,get-xive"); - ibm_set_xive = rtas_token("ibm,set-xive"); - ibm_int_on = rtas_token("ibm,int-on"); - ibm_int_off = rtas_token("ibm,int-off"); +static void __init xics_init_one_node(struct device_node *np, + unsigned int *indx) +{ + unsigned int ilen; + u32 *ireg; - np = of_find_node_by_type(NULL, "PowerPC-External-Interrupt-Presentation"); - if (!np) - panic("xics_init_IRQ: can't find interrupt presentation"); + /* This code does the theorically broken assumption that the interrupt + * server numbers are the same as the hard CPU numbers. + * This happens to be the case so far but we are playing with fire... + * should be fixed one of these days. -BenH. + */ + ireg = (u32 *)get_property(np, "ibm,interrupt-server-ranges", NULL); -nextnode: - ireg = (uint *)get_property(np, "ibm,interrupt-server-ranges", NULL); + /* Do that ever happen ? we'll know soon enough... but even good'old + * f80 does have that property .. + */ + WARN_ON(ireg == NULL); if (ireg) { /* * set node starting index for this node */ - indx = *ireg; + *indx = *ireg; } - - ireg = (uint *)get_property(np, "reg", &ilen); + ireg = (u32 *)get_property(np, "reg", &ilen); if (!ireg) panic("xics_init_IRQ: can't find interrupt reg property"); - while (ilen) { - intnodes[indx].addr = (unsigned long)*ireg++ << 32; - ilen -= sizeof(uint); - intnodes[indx].addr |= *ireg++; - ilen -= sizeof(uint); - intnodes[indx].size = (unsigned long)*ireg++ << 32; - ilen -= sizeof(uint); - intnodes[indx].size |= *ireg++; - ilen -= sizeof(uint); - indx++; - if (indx >= NR_CPUS) break; + while (ilen >= (4 * sizeof(u32))) { + unsigned long addr, size; + + /* XXX Use proper OF parsing code here !!! 
*/ + addr = (unsigned long)*ireg++ << 32; + ilen -= sizeof(u32); + addr |= *ireg++; + ilen -= sizeof(u32); + size = (unsigned long)*ireg++ << 32; + ilen -= sizeof(u32); + size |= *ireg++; + ilen -= sizeof(u32); + xics_map_one_cpu(*indx, addr, size); + (*indx)++; + } +} + + +static void __init xics_setup_8259_cascade(void) +{ + struct device_node *np, *old, *found = NULL; + int cascade, naddr; + u32 *addrp; + unsigned long intack = 0; + + for_each_node_by_type(np, "interrupt-controller") + if (device_is_compatible(np, "chrp,iic")) { + found = np; + break; + } + if (found == NULL) { + printk(KERN_DEBUG "xics: no ISA interrupt controller\n"); + return; + } + cascade = irq_of_parse_and_map(found, 0); + if (cascade == NO_IRQ) { + printk(KERN_ERR "xics: failed to map cascade interrupt"); + return; + } + pr_debug("xics: cascade mapped to irq %d\n", cascade); + + for (old = of_node_get(found); old != NULL ; old = np) { + np = of_get_parent(old); + of_node_put(old); + if (np == NULL) + break; + if (strcmp(np->name, "pci") != 0) + continue; + addrp = (u32 *)get_property(np, "8259-interrupt-acknowledge", NULL); + if (addrp == NULL) + continue; + naddr = prom_n_addr_cells(np); + intack = addrp[naddr-1]; + if (naddr > 1) + intack |= ((unsigned long)addrp[naddr-2]) << 32; + } + if (intack) + printk(KERN_DEBUG "xics: PCI 8259 intack at 0x%016lx\n", intack); + i8259_init(found, intack); + of_node_put(found); + set_irq_chained_handler(cascade, pseries_8259_cascade); +} + +void __init xics_init_IRQ(void) +{ + int i; + struct device_node *np; + u32 *ireg, ilen, indx = 0; + int found = 0; + + ppc64_boot_msg(0x20, "XICS Init"); + + ibm_get_xive = rtas_token("ibm,get-xive"); + ibm_set_xive = rtas_token("ibm,set-xive"); + ibm_int_on = rtas_token("ibm,int-on"); + ibm_int_off = rtas_token("ibm,int-off"); + + for_each_node_by_type(np, "PowerPC-External-Interrupt-Presentation") { + found = 1; + if (firmware_has_feature(FW_FEATURE_LPAR)) + break; + xics_init_one_node(np, &indx); } + if (found 
== 0) + return; - np = of_find_node_by_type(np, "PowerPC-External-Interrupt-Presentation"); - if ((indx < NR_CPUS) && np) goto nextnode; + xics_init_host(); /* Find the server numbers for the boot cpu. */ for (np = of_find_node_by_type(NULL, "cpu"); np; np = of_find_node_by_type(np, "cpu")) { - ireg = (uint *)get_property(np, "reg", &ilen); + ireg = (u32 *)get_property(np, "reg", &ilen); if (ireg && ireg[0] == get_hard_smp_processor_id(boot_cpuid)) { - ireg = (uint *)get_property(np, "ibm,ppc-interrupt-gserver#s", - &ilen); + ireg = (u32 *)get_property(np, + "ibm,ppc-interrupt-gserver#s", + &ilen); i = ilen / sizeof(int); if (ireg && i > 0) { default_server = ireg[0]; - default_distrib_server = ireg[i-1]; /* take last element */ + /* take last element */ + default_distrib_server = ireg[i-1]; } - ireg = (uint *)get_property(np, + ireg = (u32 *)get_property(np, "ibm,interrupt-server#-size", NULL); if (ireg) interrupt_server_size = *ireg; @@ -516,135 +739,48 @@ nextnode: } of_node_put(np); - intr_base = intnodes[0].addr; - intr_size = intnodes[0].size; - - np = of_find_node_by_type(NULL, "interrupt-controller"); - if (!np) { - printk(KERN_DEBUG "xics: no ISA interrupt controller\n"); - xics_irq_8259_cascade_real = -1; - xics_irq_8259_cascade = -1; - } else { - ireg = (uint *) get_property(np, "interrupts", NULL); - if (!ireg) - panic("xics_init_IRQ: can't find ISA interrupts property"); - - xics_irq_8259_cascade_real = *ireg; - xics_irq_8259_cascade - = virt_irq_create_mapping(xics_irq_8259_cascade_real); - i8259_init(0, 0); - of_node_put(np); - } - if (firmware_has_feature(FW_FEATURE_LPAR)) - ops = &pSeriesLP_ops; - else { -#ifdef CONFIG_SMP - for_each_possible_cpu(i) { - int hard_id; - - /* FIXME: Do this dynamically! 
--RR */ - if (!cpu_present(i)) - continue; - - hard_id = get_hard_smp_processor_id(i); - xics_per_cpu[i] = ioremap(intnodes[hard_id].addr, - intnodes[hard_id].size); - } -#else - xics_per_cpu[0] = ioremap(intr_base, intr_size); -#endif /* CONFIG_SMP */ - } - - for (i = irq_offset_value(); i < NR_IRQS; ++i) - get_irq_desc(i)->chip = &xics_pic; + ppc_md.get_irq = xics_get_irq_lpar; + else + ppc_md.get_irq = xics_get_irq_direct; xics_setup_cpu(); + xics_setup_8259_cascade(); + ppc64_boot_msg(0x21, "XICS Done"); } -/* - * We cant do this in init_IRQ because we need the memory subsystem up for - * request_irq() - */ -static int __init xics_setup_i8259(void) -{ - if (ppc64_interrupt_controller == IC_PPC_XIC && - xics_irq_8259_cascade != -1) { - if (request_irq(irq_offset_up(xics_irq_8259_cascade), - no_action, 0, "8259 cascade", NULL)) - printk(KERN_ERR "xics_setup_i8259: couldn't get 8259 " - "cascade\n"); - } - return 0; -} -arch_initcall(xics_setup_i8259); #ifdef CONFIG_SMP void xics_request_IPIs(void) { - virt_irq_to_real_map[XICS_IPI] = XICS_IPI; + unsigned int ipi; + + ipi = irq_create_mapping(xics_host, XICS_IPI, 0); + BUG_ON(ipi == NO_IRQ); /* * IPIs are marked IRQF_DISABLED as they must run with irqs * disabled */ - request_irq(irq_offset_up(XICS_IPI), xics_ipi_action, - IRQF_DISABLED, "IPI", NULL); - get_irq_desc(irq_offset_up(XICS_IPI))->status |= IRQ_PER_CPU; -} -#endif - -static void xics_set_affinity(unsigned int virq, cpumask_t cpumask) -{ - unsigned int irq; - int status; - int xics_status[2]; - unsigned long newmask; - cpumask_t tmp = CPU_MASK_NONE; - - irq = virt_irq_to_real(irq_offset_down(virq)); - if (irq == XICS_IPI || irq == NO_IRQ) - return; - - status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq); - - if (status) { - printk(KERN_ERR "xics_set_affinity: irq=%u ibm,get-xive " - "returns %d\n", irq, status); - return; - } - - /* For the moment only implement delivery to all cpus or one cpu */ - if (cpus_equal(cpumask, CPU_MASK_ALL)) { - newmask 
= default_distrib_server; - } else { - cpus_and(tmp, cpu_online_map, cpumask); - if (cpus_empty(tmp)) - return; - newmask = get_hard_smp_processor_id(first_cpu(tmp)); - } - - status = rtas_call(ibm_set_xive, 3, 1, NULL, - irq, newmask, xics_status[1]); - - if (status) { - printk(KERN_ERR "xics_set_affinity: irq=%u ibm,set-xive " - "returns %d\n", irq, status); - return; - } + set_irq_handler(ipi, handle_percpu_irq); + if (firmware_has_feature(FW_FEATURE_LPAR)) + request_irq(ipi, xics_ipi_action_lpar, IRQF_DISABLED, + "IPI", NULL); + else + request_irq(ipi, xics_ipi_action_direct, IRQF_DISABLED, + "IPI", NULL); } +#endif /* CONFIG_SMP */ void xics_teardown_cpu(int secondary) { int cpu = smp_processor_id(); + unsigned int ipi; + struct irq_desc *desc; - ops->cppr_info(cpu, 0x00); - iosync(); - - /* Clear IPI */ - ops->qirr_info(cpu, 0xff); + xics_set_cpu_priority(cpu, 0); /* * we need to EOI the IPI if we got here from kexec down IPI @@ -653,7 +789,13 @@ void xics_teardown_cpu(int secondary) * should we be flagging idle loop instead? * or creating some task to be scheduled? */ - ops->xirr_info_set(cpu, XICS_IPI); + + ipi = irq_find_mapping(xics_host, XICS_IPI); + if (ipi == XICS_IRQ_SPURIOUS) + return; + desc = get_irq_desc(ipi); + if (desc->chip && desc->chip->eoi) + desc->chip->eoi(XICS_IPI); /* * Some machines need to have at least one cpu in the GIQ, @@ -661,8 +803,8 @@ void xics_teardown_cpu(int secondary) */ if (secondary) rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE, - (1UL << interrupt_server_size) - 1 - - default_distrib_server, 0); + (1UL << interrupt_server_size) - 1 - + default_distrib_server, 0); } #ifdef CONFIG_HOTPLUG_CPU @@ -674,8 +816,7 @@ void xics_migrate_irqs_away(void) unsigned int irq, virq, cpu = smp_processor_id(); /* Reject any interrupt that was queued to us... 
*/ - ops->cppr_info(cpu, 0); - iosync(); + xics_set_cpu_priority(cpu, 0); /* remove ourselves from the global interrupt queue */ status = rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE, @@ -683,24 +824,23 @@ void xics_migrate_irqs_away(void) WARN_ON(status < 0); /* Allow IPIs again... */ - ops->cppr_info(cpu, DEFAULT_PRIORITY); - iosync(); + xics_set_cpu_priority(cpu, DEFAULT_PRIORITY); for_each_irq(virq) { - irq_desc_t *desc; + struct irq_desc *desc; int xics_status[2]; unsigned long flags; /* We cant set affinity on ISA interrupts */ - if (virq < irq_offset_value()) + if (virq < NUM_ISA_INTERRUPTS) continue; - - desc = get_irq_desc(virq); - irq = virt_irq_to_real(irq_offset_down(virq)); - + if (irq_map[virq].host != xics_host) + continue; + irq = (unsigned int)irq_map[virq].hwirq; /* We need to get IPIs still. */ - if (irq == XICS_IPI || irq == NO_IRQ) + if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS) continue; + desc = get_irq_desc(virq); /* We only need to migrate enabled IRQS */ if (desc == NULL || desc->chip == NULL diff --git a/arch/powerpc/platforms/pseries/xics.h b/arch/powerpc/platforms/pseries/xics.h index e14c70868f1d..6ee1055b0ffb 100644 --- a/arch/powerpc/platforms/pseries/xics.h +++ b/arch/powerpc/platforms/pseries/xics.h @@ -14,13 +14,12 @@ #include <linux/cache.h> -void xics_init_IRQ(void); -int xics_get_irq(struct pt_regs *); -void xics_setup_cpu(void); -void xics_teardown_cpu(int secondary); -void xics_cause_IPI(int cpu); -void xics_request_IPIs(void); -void xics_migrate_irqs_away(void); +extern void xics_init_IRQ(void); +extern void xics_setup_cpu(void); +extern void xics_teardown_cpu(int secondary); +extern void xics_cause_IPI(int cpu); +extern void xics_request_IPIs(void); +extern void xics_migrate_irqs_away(void); /* first argument is ignored for now*/ void pSeriesLP_cppr_info(int n_cpu, u8 value); @@ -31,4 +30,8 @@ struct xics_ipi_struct { extern struct xics_ipi_struct xics_ipi_message[NR_CPUS] __cacheline_aligned; +struct irq_desc; +extern void 
pseries_8259_cascade(unsigned int irq, struct irq_desc *desc, + struct pt_regs *regs); + #endif /* _POWERPC_KERNEL_XICS_H */ diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile index 054bd8b41ef5..cebfae242602 100644 --- a/arch/powerpc/sysdev/Makefile +++ b/arch/powerpc/sysdev/Makefile @@ -4,7 +4,6 @@ endif obj-$(CONFIG_MPIC) += mpic.o obj-$(CONFIG_PPC_INDIRECT_PCI) += indirect_pci.o -obj-$(CONFIG_PPC_I8259) += i8259.o obj-$(CONFIG_PPC_MPC106) += grackle.o obj-$(CONFIG_BOOKE) += dcr.o obj-$(CONFIG_40x) += dcr.o @@ -14,3 +13,7 @@ obj-$(CONFIG_PPC_83xx) += ipic.o obj-$(CONFIG_FSL_SOC) += fsl_soc.o obj-$(CONFIG_PPC_TODC) += todc.o obj-$(CONFIG_TSI108_BRIDGE) += tsi108_pci.o tsi108_dev.o + +ifeq ($(CONFIG_PPC_MERGE),y) +obj-$(CONFIG_PPC_I8259) += i8259.o + endif diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c index 1a3ef1ab9d6e..72c73a6105cd 100644 --- a/arch/powerpc/sysdev/i8259.c +++ b/arch/powerpc/sysdev/i8259.c @@ -6,11 +6,16 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ +#undef DEBUG + #include <linux/init.h> #include <linux/ioport.h> #include <linux/interrupt.h> +#include <linux/kernel.h> +#include <linux/delay.h> #include <asm/io.h> #include <asm/i8259.h> +#include <asm/prom.h> static volatile void __iomem *pci_intack; /* RO, gives us the irq vector */ @@ -20,7 +25,8 @@ static unsigned char cached_8259[2] = { 0xff, 0xff }; static DEFINE_SPINLOCK(i8259_lock); -static int i8259_pic_irq_offset; +static struct device_node *i8259_node; +static struct irq_host *i8259_host; /* * Acknowledge the IRQ using either the PCI host bridge's interrupt @@ -28,16 +34,18 @@ static int i8259_pic_irq_offset; * which is called. It should be noted that polling is broken on some * IBM and Motorola PReP boxes so we must use the int-ack feature on them. 
*/ -int i8259_irq(struct pt_regs *regs) +unsigned int i8259_irq(struct pt_regs *regs) { int irq; - - spin_lock(&i8259_lock); + int lock = 0; /* Either int-ack or poll for the IRQ */ if (pci_intack) irq = readb(pci_intack); else { + spin_lock(&i8259_lock); + lock = 1; + /* Perform an interrupt acknowledge cycle on controller 1. */ outb(0x0C, 0x20); /* prepare for poll */ irq = inb(0x20) & 7; @@ -62,16 +70,13 @@ int i8259_irq(struct pt_regs *regs) if (!pci_intack) outb(0x0B, 0x20); /* ISR register */ if(~inb(0x20) & 0x80) - irq = -1; - } + irq = NO_IRQ; + } else if (irq == 0xff) + irq = NO_IRQ; - spin_unlock(&i8259_lock); - return irq + i8259_pic_irq_offset; -} - -int i8259_irq_cascade(struct pt_regs *regs, void *unused) -{ - return i8259_irq(regs); + if (lock) + spin_unlock(&i8259_lock); + return irq; } static void i8259_mask_and_ack_irq(unsigned int irq_nr) @@ -79,7 +84,6 @@ static void i8259_mask_and_ack_irq(unsigned int irq_nr) unsigned long flags; spin_lock_irqsave(&i8259_lock, flags); - irq_nr -= i8259_pic_irq_offset; if (irq_nr > 7) { cached_A1 |= 1 << (irq_nr-8); inb(0xA1); /* DUMMY */ @@ -105,8 +109,9 @@ static void i8259_mask_irq(unsigned int irq_nr) { unsigned long flags; + pr_debug("i8259_mask_irq(%d)\n", irq_nr); + spin_lock_irqsave(&i8259_lock, flags); - irq_nr -= i8259_pic_irq_offset; if (irq_nr < 8) cached_21 |= 1 << irq_nr; else @@ -119,8 +124,9 @@ static void i8259_unmask_irq(unsigned int irq_nr) { unsigned long flags; + pr_debug("i8259_unmask_irq(%d)\n", irq_nr); + spin_lock_irqsave(&i8259_lock, flags); - irq_nr -= i8259_pic_irq_offset; if (irq_nr < 8) cached_21 &= ~(1 << irq_nr); else @@ -129,19 +135,11 @@ static void i8259_unmask_irq(unsigned int irq_nr) spin_unlock_irqrestore(&i8259_lock, flags); } -static void i8259_end_irq(unsigned int irq) -{ - if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)) - && irq_desc[irq].action) - i8259_unmask_irq(irq); -} - -struct hw_interrupt_type i8259_pic = { - .typename = " i8259 ", - .enable = 
i8259_unmask_irq, - .disable = i8259_mask_irq, - .ack = i8259_mask_and_ack_irq, - .end = i8259_end_irq, +static struct irq_chip i8259_pic = { + .typename = " i8259 ", + .mask = i8259_mask_irq, + .unmask = i8259_unmask_irq, + .mask_ack = i8259_mask_and_ack_irq, }; static struct resource pic1_iores = { @@ -165,25 +163,84 @@ static struct resource pic_edgectrl_iores = { .flags = IORESOURCE_BUSY, }; -static struct irqaction i8259_irqaction = { - .handler = no_action, - .flags = IRQF_DISABLED, - .mask = CPU_MASK_NONE, - .name = "82c59 secondary cascade", +static int i8259_host_match(struct irq_host *h, struct device_node *node) +{ + return i8259_node == NULL || i8259_node == node; +} + +static int i8259_host_map(struct irq_host *h, unsigned int virq, + irq_hw_number_t hw, unsigned int flags) +{ + pr_debug("i8259_host_map(%d, 0x%lx)\n", virq, hw); + + /* We block the internal cascade */ + if (hw == 2) + get_irq_desc(virq)->status |= IRQ_NOREQUEST; + + /* We use the level stuff only for now, we might want to + * be more cautious here but that works for now + */ + get_irq_desc(virq)->status |= IRQ_LEVEL; + set_irq_chip_and_handler(virq, &i8259_pic, handle_level_irq); + return 0; +} + +static void i8259_host_unmap(struct irq_host *h, unsigned int virq) +{ + /* Make sure irq is masked in hardware */ + i8259_mask_irq(virq); + + /* remove chip and handler */ + set_irq_chip_and_handler(virq, NULL, NULL); + + /* Make sure it's completed */ + synchronize_irq(virq); +} + +static int i8259_host_xlate(struct irq_host *h, struct device_node *ct, + u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_flags) +{ + static unsigned char map_isa_senses[4] = { + IRQ_TYPE_LEVEL_LOW, + IRQ_TYPE_LEVEL_HIGH, + IRQ_TYPE_EDGE_FALLING, + IRQ_TYPE_EDGE_RISING, + }; + + *out_hwirq = intspec[0]; + if (intsize > 1 && intspec[1] < 4) + *out_flags = map_isa_senses[intspec[1]]; + else + *out_flags = IRQ_TYPE_NONE; + + return 0; +} + +static struct irq_host_ops 
i8259_host_ops = { + .match = i8259_host_match, + .map = i8259_host_map, + .unmap = i8259_host_unmap, + .xlate = i8259_host_xlate, }; -/* - * i8259_init() - * intack_addr - PCI interrupt acknowledge (real) address which will return - * the active irq from the 8259 +/**** + * i8259_init - Initialize the legacy controller + * @node: device node of the legacy PIC (can be NULL, but then, it will match + * all interrupts, so beware) + * @intack_addr: PCI interrupt acknowledge (real) address which will return + * the active irq from the 8259 */ -void __init i8259_init(unsigned long intack_addr, int offset) +void i8259_init(struct device_node *node, unsigned long intack_addr) { unsigned long flags; - int i; + /* initialize the controller */ spin_lock_irqsave(&i8259_lock, flags); - i8259_pic_irq_offset = offset; + + /* Mask all first */ + outb(0xff, 0xA1); + outb(0xff, 0x21); /* init master interrupt controller */ outb(0x11, 0x20); /* Start init sequence */ @@ -197,21 +254,36 @@ void __init i8259_init(unsigned long intack_addr, int offset) outb(0x02, 0xA1); /* edge triggered, Cascade (slave) on IRQ2 */ outb(0x01, 0xA1); /* Select 8086 mode */ + /* That thing is slow */ + udelay(100); + /* always read ISR */ outb(0x0B, 0x20); outb(0x0B, 0xA0); - /* Mask all interrupts */ + /* Unmask the internal cascade */ + cached_21 &= ~(1 << 2); + + /* Set interrupt masks */ outb(cached_A1, 0xA1); outb(cached_21, 0x21); spin_unlock_irqrestore(&i8259_lock, flags); - for (i = 0; i < NUM_ISA_INTERRUPTS; ++i) - irq_desc[offset + i].chip = &i8259_pic; + /* create a legacy host */ + if (node) + i8259_node = of_node_get(node); + i8259_host = irq_alloc_host(IRQ_HOST_MAP_LEGACY, 0, &i8259_host_ops, 0); + if (i8259_host == NULL) { + printk(KERN_ERR "i8259: failed to allocate irq host !\n"); + return; + } /* reserve our resources */ - setup_irq(offset + 2, &i8259_irqaction); + /* XXX should we continue doing that ? 
it seems to cause problems + * with further requesting of PCI IO resources for that range... + * need to look into it. + */ request_resource(&ioport_resource, &pic1_iores); request_resource(&ioport_resource, &pic2_iores); request_resource(&ioport_resource, &pic_edgectrl_iores); @@ -219,4 +291,5 @@ void __init i8259_init(unsigned long intack_addr, int offset) if (intack_addr != 0) pci_intack = ioremap(intack_addr, 1); + printk(KERN_INFO "i8259 legacy interrupt controller initialized\n"); } diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 7e469358895f..7d31d7cc392d 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -100,8 +100,8 @@ static inline u32 _mpic_cpu_read(struct mpic *mpic, unsigned int reg) if (mpic->flags & MPIC_PRIMARY) cpu = hard_smp_processor_id(); - - return _mpic_read(mpic->flags & MPIC_BIG_ENDIAN, mpic->cpuregs[cpu], reg); + return _mpic_read(mpic->flags & MPIC_BIG_ENDIAN, + mpic->cpuregs[cpu], reg); } static inline void _mpic_cpu_write(struct mpic *mpic, unsigned int reg, u32 value) @@ -340,27 +340,19 @@ static void __init mpic_scan_ht_pics(struct mpic *mpic) #endif /* CONFIG_MPIC_BROKEN_U3 */ +#define mpic_irq_to_hw(virq) ((unsigned int)irq_map[virq].hwirq) + /* Find an mpic associated with a given linux interrupt */ static struct mpic *mpic_find(unsigned int irq, unsigned int *is_ipi) { - struct mpic *mpic = mpics; - - while(mpic) { - /* search IPIs first since they may override the main interrupts */ - if (irq >= mpic->ipi_offset && irq < (mpic->ipi_offset + 4)) { - if (is_ipi) - *is_ipi = 1; - return mpic; - } - if (irq >= mpic->irq_offset && - irq < (mpic->irq_offset + mpic->irq_count)) { - if (is_ipi) - *is_ipi = 0; - return mpic; - } - mpic = mpic -> next; - } - return NULL; + unsigned int src = mpic_irq_to_hw(irq); + + if (irq < NUM_ISA_INTERRUPTS) + return NULL; + if (is_ipi) + *is_ipi = (src >= MPIC_VEC_IPI_0 && src <= MPIC_VEC_IPI_3); + + return irq_desc[irq].chip_data; } /* Convert a cpu 
mask from logical to physical cpu numbers. */ @@ -378,14 +370,14 @@ static inline u32 mpic_physmask(u32 cpumask) /* Get the mpic structure from the IPI number */ static inline struct mpic * mpic_from_ipi(unsigned int ipi) { - return container_of(irq_desc[ipi].chip, struct mpic, hc_ipi); + return irq_desc[ipi].chip_data; } #endif /* Get the mpic structure from the irq number */ static inline struct mpic * mpic_from_irq(unsigned int irq) { - return container_of(irq_desc[irq].chip, struct mpic, hc_irq); + return irq_desc[irq].chip_data; } /* Send an EOI */ @@ -398,9 +390,7 @@ static inline void mpic_eoi(struct mpic *mpic) #ifdef CONFIG_SMP static irqreturn_t mpic_ipi_action(int irq, void *dev_id, struct pt_regs *regs) { - struct mpic *mpic = dev_id; - - smp_message_recv(irq - mpic->ipi_offset, regs); + smp_message_recv(mpic_irq_to_hw(irq) - MPIC_VEC_IPI_0, regs); return IRQ_HANDLED; } #endif /* CONFIG_SMP */ @@ -410,11 +400,11 @@ static irqreturn_t mpic_ipi_action(int irq, void *dev_id, struct pt_regs *regs) */ -static void mpic_enable_irq(unsigned int irq) +static void mpic_unmask_irq(unsigned int irq) { unsigned int loops = 100000; struct mpic *mpic = mpic_from_irq(irq); - unsigned int src = irq - mpic->irq_offset; + unsigned int src = mpic_irq_to_hw(irq); DBG("%p: %s: enable_irq: %d (src %d)\n", mpic, mpic->name, irq, src); @@ -429,39 +419,13 @@ static void mpic_enable_irq(unsigned int irq) break; } } while(mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) & MPIC_VECPRI_MASK); - -#ifdef CONFIG_MPIC_BROKEN_U3 - if (mpic->flags & MPIC_BROKEN_U3) { - unsigned int src = irq - mpic->irq_offset; - if (mpic_is_ht_interrupt(mpic, src) && - (irq_desc[irq].status & IRQ_LEVEL)) - mpic_ht_end_irq(mpic, src); - } -#endif /* CONFIG_MPIC_BROKEN_U3 */ -} - -static unsigned int mpic_startup_irq(unsigned int irq) -{ -#ifdef CONFIG_MPIC_BROKEN_U3 - struct mpic *mpic = mpic_from_irq(irq); - unsigned int src = irq - mpic->irq_offset; -#endif /* CONFIG_MPIC_BROKEN_U3 */ - - mpic_enable_irq(irq); 
- -#ifdef CONFIG_MPIC_BROKEN_U3 - if (mpic_is_ht_interrupt(mpic, src)) - mpic_startup_ht_interrupt(mpic, src, irq_desc[irq].status); -#endif /* CONFIG_MPIC_BROKEN_U3 */ - - return 0; } -static void mpic_disable_irq(unsigned int irq) +static void mpic_mask_irq(unsigned int irq) { unsigned int loops = 100000; struct mpic *mpic = mpic_from_irq(irq); - unsigned int src = irq - mpic->irq_offset; + unsigned int src = mpic_irq_to_hw(irq); DBG("%s: disable_irq: %d (src %d)\n", mpic->name, irq, src); @@ -478,23 +442,58 @@ static void mpic_disable_irq(unsigned int irq) } while(!(mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) & MPIC_VECPRI_MASK)); } -static void mpic_shutdown_irq(unsigned int irq) +static void mpic_end_irq(unsigned int irq) { + struct mpic *mpic = mpic_from_irq(irq); + +#ifdef DEBUG_IRQ + DBG("%s: end_irq: %d\n", mpic->name, irq); +#endif + /* We always EOI on end_irq() even for edge interrupts since that + * should only lower the priority, the MPIC should have properly + * latched another edge interrupt coming in anyway + */ + + mpic_eoi(mpic); +} + #ifdef CONFIG_MPIC_BROKEN_U3 + +static void mpic_unmask_ht_irq(unsigned int irq) +{ struct mpic *mpic = mpic_from_irq(irq); - unsigned int src = irq - mpic->irq_offset; + unsigned int src = mpic_irq_to_hw(irq); - if (mpic_is_ht_interrupt(mpic, src)) - mpic_shutdown_ht_interrupt(mpic, src, irq_desc[irq].status); + mpic_unmask_irq(irq); -#endif /* CONFIG_MPIC_BROKEN_U3 */ + if (irq_desc[irq].status & IRQ_LEVEL) + mpic_ht_end_irq(mpic, src); +} + +static unsigned int mpic_startup_ht_irq(unsigned int irq) +{ + struct mpic *mpic = mpic_from_irq(irq); + unsigned int src = mpic_irq_to_hw(irq); + + mpic_unmask_irq(irq); + mpic_startup_ht_interrupt(mpic, src, irq_desc[irq].status); - mpic_disable_irq(irq); + return 0; } -static void mpic_end_irq(unsigned int irq) +static void mpic_shutdown_ht_irq(unsigned int irq) { struct mpic *mpic = mpic_from_irq(irq); + unsigned int src = mpic_irq_to_hw(irq); + + 
mpic_shutdown_ht_interrupt(mpic, src, irq_desc[irq].status); + mpic_mask_irq(irq); +} + +static void mpic_end_ht_irq(unsigned int irq) +{ + struct mpic *mpic = mpic_from_irq(irq); + unsigned int src = mpic_irq_to_hw(irq); #ifdef DEBUG_IRQ DBG("%s: end_irq: %d\n", mpic->name, irq); @@ -504,30 +503,25 @@ static void mpic_end_irq(unsigned int irq) * latched another edge interrupt coming in anyway */ -#ifdef CONFIG_MPIC_BROKEN_U3 - if (mpic->flags & MPIC_BROKEN_U3) { - unsigned int src = irq - mpic->irq_offset; - if (mpic_is_ht_interrupt(mpic, src) && - (irq_desc[irq].status & IRQ_LEVEL)) - mpic_ht_end_irq(mpic, src); - } -#endif /* CONFIG_MPIC_BROKEN_U3 */ - + if (irq_desc[irq].status & IRQ_LEVEL) + mpic_ht_end_irq(mpic, src); mpic_eoi(mpic); } +#endif /* CONFIG_MPIC_BROKEN_U3 */ + #ifdef CONFIG_SMP -static void mpic_enable_ipi(unsigned int irq) +static void mpic_unmask_ipi(unsigned int irq) { struct mpic *mpic = mpic_from_ipi(irq); - unsigned int src = irq - mpic->ipi_offset; + unsigned int src = mpic_irq_to_hw(irq) - MPIC_VEC_IPI_0; DBG("%s: enable_ipi: %d (ipi %d)\n", mpic->name, irq, src); mpic_ipi_write(src, mpic_ipi_read(src) & ~MPIC_VECPRI_MASK); } -static void mpic_disable_ipi(unsigned int irq) +static void mpic_mask_ipi(unsigned int irq) { /* NEVER disable an IPI... that's just plain wrong! 
*/ } @@ -551,29 +545,176 @@ static void mpic_end_ipi(unsigned int irq) static void mpic_set_affinity(unsigned int irq, cpumask_t cpumask) { struct mpic *mpic = mpic_from_irq(irq); + unsigned int src = mpic_irq_to_hw(irq); cpumask_t tmp; cpus_and(tmp, cpumask, cpu_online_map); - mpic_irq_write(irq - mpic->irq_offset, MPIC_IRQ_DESTINATION, + mpic_irq_write(src, MPIC_IRQ_DESTINATION, mpic_physmask(cpus_addr(tmp)[0])); } +static unsigned int mpic_flags_to_vecpri(unsigned int flags, int *level) +{ + unsigned int vecpri; + + /* Now convert sense value */ + switch(flags & IRQ_TYPE_SENSE_MASK) { + case IRQ_TYPE_EDGE_RISING: + vecpri = MPIC_VECPRI_SENSE_EDGE | + MPIC_VECPRI_POLARITY_POSITIVE; + *level = 0; + break; + case IRQ_TYPE_EDGE_FALLING: + vecpri = MPIC_VECPRI_SENSE_EDGE | + MPIC_VECPRI_POLARITY_NEGATIVE; + *level = 0; + break; + case IRQ_TYPE_LEVEL_HIGH: + vecpri = MPIC_VECPRI_SENSE_LEVEL | + MPIC_VECPRI_POLARITY_POSITIVE; + *level = 1; + break; + case IRQ_TYPE_LEVEL_LOW: + default: + vecpri = MPIC_VECPRI_SENSE_LEVEL | + MPIC_VECPRI_POLARITY_NEGATIVE; + *level = 1; + } + return vecpri; +} + +static struct irq_chip mpic_irq_chip = { + .mask = mpic_mask_irq, + .unmask = mpic_unmask_irq, + .eoi = mpic_end_irq, +}; + +#ifdef CONFIG_SMP +static struct irq_chip mpic_ipi_chip = { + .mask = mpic_mask_ipi, + .unmask = mpic_unmask_ipi, + .eoi = mpic_end_ipi, +}; +#endif /* CONFIG_SMP */ + +#ifdef CONFIG_MPIC_BROKEN_U3 +static struct irq_chip mpic_irq_ht_chip = { + .startup = mpic_startup_ht_irq, + .shutdown = mpic_shutdown_ht_irq, + .mask = mpic_mask_irq, + .unmask = mpic_unmask_ht_irq, + .eoi = mpic_end_ht_irq, +}; +#endif /* CONFIG_MPIC_BROKEN_U3 */ + + +static int mpic_host_match(struct irq_host *h, struct device_node *node) +{ + struct mpic *mpic = h->host_data; + + /* Exact match, unless mpic node is NULL */ + return mpic->of_node == NULL || mpic->of_node == node; +} + +static int mpic_host_map(struct irq_host *h, unsigned int virq, + irq_hw_number_t hw, unsigned int 
flags) +{ + struct irq_desc *desc = get_irq_desc(virq); + struct irq_chip *chip; + struct mpic *mpic = h->host_data; + unsigned int vecpri = MPIC_VECPRI_SENSE_LEVEL | + MPIC_VECPRI_POLARITY_NEGATIVE; + int level; + + pr_debug("mpic: map virq %d, hwirq 0x%lx, flags: 0x%x\n", + virq, hw, flags); + + if (hw == MPIC_VEC_SPURRIOUS) + return -EINVAL; +#ifdef CONFIG_SMP + else if (hw >= MPIC_VEC_IPI_0) { + WARN_ON(!(mpic->flags & MPIC_PRIMARY)); + + pr_debug("mpic: mapping as IPI\n"); + set_irq_chip_data(virq, mpic); + set_irq_chip_and_handler(virq, &mpic->hc_ipi, + handle_percpu_irq); + return 0; + } +#endif /* CONFIG_SMP */ + + if (hw >= mpic->irq_count) + return -EINVAL; + + /* If no sense provided, check default sense array */ + if (((flags & IRQ_TYPE_SENSE_MASK) == IRQ_TYPE_NONE) && + mpic->senses && hw < mpic->senses_count) + flags |= mpic->senses[hw]; + + vecpri = mpic_flags_to_vecpri(flags, &level); + if (level) + desc->status |= IRQ_LEVEL; + chip = &mpic->hc_irq; + +#ifdef CONFIG_MPIC_BROKEN_U3 + /* Check for HT interrupts, override vecpri */ + if (mpic_is_ht_interrupt(mpic, hw)) { + vecpri &= ~(MPIC_VECPRI_SENSE_MASK | + MPIC_VECPRI_POLARITY_MASK); + vecpri |= MPIC_VECPRI_POLARITY_POSITIVE; + chip = &mpic->hc_ht_irq; + } +#endif + + /* Reconfigure irq */ + vecpri |= MPIC_VECPRI_MASK | hw | (8 << MPIC_VECPRI_PRIORITY_SHIFT); + mpic_irq_write(hw, MPIC_IRQ_VECTOR_PRI, vecpri); + + pr_debug("mpic: mapping as IRQ\n"); + + set_irq_chip_data(virq, mpic); + set_irq_chip_and_handler(virq, chip, handle_fasteoi_irq); + return 0; +} + +static int mpic_host_xlate(struct irq_host *h, struct device_node *ct, + u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_flags) + +{ + static unsigned char map_mpic_senses[4] = { + IRQ_TYPE_EDGE_RISING, + IRQ_TYPE_LEVEL_LOW, + IRQ_TYPE_LEVEL_HIGH, + IRQ_TYPE_EDGE_FALLING, + }; + + *out_hwirq = intspec[0]; + if (intsize > 1 && intspec[1] < 4) + *out_flags = map_mpic_senses[intspec[1]]; + else + *out_flags = 
IRQ_TYPE_NONE; + + return 0; +} + +static struct irq_host_ops mpic_host_ops = { + .match = mpic_host_match, + .map = mpic_host_map, + .xlate = mpic_host_xlate, +}; /* * Exported functions */ - -struct mpic * __init mpic_alloc(unsigned long phys_addr, +struct mpic * __init mpic_alloc(struct device_node *node, + unsigned long phys_addr, unsigned int flags, unsigned int isu_size, - unsigned int irq_offset, unsigned int irq_count, - unsigned int ipi_offset, - unsigned char *senses, - unsigned int senses_count, const char *name) { struct mpic *mpic; @@ -585,33 +726,38 @@ struct mpic * __init mpic_alloc(unsigned long phys_addr, if (mpic == NULL) return NULL; - memset(mpic, 0, sizeof(struct mpic)); mpic->name = name; + mpic->of_node = node ? of_node_get(node) : NULL; + mpic->irqhost = irq_alloc_host(IRQ_HOST_MAP_LINEAR, 256, + &mpic_host_ops, + MPIC_VEC_SPURRIOUS); + if (mpic->irqhost == NULL) { + of_node_put(node); + return NULL; + } + + mpic->irqhost->host_data = mpic; + mpic->hc_irq = mpic_irq_chip; mpic->hc_irq.typename = name; - mpic->hc_irq.startup = mpic_startup_irq; - mpic->hc_irq.shutdown = mpic_shutdown_irq; - mpic->hc_irq.enable = mpic_enable_irq; - mpic->hc_irq.disable = mpic_disable_irq; - mpic->hc_irq.end = mpic_end_irq; if (flags & MPIC_PRIMARY) mpic->hc_irq.set_affinity = mpic_set_affinity; +#ifdef CONFIG_MPIC_BROKEN_U3 + mpic->hc_ht_irq = mpic_irq_ht_chip; + mpic->hc_ht_irq.typename = name; + if (flags & MPIC_PRIMARY) + mpic->hc_ht_irq.set_affinity = mpic_set_affinity; +#endif /* CONFIG_MPIC_BROKEN_U3 */ #ifdef CONFIG_SMP + mpic->hc_ipi = mpic_ipi_chip; mpic->hc_ipi.typename = name; - mpic->hc_ipi.enable = mpic_enable_ipi; - mpic->hc_ipi.disable = mpic_disable_ipi; - mpic->hc_ipi.end = mpic_end_ipi; #endif /* CONFIG_SMP */ mpic->flags = flags; mpic->isu_size = isu_size; - mpic->irq_offset = irq_offset; mpic->irq_count = irq_count; - mpic->ipi_offset = ipi_offset; mpic->num_sources = 0; /* so far */ - mpic->senses = senses; - mpic->senses_count = 
senses_count; /* Map the global registers */ mpic->gregs = ioremap(phys_addr + MPIC_GREG_BASE, 0x1000); @@ -679,8 +825,10 @@ struct mpic * __init mpic_alloc(unsigned long phys_addr, mpic->next = mpics; mpics = mpic; - if (flags & MPIC_PRIMARY) + if (flags & MPIC_PRIMARY) { mpic_primary = mpic; + irq_set_default_host(mpic->irqhost); + } return mpic; } @@ -697,26 +845,10 @@ void __init mpic_assign_isu(struct mpic *mpic, unsigned int isu_num, mpic->num_sources = isu_first + mpic->isu_size; } -void __init mpic_setup_cascade(unsigned int irq, mpic_cascade_t handler, - void *data) +void __init mpic_set_default_senses(struct mpic *mpic, u8 *senses, int count) { - struct mpic *mpic = mpic_find(irq, NULL); - unsigned long flags; - - /* Synchronization here is a bit dodgy, so don't try to replace cascade - * interrupts on the fly too often ... but normally it's set up at boot. - */ - spin_lock_irqsave(&mpic_lock, flags); - if (mpic->cascade) - mpic_disable_irq(mpic->cascade_vec + mpic->irq_offset); - mpic->cascade = NULL; - wmb(); - mpic->cascade_vec = irq - mpic->irq_offset; - mpic->cascade_data = data; - wmb(); - mpic->cascade = handler; - mpic_enable_irq(irq); - spin_unlock_irqrestore(&mpic_lock, flags); + mpic->senses = senses; + mpic->senses_count = count; } void __init mpic_init(struct mpic *mpic) @@ -724,6 +856,11 @@ void __init mpic_init(struct mpic *mpic) int i; BUG_ON(mpic->num_sources == 0); + WARN_ON(mpic->num_sources > MPIC_VEC_IPI_0); + + /* Sanitize source count */ + if (mpic->num_sources > MPIC_VEC_IPI_0) + mpic->num_sources = MPIC_VEC_IPI_0; printk(KERN_INFO "mpic: Initializing for %d sources\n", mpic->num_sources); @@ -747,12 +884,6 @@ void __init mpic_init(struct mpic *mpic) MPIC_VECPRI_MASK | (10 << MPIC_VECPRI_PRIORITY_SHIFT) | (MPIC_VEC_IPI_0 + i)); -#ifdef CONFIG_SMP - if (!(mpic->flags & MPIC_PRIMARY)) - continue; - irq_desc[mpic->ipi_offset+i].status |= IRQ_PER_CPU; - irq_desc[mpic->ipi_offset+i].chip = &mpic->hc_ipi; -#endif /* CONFIG_SMP */ } /* 
Initialize interrupt sources */ @@ -763,31 +894,21 @@ void __init mpic_init(struct mpic *mpic) /* Do the HT PIC fixups on U3 broken mpic */ DBG("MPIC flags: %x\n", mpic->flags); if ((mpic->flags & MPIC_BROKEN_U3) && (mpic->flags & MPIC_PRIMARY)) - mpic_scan_ht_pics(mpic); + mpic_scan_ht_pics(mpic); #endif /* CONFIG_MPIC_BROKEN_U3 */ for (i = 0; i < mpic->num_sources; i++) { /* start with vector = source number, and masked */ u32 vecpri = MPIC_VECPRI_MASK | i | (8 << MPIC_VECPRI_PRIORITY_SHIFT); - int level = 0; + int level = 1; - /* if it's an IPI, we skip it */ - if ((mpic->irq_offset + i) >= (mpic->ipi_offset + i) && - (mpic->irq_offset + i) < (mpic->ipi_offset + i + 4)) - continue; - /* do senses munging */ - if (mpic->senses && i < mpic->senses_count) { - if (mpic->senses[i] & IRQ_SENSE_LEVEL) - vecpri |= MPIC_VECPRI_SENSE_LEVEL; - if (mpic->senses[i] & IRQ_POLARITY_POSITIVE) - vecpri |= MPIC_VECPRI_POLARITY_POSITIVE; - } else + if (mpic->senses && i < mpic->senses_count) + vecpri = mpic_flags_to_vecpri(mpic->senses[i], + &level); + else vecpri |= MPIC_VECPRI_SENSE_LEVEL; - /* remember if it was a level interrupts */ - level = (vecpri & MPIC_VECPRI_SENSE_LEVEL); - /* deal with broken U3 */ if (mpic->flags & MPIC_BROKEN_U3) { #ifdef CONFIG_MPIC_BROKEN_U3 @@ -808,12 +929,6 @@ void __init mpic_init(struct mpic *mpic) mpic_irq_write(i, MPIC_IRQ_VECTOR_PRI, vecpri); mpic_irq_write(i, MPIC_IRQ_DESTINATION, 1 << hard_smp_processor_id()); - - /* init linux descriptors */ - if (i < mpic->irq_count) { - irq_desc[mpic->irq_offset+i].status = level ? 
IRQ_LEVEL : 0; - irq_desc[mpic->irq_offset+i].chip = &mpic->hc_irq; - } } /* Init spurrious vector */ @@ -854,19 +969,20 @@ void mpic_irq_set_priority(unsigned int irq, unsigned int pri) { int is_ipi; struct mpic *mpic = mpic_find(irq, &is_ipi); + unsigned int src = mpic_irq_to_hw(irq); unsigned long flags; u32 reg; spin_lock_irqsave(&mpic_lock, flags); if (is_ipi) { - reg = mpic_ipi_read(irq - mpic->ipi_offset) & + reg = mpic_ipi_read(src - MPIC_VEC_IPI_0) & ~MPIC_VECPRI_PRIORITY_MASK; - mpic_ipi_write(irq - mpic->ipi_offset, + mpic_ipi_write(src - MPIC_VEC_IPI_0, reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT)); } else { - reg = mpic_irq_read(irq - mpic->irq_offset,MPIC_IRQ_VECTOR_PRI) + reg = mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) & ~MPIC_VECPRI_PRIORITY_MASK; - mpic_irq_write(irq - mpic->irq_offset, MPIC_IRQ_VECTOR_PRI, + mpic_irq_write(src, MPIC_IRQ_VECTOR_PRI, reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT)); } spin_unlock_irqrestore(&mpic_lock, flags); @@ -876,14 +992,15 @@ unsigned int mpic_irq_get_priority(unsigned int irq) { int is_ipi; struct mpic *mpic = mpic_find(irq, &is_ipi); + unsigned int src = mpic_irq_to_hw(irq); unsigned long flags; u32 reg; spin_lock_irqsave(&mpic_lock, flags); if (is_ipi) - reg = mpic_ipi_read(irq - mpic->ipi_offset); + reg = mpic_ipi_read(src - MPIC_VEC_IPI_0); /* hw vector -> IPI register index, as in mpic_irq_set_priority() */ else - reg = mpic_irq_read(irq - mpic->irq_offset, MPIC_IRQ_VECTOR_PRI); + reg = mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI); spin_unlock_irqrestore(&mpic_lock, flags); return (reg & MPIC_VECPRI_PRIORITY_MASK) >> MPIC_VECPRI_PRIORITY_SHIFT; } @@ -978,37 +1095,20 @@ void mpic_send_ipi(unsigned int ipi_no, unsigned int cpu_mask) mpic_physmask(cpu_mask & cpus_addr(cpu_online_map)[0])); } -int mpic_get_one_irq(struct mpic *mpic, struct pt_regs *regs) +unsigned int mpic_get_one_irq(struct mpic *mpic, struct pt_regs *regs) { - u32 irq; + u32 src; - irq = mpic_cpu_read(MPIC_CPU_INTACK) & MPIC_VECPRI_VECTOR_MASK; + src = mpic_cpu_read(MPIC_CPU_INTACK) & MPIC_VECPRI_VECTOR_MASK; #ifdef 
DEBUG_LOW - DBG("%s: get_one_irq(): %d\n", mpic->name, irq); + DBG("%s: get_one_irq(): %d\n", mpic->name, src); #endif - if (mpic->cascade && irq == mpic->cascade_vec) { -#ifdef DEBUG_LOW - DBG("%s: cascading ...\n", mpic->name); -#endif - irq = mpic->cascade(regs, mpic->cascade_data); - mpic_eoi(mpic); - return irq; - } - if (unlikely(irq == MPIC_VEC_SPURRIOUS)) - return -1; - if (irq < MPIC_VEC_IPI_0) { -#ifdef DEBUG_IRQ - DBG("%s: irq %d\n", mpic->name, irq + mpic->irq_offset); -#endif - return irq + mpic->irq_offset; - } -#ifdef DEBUG_IPI - DBG("%s: ipi %d !\n", mpic->name, irq - MPIC_VEC_IPI_0); -#endif - return irq - MPIC_VEC_IPI_0 + mpic->ipi_offset; + if (unlikely(src == MPIC_VEC_SPURRIOUS)) + return NO_IRQ; + return irq_linear_revmap(mpic->irqhost, src); } -int mpic_get_irq(struct pt_regs *regs) +unsigned int mpic_get_irq(struct pt_regs *regs) { struct mpic *mpic = mpic_primary; @@ -1022,25 +1122,27 @@ int mpic_get_irq(struct pt_regs *regs) void mpic_request_ipis(void) { struct mpic *mpic = mpic_primary; - + int i; + static char *ipi_names[] = { + "IPI0 (call function)", + "IPI1 (reschedule)", + "IPI2 (unused)", + "IPI3 (debugger break)", + }; BUG_ON(mpic == NULL); - - printk("requesting IPIs ... \n"); - /* - * IPIs are marked IRQF_DISABLED as they must run with irqs - * disabled - */ - request_irq(mpic->ipi_offset+0, mpic_ipi_action, IRQF_DISABLED, - "IPI0 (call function)", mpic); - request_irq(mpic->ipi_offset+1, mpic_ipi_action, IRQF_DISABLED, - "IPI1 (reschedule)", mpic); - request_irq(mpic->ipi_offset+2, mpic_ipi_action, IRQF_DISABLED, - "IPI2 (unused)", mpic); - request_irq(mpic->ipi_offset+3, mpic_ipi_action, IRQF_DISABLED, - "IPI3 (debugger break)", mpic); - - printk("IPIs requested... \n"); + printk(KERN_INFO "mpic: requesting IPIs ... 
\n"); + + for (i = 0; i < 4; i++) { + unsigned int vipi = irq_create_mapping(mpic->irqhost, + MPIC_VEC_IPI_0 + i, 0); + if (vipi == NO_IRQ) { + printk(KERN_ERR "Failed to map IPI %d\n", i); + break; + } + request_irq(vipi, mpic_ipi_action, IRQF_DISABLED, + ipi_names[i], mpic); + } } void smp_mpic_message_pass(int target, int msg) diff --git a/arch/ppc/syslib/Makefile b/arch/ppc/syslib/Makefile index 490749ca88f9..2497bbc07e76 100644 --- a/arch/ppc/syslib/Makefile +++ b/arch/ppc/syslib/Makefile @@ -104,3 +104,5 @@ obj-$(CONFIG_PPC_MPC52xx) += mpc52xx_setup.o mpc52xx_pic.o \ ifeq ($(CONFIG_PPC_MPC52xx),y) obj-$(CONFIG_PCI) += mpc52xx_pci.o endif + +obj-$(CONFIG_PPC_I8259) += i8259.o diff --git a/arch/ppc/syslib/i8259.c b/arch/ppc/syslib/i8259.c new file mode 100644 index 000000000000..eb35353af837 --- /dev/null +++ b/arch/ppc/syslib/i8259.c @@ -0,0 +1,212 @@ +/* + * i8259 interrupt controller driver. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/init.h> +#include <linux/ioport.h> +#include <linux/interrupt.h> +#include <asm/io.h> +#include <asm/i8259.h> + +static volatile void __iomem *pci_intack; /* RO, gives us the irq vector */ + +static unsigned char cached_8259[2] = { 0xff, 0xff }; +#define cached_A1 (cached_8259[0]) +#define cached_21 (cached_8259[1]) + +static DEFINE_SPINLOCK(i8259_lock); + +static int i8259_pic_irq_offset; + +/* + * Acknowledge the IRQ using either the PCI host bridge's interrupt + * acknowledge feature or poll. How i8259_init() is called determines + * which is called. It should be noted that polling is broken on some + * IBM and Motorola PReP boxes so we must use the int-ack feature on them. 
+ */ +int i8259_irq(struct pt_regs *regs) +{ + int irq; + + spin_lock(&i8259_lock); + + /* Either int-ack or poll for the IRQ */ + if (pci_intack) + irq = readb(pci_intack); + else { + /* Perform an interrupt acknowledge cycle on controller 1. */ + outb(0x0C, 0x20); /* prepare for poll */ + irq = inb(0x20) & 7; + if (irq == 2 ) { + /* + * Interrupt is cascaded so perform interrupt + * acknowledge on controller 2. + */ + outb(0x0C, 0xA0); /* prepare for poll */ + irq = (inb(0xA0) & 7) + 8; + } + } + + if (irq == 7) { + /* + * This may be a spurious interrupt. + * + * Read the interrupt status register (ISR). If the most + * significant bit is not set then there is no valid + * interrupt. + */ + if (!pci_intack) + outb(0x0B, 0x20); /* ISR register */ + if(~inb(0x20) & 0x80) + irq = -1; + } + + spin_unlock(&i8259_lock); + return irq + i8259_pic_irq_offset; +} + +static void i8259_mask_and_ack_irq(unsigned int irq_nr) +{ + unsigned long flags; + + spin_lock_irqsave(&i8259_lock, flags); + irq_nr -= i8259_pic_irq_offset; + if (irq_nr > 7) { + cached_A1 |= 1 << (irq_nr-8); + inb(0xA1); /* DUMMY */ + outb(cached_A1, 0xA1); + outb(0x20, 0xA0); /* Non-specific EOI */ + outb(0x20, 0x20); /* Non-specific EOI to cascade */ + } else { + cached_21 |= 1 << irq_nr; + inb(0x21); /* DUMMY */ + outb(cached_21, 0x21); + outb(0x20, 0x20); /* Non-specific EOI */ + } + spin_unlock_irqrestore(&i8259_lock, flags); +} + +static void i8259_set_irq_mask(int irq_nr) +{ + outb(cached_A1,0xA1); + outb(cached_21,0x21); +} + +static void i8259_mask_irq(unsigned int irq_nr) +{ + unsigned long flags; + + spin_lock_irqsave(&i8259_lock, flags); + irq_nr -= i8259_pic_irq_offset; + if (irq_nr < 8) + cached_21 |= 1 << irq_nr; + else + cached_A1 |= 1 << (irq_nr-8); + i8259_set_irq_mask(irq_nr); + spin_unlock_irqrestore(&i8259_lock, flags); +} + +static void i8259_unmask_irq(unsigned int irq_nr) +{ + unsigned long flags; + + spin_lock_irqsave(&i8259_lock, flags); + irq_nr -= i8259_pic_irq_offset; + if 
(irq_nr < 8) + cached_21 &= ~(1 << irq_nr); + else + cached_A1 &= ~(1 << (irq_nr-8)); + i8259_set_irq_mask(irq_nr); + spin_unlock_irqrestore(&i8259_lock, flags); +} + +static struct irq_chip i8259_pic = { + .typename = " i8259 ", + .mask = i8259_mask_irq, + .unmask = i8259_unmask_irq, + .mask_ack = i8259_mask_and_ack_irq, +}; + +static struct resource pic1_iores = { + .name = "8259 (master)", + .start = 0x20, + .end = 0x21, + .flags = IORESOURCE_BUSY, +}; + +static struct resource pic2_iores = { + .name = "8259 (slave)", + .start = 0xa0, + .end = 0xa1, + .flags = IORESOURCE_BUSY, +}; + +static struct resource pic_edgectrl_iores = { + .name = "8259 edge control", + .start = 0x4d0, + .end = 0x4d1, + .flags = IORESOURCE_BUSY, +}; + +static struct irqaction i8259_irqaction = { + .handler = no_action, + .flags = SA_INTERRUPT, + .mask = CPU_MASK_NONE, + .name = "82c59 secondary cascade", +}; + +/* + * i8259_init() + * intack_addr - PCI interrupt acknowledge (real) address which will return + * the active irq from the 8259 + */ +void __init i8259_init(unsigned long intack_addr, int offset) +{ + unsigned long flags; + int i; + + spin_lock_irqsave(&i8259_lock, flags); + i8259_pic_irq_offset = offset; + + /* init master interrupt controller */ + outb(0x11, 0x20); /* Start init sequence */ + outb(0x00, 0x21); /* Vector base */ + outb(0x04, 0x21); /* edge triggered, Cascade (slave) on IRQ2 */ + outb(0x01, 0x21); /* Select 8086 mode */ + + /* init slave interrupt controller */ + outb(0x11, 0xA0); /* Start init sequence */ + outb(0x08, 0xA1); /* Vector base */ + outb(0x02, 0xA1); /* edge triggered, Cascade (slave) on IRQ2 */ + outb(0x01, 0xA1); /* Select 8086 mode */ + + /* always read ISR */ + outb(0x0B, 0x20); + outb(0x0B, 0xA0); + + /* Mask all interrupts */ + outb(cached_A1, 0xA1); + outb(cached_21, 0x21); + + spin_unlock_irqrestore(&i8259_lock, flags); + + for (i = 0; i < NUM_ISA_INTERRUPTS; ++i) { + set_irq_chip_and_handler(offset + i, &i8259_pic, + handle_level_irq); + 
irq_desc[offset + i].status |= IRQ_LEVEL; + } + + /* reserve our resources */ + setup_irq(offset + 2, &i8259_irqaction); + request_resource(&ioport_resource, &pic1_iores); + request_resource(&ioport_resource, &pic2_iores); + request_resource(&ioport_resource, &pic_edgectrl_iores); + + if (intack_addr != 0) + pci_intack = ioremap(intack_addr, 1); + +} diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 821a141889de..224fbff79969 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -7,6 +7,14 @@ config MMU bool default y +config LOCKDEP_SUPPORT + bool + default y + +config STACKTRACE_SUPPORT + bool + default y + config RWSEM_GENERIC_SPINLOCK bool diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug index f53b6d5300e5..2283933a9a93 100644 --- a/arch/s390/Kconfig.debug +++ b/arch/s390/Kconfig.debug @@ -1,5 +1,9 @@ menu "Kernel hacking" +config TRACE_IRQFLAGS_SUPPORT + bool + default y + source "lib/Kconfig.debug" endmenu diff --git a/arch/s390/Makefile b/arch/s390/Makefile index b3791fb094a8..74ef57dcfa60 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -34,6 +34,11 @@ cflags-$(CONFIG_MARCH_G5) += $(call cc-option,-march=g5) cflags-$(CONFIG_MARCH_Z900) += $(call cc-option,-march=z900) cflags-$(CONFIG_MARCH_Z990) += $(call cc-option,-march=z990) +# +# Prevent tail-call optimizations, to get clearer backtraces: +# +cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls + # old style option for packed stacks ifeq ($(call cc-option-yn,-mkernel-backchain),y) cflags-$(CONFIG_PACK_STACK) += -mkernel-backchain -D__PACK_STACK diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 9269b5788fac..eabf00a6f770 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -21,6 +21,7 @@ obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o \ obj-$(CONFIG_BINFMT_ELF32) += binfmt_elf32.o obj-$(CONFIG_VIRT_TIMER) += vtime.o +obj-$(CONFIG_STACKTRACE) += stacktrace.o # Kexec part S390_KEXEC_OBJS := machine_kexec.o 
crash.o diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index d8948c342caf..5b5799ac8f83 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -58,6 +58,21 @@ STACK_SIZE = 1 << STACK_SHIFT #define BASED(name) name-system_call(%r13) +#ifdef CONFIG_TRACE_IRQFLAGS + .macro TRACE_IRQS_ON + l %r1,BASED(.Ltrace_irq_on) + basr %r14,%r1 + .endm + + .macro TRACE_IRQS_OFF + l %r1,BASED(.Ltrace_irq_off) + basr %r14,%r1 + .endm +#else +#define TRACE_IRQS_ON +#define TRACE_IRQS_OFF +#endif + /* * Register usage in interrupt handlers: * R9 - pointer to current task structure @@ -361,6 +376,7 @@ ret_from_fork: st %r15,SP_R15(%r15) # store stack pointer for new kthread 0: l %r1,BASED(.Lschedtail) basr %r14,%r1 + TRACE_IRQS_ON stosm __SF_EMPTY(%r15),0x03 # reenable interrupts b BASED(sysc_return) @@ -516,6 +532,7 @@ pgm_no_vtime3: mvc __THREAD_per+__PER_address(4,%r1),__LC_PER_ADDRESS mvc __THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID oi __TI_flags+3(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP + TRACE_IRQS_ON stosm __SF_EMPTY(%r15),0x03 # reenable interrupts b BASED(sysc_do_svc) @@ -539,9 +556,11 @@ io_int_handler: io_no_vtime: #endif l %r9,__LC_THREAD_INFO # load pointer to thread_info struct + TRACE_IRQS_OFF l %r1,BASED(.Ldo_IRQ) # load address of do_IRQ la %r2,SP_PTREGS(%r15) # address of register-save area basr %r14,%r1 # branch to standard irq handler + TRACE_IRQS_ON io_return: tm SP_PSW+1(%r15),0x01 # returning to user ? 
@@ -651,10 +670,12 @@ ext_int_handler: ext_no_vtime: #endif l %r9,__LC_THREAD_INFO # load pointer to thread_info struct + TRACE_IRQS_OFF la %r2,SP_PTREGS(%r15) # address of register-save area lh %r3,__LC_EXT_INT_CODE # get interruption code l %r1,BASED(.Ldo_extint) basr %r14,%r1 + TRACE_IRQS_ON b BASED(io_return) __critical_end: @@ -731,8 +752,10 @@ mcck_no_vtime: stosm __SF_EMPTY(%r15),0x04 # turn dat on tm __TI_flags+3(%r9),_TIF_MCCK_PENDING bno BASED(mcck_return) + TRACE_IRQS_OFF l %r1,BASED(.Ls390_handle_mcck) basr %r14,%r1 # call machine check handler + TRACE_IRQS_ON mcck_return: mvc __LC_RETURN_MCCK_PSW(8),SP_PSW(%r15) # move return PSW ni __LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit @@ -1012,7 +1035,11 @@ cleanup_io_leave_insn: .Lvfork: .long sys_vfork .Lschedtail: .long schedule_tail .Lsysc_table: .long sys_call_table - +#ifdef CONFIG_TRACE_IRQFLAGS +.Ltrace_irq_on:.long trace_hardirqs_on +.Ltrace_irq_off: + .long trace_hardirqs_off +#endif .Lcritical_start: .long __critical_start + 0x80000000 .Lcritical_end: diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 1ca499fa54b4..56f5f613b868 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -58,6 +58,19 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK | _TIF_NEED_RESCHED | \ #define BASED(name) name-system_call(%r13) +#ifdef CONFIG_TRACE_IRQFLAGS + .macro TRACE_IRQS_ON + brasl %r14,trace_hardirqs_on + .endm + + .macro TRACE_IRQS_OFF + brasl %r14,trace_hardirqs_off + .endm +#else +#define TRACE_IRQS_ON +#define TRACE_IRQS_OFF +#endif + .macro STORE_TIMER lc_offset #ifdef CONFIG_VIRT_CPU_ACCOUNTING stpt \lc_offset @@ -354,6 +367,7 @@ ret_from_fork: jo 0f stg %r15,SP_R15(%r15) # store stack pointer for new kthread 0: brasl %r14,schedule_tail + TRACE_IRQS_ON stosm 24(%r15),0x03 # reenable interrupts j sysc_return @@ -535,6 +549,7 @@ pgm_no_vtime3: mvc __THREAD_per+__PER_address(8,%r1),__LC_PER_ADDRESS mvc 
__THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID oi __TI_flags+7(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP + TRACE_IRQS_ON stosm __SF_EMPTY(%r15),0x03 # reenable interrupts j sysc_do_svc @@ -557,8 +572,10 @@ io_int_handler: io_no_vtime: #endif lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct + TRACE_IRQS_OFF la %r2,SP_PTREGS(%r15) # address of register-save area brasl %r14,do_IRQ # call standard irq handler + TRACE_IRQS_ON io_return: tm SP_PSW+1(%r15),0x01 # returning to user ? @@ -665,9 +682,11 @@ ext_int_handler: ext_no_vtime: #endif lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct + TRACE_IRQS_OFF la %r2,SP_PTREGS(%r15) # address of register-save area llgh %r3,__LC_EXT_INT_CODE # get interruption code brasl %r14,do_extint + TRACE_IRQS_ON j io_return __critical_end: @@ -743,7 +762,9 @@ mcck_no_vtime: stosm __SF_EMPTY(%r15),0x04 # turn dat on tm __TI_flags+7(%r9),_TIF_MCCK_PENDING jno mcck_return + TRACE_IRQS_OFF brasl %r14,s390_handle_mcck + TRACE_IRQS_ON mcck_return: mvc __LC_RETURN_MCCK_PSW(16),SP_PSW(%r15) # move return PSW ni __LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 480b6a5fef3a..1eef50918615 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -69,10 +69,6 @@ asmlinkage void do_softirq(void) local_irq_save(flags); - account_system_vtime(current); - - local_bh_disable(); - if (local_softirq_pending()) { /* Get current stack pointer. 
*/ asm volatile("la %0,0(15)" : "=a" (old)); @@ -95,10 +91,6 @@ asmlinkage void do_softirq(void) __do_softirq(); } - account_system_vtime(current); - - __local_bh_enable(); - local_irq_restore(flags); } diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 1f9399191794..78c8e5548caf 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -142,6 +142,7 @@ static void default_idle(void) return; } + trace_hardirqs_on(); /* Wait for external, I/O or machine check interrupt. */ __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_WAIT | PSW_MASK_IO | PSW_MASK_EXT); diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c new file mode 100644 index 000000000000..de83f38288d0 --- /dev/null +++ b/arch/s390/kernel/stacktrace.c @@ -0,0 +1,90 @@ +/* + * arch/s390/kernel/stacktrace.c + * + * Stack trace management functions + * + * Copyright (C) IBM Corp. 2006 + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> + */ + +#include <linux/sched.h> +#include <linux/stacktrace.h> +#include <linux/kallsyms.h> + +static inline unsigned long save_context_stack(struct stack_trace *trace, + unsigned int *skip, + unsigned long sp, + unsigned long low, + unsigned long high) +{ + struct stack_frame *sf; + struct pt_regs *regs; + unsigned long addr; + + while(1) { + sp &= PSW_ADDR_INSN; + if (sp < low || sp > high) + return sp; + sf = (struct stack_frame *)sp; + while(1) { + addr = sf->gprs[8] & PSW_ADDR_INSN; + if (!(*skip)) + trace->entries[trace->nr_entries++] = addr; + else + (*skip)--; + if (trace->nr_entries >= trace->max_entries) + return sp; + low = sp; + sp = sf->back_chain & PSW_ADDR_INSN; + if (!sp) + break; + if (sp <= low || sp > high - sizeof(*sf)) + return sp; + sf = (struct stack_frame *)sp; + } + /* Zero backchain detected, check for interrupt frame. 
*/ + sp = (unsigned long)(sf + 1); + if (sp <= low || sp > high - sizeof(*regs)) + return sp; + regs = (struct pt_regs *)sp; + addr = regs->psw.addr & PSW_ADDR_INSN; + if (!(*skip)) + trace->entries[trace->nr_entries++] = addr; + else + (*skip)--; + if (trace->nr_entries >= trace->max_entries) + return sp; + low = sp; + sp = regs->gprs[15]; + } +} + +void save_stack_trace(struct stack_trace *trace, + struct task_struct *task, int all_contexts, + unsigned int skip) +{ + register unsigned long sp asm ("15"); + unsigned long orig_sp; + + sp &= PSW_ADDR_INSN; + orig_sp = sp; + + sp = save_context_stack(trace, &skip, sp, + S390_lowcore.panic_stack - PAGE_SIZE, + S390_lowcore.panic_stack); + if ((sp != orig_sp) && !all_contexts) + return; + sp = save_context_stack(trace, &skip, sp, + S390_lowcore.async_stack - ASYNC_SIZE, + S390_lowcore.async_stack); + if ((sp != orig_sp) && !all_contexts) + return; + if (task) + save_context_stack(trace, &skip, sp, + (unsigned long) task_stack_page(task), + (unsigned long) task_stack_page(task) + THREAD_SIZE); + else + save_context_stack(trace, &skip, sp, S390_lowcore.thread_info, + S390_lowcore.thread_info + THREAD_SIZE); + return; +} diff --git a/arch/um/kernel/tt/process_kern.c b/arch/um/kernel/tt/process_kern.c index a9c1443fc548..8368c2dbe635 100644 --- a/arch/um/kernel/tt/process_kern.c +++ b/arch/um/kernel/tt/process_kern.c @@ -119,7 +119,7 @@ void suspend_new_thread(int fd) panic("read failed in suspend_new_thread, err = %d", -err); } -void schedule_tail(task_t *prev); +void schedule_tail(struct task_struct *prev); static void new_thread_handler(int sig) { diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 7d51dd7201c3..37cfe7701f06 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -495,6 +495,7 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end) { } +#ifdef CONFIG_SMP void alternatives_smp_module_add(struct module *mod, char *name, void *locks, void *locks_end, 
void *text, void *text_end) @@ -504,3 +505,4 @@ void alternatives_smp_module_add(struct module *mod, char *name, void alternatives_smp_module_del(struct module *mod) { } +#endif diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index e856804c447f..28df7d88ce2c 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -24,6 +24,14 @@ config X86 bool default y +config LOCKDEP_SUPPORT + bool + default y + +config STACKTRACE_SUPPORT + bool + default y + config SEMAPHORE_SLEEPERS bool default y diff --git a/arch/x86_64/Kconfig.debug b/arch/x86_64/Kconfig.debug index 1d92ab56c0f9..775d211a5cf9 100644 --- a/arch/x86_64/Kconfig.debug +++ b/arch/x86_64/Kconfig.debug @@ -1,5 +1,9 @@ menu "Kernel hacking" +config TRACE_IRQFLAGS_SUPPORT + bool + default y + source "lib/Kconfig.debug" config DEBUG_RODATA diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S index c536fa98ea37..9b5bb413a6e9 100644 --- a/arch/x86_64/ia32/ia32entry.S +++ b/arch/x86_64/ia32/ia32entry.S @@ -13,6 +13,7 @@ #include <asm/thread_info.h> #include <asm/segment.h> #include <asm/vsyscall32.h> +#include <asm/irqflags.h> #include <linux/linkage.h> #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8) @@ -75,6 +76,10 @@ ENTRY(ia32_sysenter_target) swapgs movq %gs:pda_kernelstack, %rsp addq $(PDA_STACKOFFSET),%rsp + /* + * No need to follow this irqs on/off section: the syscall + * disabled irqs, here we enable it straight after entry: + */ sti movl %ebp,%ebp /* zero extension */ pushq $__USER32_DS @@ -118,6 +123,7 @@ sysenter_do_call: movq %rax,RAX-ARGOFFSET(%rsp) GET_THREAD_INFO(%r10) cli + TRACE_IRQS_OFF testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10) jnz int_ret_from_sys_call andl $~TS_COMPAT,threadinfo_status(%r10) @@ -132,6 +138,7 @@ sysenter_do_call: CFI_REGISTER rsp,rcx movl $VSYSCALL32_SYSEXIT,%edx /* User %eip */ CFI_REGISTER rip,rdx + TRACE_IRQS_ON swapgs sti /* sti only takes effect after the next instruction */ /* sysexit */ @@ -186,6 +193,10 @@ 
ENTRY(ia32_cstar_target) movl %esp,%r8d CFI_REGISTER rsp,r8 movq %gs:pda_kernelstack,%rsp + /* + * No need to follow this irqs on/off section: the syscall + * disabled irqs and here we enable it straight after entry: + */ sti SAVE_ARGS 8,1,1 movl %eax,%eax /* zero extension */ @@ -220,6 +231,7 @@ cstar_do_call: movq %rax,RAX-ARGOFFSET(%rsp) GET_THREAD_INFO(%r10) cli + TRACE_IRQS_OFF testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10) jnz int_ret_from_sys_call andl $~TS_COMPAT,threadinfo_status(%r10) @@ -228,6 +240,7 @@ cstar_do_call: CFI_REGISTER rip,rcx movl EFLAGS-ARGOFFSET(%rsp),%r11d /*CFI_REGISTER rflags,r11*/ + TRACE_IRQS_ON movl RSP-ARGOFFSET(%rsp),%esp CFI_RESTORE rsp swapgs @@ -286,7 +299,11 @@ ENTRY(ia32_syscall) /*CFI_REL_OFFSET rflags,EFLAGS-RIP*/ /*CFI_REL_OFFSET cs,CS-RIP*/ CFI_REL_OFFSET rip,RIP-RIP - swapgs + swapgs + /* + * No need to follow this irqs on/off section: the syscall + * disabled irqs and here we enable it straight after entry: + */ sti movl %eax,%eax pushq %rax diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile index 819e84ec5b64..b5aaeafc1cd3 100644 --- a/arch/x86_64/kernel/Makefile +++ b/arch/x86_64/kernel/Makefile @@ -10,6 +10,7 @@ obj-y := process.o signal.o entry.o traps.o irq.o \ setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \ pci-dma.o pci-nommu.o alternative.o +obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_X86_MCE) += mce.o obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index ed92c2983606..d464dded68c0 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S @@ -41,13 +41,24 @@ #include <asm/thread_info.h> #include <asm/hw_irq.h> #include <asm/page.h> +#include <asm/irqflags.h> .code64 #ifndef CONFIG_PREEMPT #define retint_kernel retint_restore_args #endif - + + +.macro TRACE_IRQS_IRETQ offset=ARGOFFSET +#ifdef CONFIG_TRACE_IRQFLAGS + bt $9,EFLAGS-\offset(%rsp) /* interrupts 
off? */ + jnc 1f + TRACE_IRQS_ON +1: +#endif +.endm + /* * C code is not supposed to know about undefined top of stack. Every time * a C function with an pt_regs argument is called from the SYSCALL based @@ -194,6 +205,10 @@ ENTRY(system_call) swapgs movq %rsp,%gs:pda_oldrsp movq %gs:pda_kernelstack,%rsp + /* + * No need to follow this irqs off/on section - it's straight + * and short: + */ sti SAVE_ARGS 8,1 movq %rax,ORIG_RAX-ARGOFFSET(%rsp) @@ -219,10 +234,15 @@ ret_from_sys_call: sysret_check: GET_THREAD_INFO(%rcx) cli + TRACE_IRQS_OFF movl threadinfo_flags(%rcx),%edx andl %edi,%edx CFI_REMEMBER_STATE jnz sysret_careful + /* + * sysretq will re-enable interrupts: + */ + TRACE_IRQS_ON movq RIP-ARGOFFSET(%rsp),%rcx CFI_REGISTER rip,rcx RESTORE_ARGS 0,-ARG_SKIP,1 @@ -237,6 +257,7 @@ sysret_careful: CFI_RESTORE_STATE bt $TIF_NEED_RESCHED,%edx jnc sysret_signal + TRACE_IRQS_ON sti pushq %rdi CFI_ADJUST_CFA_OFFSET 8 @@ -247,6 +268,7 @@ sysret_careful: /* Handle a signal */ sysret_signal: + TRACE_IRQS_ON sti testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx jz 1f @@ -261,6 +283,7 @@ sysret_signal: /* Use IRET because user could have changed frame. This works because ptregscall_common has called FIXUP_TOP_OF_STACK. 
*/ cli + TRACE_IRQS_OFF jmp int_with_check badsys: @@ -309,6 +332,7 @@ ENTRY(int_ret_from_sys_call) CFI_REL_OFFSET r10,R10-ARGOFFSET CFI_REL_OFFSET r11,R11-ARGOFFSET cli + TRACE_IRQS_OFF testl $3,CS-ARGOFFSET(%rsp) je retint_restore_args movl $_TIF_ALLWORK_MASK,%edi @@ -327,6 +351,7 @@ int_with_check: int_careful: bt $TIF_NEED_RESCHED,%edx jnc int_very_careful + TRACE_IRQS_ON sti pushq %rdi CFI_ADJUST_CFA_OFFSET 8 @@ -334,10 +359,12 @@ int_careful: popq %rdi CFI_ADJUST_CFA_OFFSET -8 cli + TRACE_IRQS_OFF jmp int_with_check /* handle signals and tracing -- both require a full stack frame */ int_very_careful: + TRACE_IRQS_ON sti SAVE_REST /* Check for syscall exit trace */ @@ -351,6 +378,7 @@ int_very_careful: CFI_ADJUST_CFA_OFFSET -8 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi cli + TRACE_IRQS_OFF jmp int_restore_rest int_signal: @@ -363,6 +391,7 @@ int_signal: int_restore_rest: RESTORE_REST cli + TRACE_IRQS_OFF jmp int_with_check CFI_ENDPROC END(int_ret_from_sys_call) @@ -484,6 +513,10 @@ END(stub_rt_sigreturn) swapgs 1: incl %gs:pda_irqcount # RED-PEN should check preempt count cmoveq %gs:pda_irqstackptr,%rsp + /* + * We entered an interrupt context - irqs are off: + */ + TRACE_IRQS_OFF call \func .endm @@ -493,6 +526,7 @@ ENTRY(common_interrupt) /* 0(%rsp): oldrsp-ARGOFFSET */ ret_from_intr: cli + TRACE_IRQS_OFF decl %gs:pda_irqcount leaveq CFI_DEF_CFA_REGISTER rsp @@ -515,9 +549,21 @@ retint_check: CFI_REMEMBER_STATE jnz retint_careful retint_swapgs: + /* + * The iretq could re-enable interrupts: + */ + cli + TRACE_IRQS_IRETQ swapgs + jmp restore_args + retint_restore_args: cli + /* + * The iretq could re-enable interrupts: + */ + TRACE_IRQS_IRETQ +restore_args: RESTORE_ARGS 0,8,0 iret_label: iretq @@ -530,6 +576,7 @@ iret_label: /* running with kernel gs */ bad_iret: movq $11,%rdi /* SIGSEGV */ + TRACE_IRQS_ON sti jmp do_exit .previous @@ -539,6 +586,7 @@ retint_careful: CFI_RESTORE_STATE bt $TIF_NEED_RESCHED,%edx jnc retint_signal + 
TRACE_IRQS_ON sti pushq %rdi CFI_ADJUST_CFA_OFFSET 8 @@ -547,11 +595,13 @@ retint_careful: CFI_ADJUST_CFA_OFFSET -8 GET_THREAD_INFO(%rcx) cli + TRACE_IRQS_OFF jmp retint_check retint_signal: testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx jz retint_swapgs + TRACE_IRQS_ON sti SAVE_REST movq $-1,ORIG_RAX(%rsp) @@ -560,6 +610,7 @@ retint_signal: call do_notify_resume RESTORE_REST cli + TRACE_IRQS_OFF movl $_TIF_NEED_RESCHED,%edi GET_THREAD_INFO(%rcx) jmp retint_check @@ -666,7 +717,7 @@ END(spurious_interrupt) /* error code is on the stack already */ /* handle NMI like exceptions that can happen everywhere */ - .macro paranoidentry sym, ist=0 + .macro paranoidentry sym, ist=0, irqtrace=1 SAVE_ALL cld movl $1,%ebx @@ -691,8 +742,73 @@ END(spurious_interrupt) addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) .endif cli + .if \irqtrace + TRACE_IRQS_OFF + .endif .endm - + + /* + * "Paranoid" exit path from exception stack. + * Paranoid because this is used by NMIs and cannot take + * any kernel state for granted. + * We don't do kernel preemption checks here, because only + * NMI should be common and it does not enable IRQs and + * cannot get reschedule ticks. + * + * "trace" is 0 for the NMI handler only, because irq-tracing + * is fundamentally NMI-unsafe. (we cannot change the soft and + * hard flags at once, atomically) + */ + .macro paranoidexit trace=1 + /* ebx: no swapgs flag */ +paranoid_exit\trace: + testl %ebx,%ebx /* swapgs needed? 
*/ + jnz paranoid_restore\trace + testl $3,CS(%rsp) + jnz paranoid_userspace\trace +paranoid_swapgs\trace: + TRACE_IRQS_IRETQ 0 + swapgs +paranoid_restore\trace: + RESTORE_ALL 8 + iretq +paranoid_userspace\trace: + GET_THREAD_INFO(%rcx) + movl threadinfo_flags(%rcx),%ebx + andl $_TIF_WORK_MASK,%ebx + jz paranoid_swapgs\trace + movq %rsp,%rdi /* &pt_regs */ + call sync_regs + movq %rax,%rsp /* switch stack for scheduling */ + testl $_TIF_NEED_RESCHED,%ebx + jnz paranoid_schedule\trace + movl %ebx,%edx /* arg3: thread flags */ + .if \trace + TRACE_IRQS_ON + .endif + sti + xorl %esi,%esi /* arg2: oldset */ + movq %rsp,%rdi /* arg1: &pt_regs */ + call do_notify_resume + cli + .if \trace + TRACE_IRQS_OFF + .endif + jmp paranoid_userspace\trace +paranoid_schedule\trace: + .if \trace + TRACE_IRQS_ON + .endif + sti + call schedule + cli + .if \trace + TRACE_IRQS_OFF + .endif + jmp paranoid_userspace\trace + CFI_ENDPROC + .endm + /* * Exception entry point. This expects an error code/orig_rax on the stack * and the exception handler in %rax. @@ -748,6 +864,7 @@ error_exit: movl %ebx,%eax RESTORE_REST cli + TRACE_IRQS_OFF GET_THREAD_INFO(%rcx) testl %eax,%eax jne retint_kernel @@ -755,6 +872,10 @@ error_exit: movl $_TIF_WORK_MASK,%edi andl %edi,%edx jnz retint_careful + /* + * The iret might restore flags: + */ + TRACE_IRQS_IRETQ swapgs RESTORE_ARGS 0,8,0 jmp iret_label @@ -916,8 +1037,7 @@ KPROBE_ENTRY(debug) pushq $0 CFI_ADJUST_CFA_OFFSET 8 paranoidentry do_debug, DEBUG_STACK - jmp paranoid_exit - CFI_ENDPROC + paranoidexit END(debug) .previous .text @@ -926,49 +1046,13 @@ KPROBE_ENTRY(nmi) INTR_FRAME pushq $-1 CFI_ADJUST_CFA_OFFSET 8 - paranoidentry do_nmi - /* - * "Paranoid" exit path from exception stack. - * Paranoid because this is used by NMIs and cannot take - * any kernel state for granted. - * We don't do kernel preemption checks here, because only - * NMI should be common and it does not enable IRQs and - * cannot get reschedule ticks. 
- */ - /* ebx: no swapgs flag */ -paranoid_exit: - testl %ebx,%ebx /* swapgs needed? */ - jnz paranoid_restore - testl $3,CS(%rsp) - jnz paranoid_userspace -paranoid_swapgs: - swapgs -paranoid_restore: - RESTORE_ALL 8 - iretq -paranoid_userspace: - GET_THREAD_INFO(%rcx) - movl threadinfo_flags(%rcx),%ebx - andl $_TIF_WORK_MASK,%ebx - jz paranoid_swapgs - movq %rsp,%rdi /* &pt_regs */ - call sync_regs - movq %rax,%rsp /* switch stack for scheduling */ - testl $_TIF_NEED_RESCHED,%ebx - jnz paranoid_schedule - movl %ebx,%edx /* arg3: thread flags */ - sti - xorl %esi,%esi /* arg2: oldset */ - movq %rsp,%rdi /* arg1: &pt_regs */ - call do_notify_resume - cli - jmp paranoid_userspace -paranoid_schedule: - sti - call schedule - cli - jmp paranoid_userspace - CFI_ENDPROC + paranoidentry do_nmi, 0, 0 +#ifdef CONFIG_TRACE_IRQFLAGS + paranoidexit 0 +#else + jmp paranoid_exit1 + CFI_ENDPROC +#endif END(nmi) .previous .text @@ -977,7 +1061,7 @@ KPROBE_ENTRY(int3) pushq $0 CFI_ADJUST_CFA_OFFSET 8 paranoidentry do_int3, DEBUG_STACK - jmp paranoid_exit + jmp paranoid_exit1 CFI_ENDPROC END(int3) .previous .text @@ -1006,7 +1090,7 @@ END(reserved) ENTRY(double_fault) XCPT_FRAME paranoidentry do_double_fault - jmp paranoid_exit + jmp paranoid_exit1 CFI_ENDPROC END(double_fault) @@ -1022,7 +1106,7 @@ END(segment_not_present) ENTRY(stack_segment) XCPT_FRAME paranoidentry do_stack_segment - jmp paranoid_exit + jmp paranoid_exit1 CFI_ENDPROC END(stack_segment) @@ -1050,7 +1134,7 @@ ENTRY(machine_check) pushq $0 CFI_ADJUST_CFA_OFFSET 8 paranoidentry do_machine_check - jmp paranoid_exit + jmp paranoid_exit1 CFI_ENDPROC END(machine_check) #endif diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c index e6a71c9556d9..36647ce6aecb 100644 --- a/arch/x86_64/kernel/head64.c +++ b/arch/x86_64/kernel/head64.c @@ -85,6 +85,11 @@ void __init x86_64_start_kernel(char * real_mode_data) clear_bss(); /* + * This must be called really, really early: + */ + lockdep_init(); + + /* * 
switch to init_level4_pgt from boot_level4_pgt */ memcpy(init_level4_pgt, boot_level4_pgt, PTRS_PER_PGD*sizeof(pgd_t)); diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c index a1f1df5f7bfc..5221a53e90c1 100644 --- a/arch/x86_64/kernel/irq.c +++ b/arch/x86_64/kernel/irq.c @@ -177,8 +177,10 @@ asmlinkage void do_softirq(void) local_irq_save(flags); pending = local_softirq_pending(); /* Switch to interrupt stack */ - if (pending) + if (pending) { call_softirq(); + WARN_ON_ONCE(softirq_count()); + } local_irq_restore(flags); } EXPORT_SYMBOL(do_softirq); diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index 476c1472fc07..5baa0c726e97 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c @@ -127,7 +127,7 @@ void __cpuinit nmi_watchdog_default(void) static __init void nmi_cpu_busy(void *data) { volatile int *endflag = data; - local_irq_enable(); + local_irq_enable_in_hardirq(); /* Intentionally don't use cpu_relax here. This is to make sure that the performance counter really ticks, even if there is a simulator or similar that catches the diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index ca56e19b8b6e..bb6745d13b8f 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -296,7 +296,7 @@ void __show_regs(struct pt_regs * regs) system_utsname.version); printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip); printk_address(regs->rip); - printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, + printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, regs->eflags); printk("RAX: %016lx RBX: %016lx RCX: %016lx\n", regs->rax, regs->rbx, regs->rcx); diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 9705a6a384f1..b7c705969791 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -775,6 +775,8 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid) }; DECLARE_WORK(work, do_fork_idle, &c_idle); + 
lockdep_set_class(&c_idle.done.wait.lock, &waitqueue_lock_key); + /* allocate memory for gdts of secondary cpus. Hotplug is considered */ if (!cpu_gdt_descr[cpu].address && !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) { diff --git a/arch/x86_64/kernel/stacktrace.c b/arch/x86_64/kernel/stacktrace.c new file mode 100644 index 000000000000..32cf55eb9af8 --- /dev/null +++ b/arch/x86_64/kernel/stacktrace.c @@ -0,0 +1,221 @@ +/* + * arch/x86_64/kernel/stacktrace.c + * + * Stack trace management functions + * + * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + */ +#include <linux/sched.h> +#include <linux/stacktrace.h> + +#include <asm/smp.h> + +static inline int +in_range(unsigned long start, unsigned long addr, unsigned long end) +{ + return addr >= start && addr <= end; +} + +static unsigned long +get_stack_end(struct task_struct *task, unsigned long stack) +{ + unsigned long stack_start, stack_end, flags; + int i, cpu; + + /* + * The most common case is that we are in the task stack: + */ + stack_start = (unsigned long)task->thread_info; + stack_end = stack_start + THREAD_SIZE; + + if (in_range(stack_start, stack, stack_end)) + return stack_end; + + /* + * We are in an interrupt if irqstackptr is set: + */ + raw_local_irq_save(flags); + cpu = safe_smp_processor_id(); + stack_end = (unsigned long)cpu_pda(cpu)->irqstackptr; + + if (stack_end) { + stack_start = stack_end & ~(IRQSTACKSIZE-1); + if (in_range(stack_start, stack, stack_end)) + goto out_restore; + /* + * We get here if we are in an IRQ context but we + * are also in an exception stack. + */ + } + + /* + * Iterate over all exception stacks, and figure out whether + * 'stack' is in one of them: + */ + for (i = 0; i < N_EXCEPTION_STACKS; i++) { + /* + * set 'end' to the end of the exception stack. + */ + stack_end = per_cpu(init_tss, cpu).ist[i]; + stack_start = stack_end - EXCEPTION_STKSZ; + + /* + * Is 'stack' above this exception frame's end? 
+ * If yes then skip to the next frame. + */ + if (stack >= stack_end) + continue; + /* + * Is 'stack' above this exception frame's start address? + * If yes then we found the right frame. + */ + if (stack >= stack_start) + goto out_restore; + + /* + * If this is a debug stack, and if it has a larger size than + * the usual exception stacks, then 'stack' might still + * be within the lower portion of the debug stack: + */ +#if DEBUG_STKSZ > EXCEPTION_STKSZ + if (i == DEBUG_STACK - 1 && stack >= stack_end - DEBUG_STKSZ) { + /* + * Black magic. A large debug stack is composed of + * multiple exception stack entries, which we + * iterate through now. Dont look: + */ + do { + stack_end -= EXCEPTION_STKSZ; + stack_start -= EXCEPTION_STKSZ; + } while (stack < stack_start); + + goto out_restore; + } +#endif + } + /* + * Ok, 'stack' is not pointing to any of the system stacks. + */ + stack_end = 0; + +out_restore: + raw_local_irq_restore(flags); + + return stack_end; +} + + +/* + * Save stack-backtrace addresses into a stack_trace buffer: + */ +static inline unsigned long +save_context_stack(struct stack_trace *trace, unsigned int skip, + unsigned long stack, unsigned long stack_end) +{ + unsigned long addr; + +#ifdef CONFIG_FRAME_POINTER + unsigned long prev_stack = 0; + + while (in_range(prev_stack, stack, stack_end)) { + pr_debug("stack: %p\n", (void *)stack); + addr = (unsigned long)(((unsigned long *)stack)[1]); + pr_debug("addr: %p\n", (void *)addr); + if (!skip) + trace->entries[trace->nr_entries++] = addr-1; + else + skip--; + if (trace->nr_entries >= trace->max_entries) + break; + if (!addr) + return 0; + /* + * Stack frames must go forwards (otherwise a loop could + * happen if the stackframe is corrupted), so we move + * prev_stack forwards: + */ + prev_stack = stack; + stack = (unsigned long)(((unsigned long *)stack)[0]); + } + pr_debug("invalid: %p\n", (void *)stack); +#else + while (stack < stack_end) { + addr = ((unsigned long *)stack)[0]; + stack += 
sizeof(long); + if (__kernel_text_address(addr)) { + if (!skip) + trace->entries[trace->nr_entries++] = addr-1; + else + skip--; + if (trace->nr_entries >= trace->max_entries) + break; + } + } +#endif + return stack; +} + +#define MAX_STACKS 10 + +/* + * Save stack-backtrace addresses into a stack_trace buffer. + * If all_contexts is set, all contexts (hardirq, softirq and process) + * are saved. If not set then only the current context is saved. + */ +void save_stack_trace(struct stack_trace *trace, + struct task_struct *task, int all_contexts, + unsigned int skip) +{ + unsigned long stack = (unsigned long)&stack; + int i, nr_stacks = 0, stacks_done[MAX_STACKS]; + + WARN_ON(trace->nr_entries || !trace->max_entries); + + if (!task) + task = current; + + pr_debug("task: %p, ti: %p\n", task, task->thread_info); + + if (!task || task == current) { + /* Grab rbp right from our regs: */ + asm ("mov %%rbp, %0" : "=r" (stack)); + pr_debug("rbp: %p\n", (void *)stack); + } else { + /* rbp is the last reg pushed by switch_to(): */ + stack = task->thread.rsp; + pr_debug("other task rsp: %p\n", (void *)stack); + stack = (unsigned long)(((unsigned long *)stack)[0]); + pr_debug("other task rbp: %p\n", (void *)stack); + } + + while (1) { + unsigned long stack_end = get_stack_end(task, stack); + + pr_debug("stack: %p\n", (void *)stack); + pr_debug("stack end: %p\n", (void *)stack_end); + + /* + * Invalid stack addres? + */ + if (!stack_end) + return; + /* + * Were we in this stack already? 
(recursion) + */ + for (i = 0; i < nr_stacks; i++) + if (stacks_done[i] == stack_end) + return; + stacks_done[nr_stacks] = stack_end; + + stack = save_context_stack(trace, skip, stack, stack_end); + if (!all_contexts || !stack || + trace->nr_entries >= trace->max_entries) + return; + trace->entries[trace->nr_entries++] = ULONG_MAX; + if (trace->nr_entries >= trace->max_entries) + return; + if (++nr_stacks >= MAX_STACKS) + return; + } +} + diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 5a5311d3de0f..79d05c482072 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -110,28 +110,31 @@ static int kstack_depth_to_print = 12; static int call_trace = 1; #ifdef CONFIG_KALLSYMS -#include <linux/kallsyms.h> -int printk_address(unsigned long address) -{ +# include <linux/kallsyms.h> +void printk_address(unsigned long address) +{ unsigned long offset = 0, symsize; const char *symname; char *modname; - char *delim = ":"; + char *delim = ":"; char namebuf[128]; - symname = kallsyms_lookup(address, &symsize, &offset, &modname, namebuf); - if (!symname) - return printk("[<%016lx>]", address); - if (!modname) + symname = kallsyms_lookup(address, &symsize, &offset, + &modname, namebuf); + if (!symname) { + printk(" [<%016lx>]\n", address); + return; + } + if (!modname) modname = delim = ""; - return printk("<%016lx>{%s%s%s%s%+ld}", - address, delim, modname, delim, symname, offset); -} + printk(" [<%016lx>] %s%s%s%s+0x%lx/0x%lx\n", + address, delim, modname, delim, symname, offset, symsize); +} #else -int printk_address(unsigned long address) -{ - return printk("[<%016lx>]", address); -} +void printk_address(unsigned long address) +{ + printk(" [<%016lx>]\n", address); +} #endif static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, @@ -149,10 +152,22 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, }; unsigned k; + /* + * Iterate over all exception stacks, and figure out whether + 
* 'stack' is in one of them: + */ for (k = 0; k < N_EXCEPTION_STACKS; k++) { unsigned long end; + /* + * set 'end' to the end of the exception stack. + */ switch (k + 1) { + /* + * TODO: this block is not needed i think, because + * setup64.c:cpu_init() sets up t->ist[DEBUG_STACK] + * properly too. + */ #if DEBUG_STKSZ > EXCEPTION_STKSZ case DEBUG_STACK: end = cpu_pda(cpu)->debugstack + DEBUG_STKSZ; @@ -162,19 +177,43 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, end = per_cpu(init_tss, cpu).ist[k]; break; } + /* + * Is 'stack' above this exception frame's end? + * If yes then skip to the next frame. + */ if (stack >= end) continue; + /* + * Is 'stack' above this exception frame's start address? + * If yes then we found the right frame. + */ if (stack >= end - EXCEPTION_STKSZ) { + /* + * Make sure we only iterate through an exception + * stack once. If it comes up for the second time + * then there's something wrong going on - just + * break out and return NULL: + */ if (*usedp & (1U << k)) break; *usedp |= 1U << k; *idp = ids[k]; return (unsigned long *)end; } + /* + * If this is a debug stack, and if it has a larger size than + * the usual exception stacks, then 'stack' might still + * be within the lower portion of the debug stack: + */ #if DEBUG_STKSZ > EXCEPTION_STKSZ if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) { unsigned j = N_EXCEPTION_STACKS - 1; + /* + * Black magic. A large debug stack is composed of + * multiple exception stack entries, which we + * iterate through now. 
Dont look: + */ do { ++j; end -= EXCEPTION_STKSZ; @@ -193,20 +232,14 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, static int show_trace_unwind(struct unwind_frame_info *info, void *context) { - int i = 11, n = 0; + int n = 0; while (unwind(info) == 0 && UNW_PC(info)) { - ++n; - if (i > 50) { - printk("\n "); - i = 7; - } else - i += printk(" "); - i += printk_address(UNW_PC(info)); + n++; + printk_address(UNW_PC(info)); if (arch_unw_user_mode(info)) break; } - printk("\n"); return n; } @@ -224,7 +257,7 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s int i = 11; unsigned used = 0; - printk("\nCall Trace:"); + printk("\nCall Trace:\n"); if (!tsk) tsk = current; @@ -250,16 +283,15 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s } } + /* + * Print function call entries within a stack. 'cond' is the + * "end of stackframe" condition, that the 'stack++' + * iteration will eventually trigger. + */ #define HANDLE_STACK(cond) \ do while (cond) { \ unsigned long addr = *stack++; \ if (kernel_text_address(addr)) { \ - if (i > 50) { \ - printk("\n "); \ - i = 0; \ - } \ - else \ - i += printk(" "); \ /* \ * If the address is either in the text segment of the \ * kernel, or in the region which contains vmalloc'ed \ @@ -268,20 +300,30 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s * down the cause of the crash will be able to figure \ * out the call path that was taken. \ */ \ - i += printk_address(addr); \ + printk_address(addr); \ } \ } while (0) - for(; ; ) { + /* + * Print function call entries in all stacks, starting at the + * current stack address. 
If the stacks consist of nested + * exceptions + */ + for ( ; ; ) { const char *id; unsigned long *estack_end; estack_end = in_exception_stack(cpu, (unsigned long)stack, &used, &id); if (estack_end) { - i += printk(" <%s>", id); + printk(" <%s>", id); HANDLE_STACK (stack < estack_end); - i += printk(" <EOE>"); + printk(" <EOE>"); + /* + * We link to the next stack via the + * second-to-last pointer (index -2 to end) in the + * exception stack: + */ stack = (unsigned long *) estack_end[-2]; continue; } @@ -291,19 +333,28 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s (IRQSTACKSIZE - 64) / sizeof(*irqstack); if (stack >= irqstack && stack < irqstack_end) { - i += printk(" <IRQ>"); + printk(" <IRQ>"); HANDLE_STACK (stack < irqstack_end); + /* + * We link to the next stack (which would be + * the process stack normally) the last + * pointer (index -1 to end) in the IRQ stack: + */ stack = (unsigned long *) (irqstack_end[-1]); irqstack_end = NULL; - i += printk(" <EOI>"); + printk(" <EOI>"); continue; } } break; } + /* + * This prints the process stack: + */ HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0); #undef HANDLE_STACK + printk("\n"); } @@ -337,8 +388,8 @@ static void _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned break; } if (i && ((i % 4) == 0)) - printk("\n "); - printk("%016lx ", *stack++); + printk("\n"); + printk(" %016lx", *stack++); touch_nmi_watchdog(); } show_trace(tsk, regs, rsp); diff --git a/arch/x86_64/lib/thunk.S b/arch/x86_64/lib/thunk.S index e49af0032e94..332ea5dff916 100644 --- a/arch/x86_64/lib/thunk.S +++ b/arch/x86_64/lib/thunk.S @@ -47,6 +47,11 @@ thunk_retrax __down_failed_interruptible,__down_interruptible thunk_retrax __down_failed_trylock,__down_trylock thunk __up_wakeup,__up + +#ifdef CONFIG_TRACE_IRQFLAGS + thunk trace_hardirqs_on_thunk,trace_hardirqs_on + thunk trace_hardirqs_off_thunk,trace_hardirqs_off +#endif /* SAVE_ARGS below is used only for the .cfi directives it 
contains. */ CFI_STARTPROC diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index 5afcf6eb00fa..ac8ea66ccb94 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c @@ -570,7 +570,6 @@ no_context: printk(KERN_ALERT "Unable to handle kernel paging request"); printk(" at %016lx RIP: \n" KERN_ALERT,address); printk_address(regs->rip); - printk("\n"); dump_pagetable(address); tsk->thread.cr2 = address; tsk->thread.trap_no = 14; diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 5813d63c20af..ab17c7224bb6 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -2516,7 +2516,7 @@ EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk, struct request *rq, int at_head) { - DECLARE_COMPLETION(wait); + DECLARE_COMPLETION_ONSTACK(wait); char sense[SCSI_SENSE_BUFFERSIZE]; int err = 0; diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 0242cbb86a87..5109fa37c662 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -249,18 +249,6 @@ static int irqdma_allocated; #include <linux/cdrom.h> /* for the compatibility eject ioctl */ #include <linux/completion.h> -/* - * Interrupt freeing also means /proc VFS work - dont do it - * from interrupt context. We push this work into keventd: - */ -static void fd_free_irq_fn(void *data) -{ - fd_free_irq(); -} - -static DECLARE_WORK(fd_free_irq_work, fd_free_irq_fn, NULL); - - static struct request *current_req; static struct request_queue *floppy_queue; static void do_fd_request(request_queue_t * q); @@ -826,15 +814,6 @@ static int set_dor(int fdc, char mask, char data) UDRS->select_date = jiffies; } } - /* - * We should propagate failures to grab the resources back - * nicely from here. Actually we ought to rewrite the fd - * driver some day too. 
- */ - if (newdor & FLOPPY_MOTOR_MASK) - floppy_grab_irq_and_dma(); - if (olddor & FLOPPY_MOTOR_MASK) - floppy_release_irq_and_dma(); return olddor; } @@ -892,8 +871,6 @@ static int _lock_fdc(int drive, int interruptible, int line) line); return -1; } - if (floppy_grab_irq_and_dma() == -1) - return -EBUSY; if (test_and_set_bit(0, &fdc_busy)) { DECLARE_WAITQUEUE(wait, current); @@ -915,6 +892,8 @@ static int _lock_fdc(int drive, int interruptible, int line) set_current_state(TASK_RUNNING); remove_wait_queue(&fdc_wait, &wait); + + flush_scheduled_work(); } command_status = FD_COMMAND_NONE; @@ -948,7 +927,6 @@ static inline void unlock_fdc(void) if (elv_next_request(floppy_queue)) do_fd_request(floppy_queue); spin_unlock_irqrestore(&floppy_lock, flags); - floppy_release_irq_and_dma(); wake_up(&fdc_wait); } @@ -3694,8 +3672,8 @@ static int floppy_release(struct inode *inode, struct file *filp) } if (!UDRS->fd_ref) opened_bdev[drive] = NULL; - floppy_release_irq_and_dma(); mutex_unlock(&open_lock); + return 0; } @@ -3726,9 +3704,6 @@ static int floppy_open(struct inode *inode, struct file *filp) if (UDRS->fd_ref == -1 || (UDRS->fd_ref && (filp->f_flags & O_EXCL))) goto out2; - if (floppy_grab_irq_and_dma()) - goto out2; - if (filp->f_flags & O_EXCL) UDRS->fd_ref = -1; else @@ -3805,7 +3780,6 @@ out: UDRS->fd_ref--; if (!UDRS->fd_ref) opened_bdev[drive] = NULL; - floppy_release_irq_and_dma(); out2: mutex_unlock(&open_lock); return res; @@ -3822,14 +3796,9 @@ static int check_floppy_change(struct gendisk *disk) return 1; if (time_after(jiffies, UDRS->last_checked + UDP->checkfreq)) { - if (floppy_grab_irq_and_dma()) { - return 1; - } - lock_fdc(drive, 0); poll_drive(0, 0); process_fd_request(); - floppy_release_irq_and_dma(); } if (UTESTF(FD_DISK_CHANGED) || @@ -4346,7 +4315,6 @@ static int __init floppy_init(void) fdc = 0; del_timer(&fd_timeout); current_drive = 0; - floppy_release_irq_and_dma(); initialising = 0; if (have_no_fdc) { DPRINT("no floppy controllers 
found\n"); @@ -4504,7 +4472,7 @@ static void floppy_release_irq_and_dma(void) if (irqdma_allocated) { fd_disable_dma(); fd_free_dma(); - schedule_work(&fd_free_irq_work); + fd_free_irq(); irqdma_allocated = 0; } set_dor(0, ~0, 8); @@ -4600,8 +4568,6 @@ void cleanup_module(void) /* eject disk, if any */ fd_eject(0); - flush_scheduled_work(); /* fd_free_irq() might be pending */ - wait_for_completion(&device_release); } diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index 3721e12135d9..cc42e762396f 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -250,8 +250,6 @@ static int floppy_open(struct inode *inode, struct file *filp); static int floppy_release(struct inode *inode, struct file *filp); static int floppy_check_change(struct gendisk *disk); static int floppy_revalidate(struct gendisk *disk); -static int swim3_add_device(struct device_node *swims); -int swim3_init(void); #ifndef CONFIG_PMAC_MEDIABAY #define check_media_bay(which, what) 1 @@ -1011,114 +1009,63 @@ static struct block_device_operations floppy_fops = { .revalidate_disk= floppy_revalidate, }; -int swim3_init(void) -{ - struct device_node *swim; - int err = -ENOMEM; - int i; - - swim = find_devices("floppy"); - while (swim && (floppy_count < MAX_FLOPPIES)) - { - swim3_add_device(swim); - swim = swim->next; - } - - swim = find_devices("swim3"); - while (swim && (floppy_count < MAX_FLOPPIES)) - { - swim3_add_device(swim); - swim = swim->next; - } - - if (!floppy_count) - return -ENODEV; - - for (i = 0; i < floppy_count; i++) { - disks[i] = alloc_disk(1); - if (!disks[i]) - goto out; - } - - if (register_blkdev(FLOPPY_MAJOR, "fd")) { - err = -EBUSY; - goto out; - } - - swim3_queue = blk_init_queue(do_fd_request, &swim3_lock); - if (!swim3_queue) { - err = -ENOMEM; - goto out_queue; - } - - for (i = 0; i < floppy_count; i++) { - struct gendisk *disk = disks[i]; - disk->major = FLOPPY_MAJOR; - disk->first_minor = i; - disk->fops = &floppy_fops; - disk->private_data = 
&floppy_states[i]; - disk->queue = swim3_queue; - disk->flags |= GENHD_FL_REMOVABLE; - sprintf(disk->disk_name, "fd%d", i); - set_capacity(disk, 2880); - add_disk(disk); - } - return 0; - -out_queue: - unregister_blkdev(FLOPPY_MAJOR, "fd"); -out: - while (i--) - put_disk(disks[i]); - /* shouldn't we do something with results of swim_add_device()? */ - return err; -} - -static int swim3_add_device(struct device_node *swim) +static int swim3_add_device(struct macio_dev *mdev, int index) { + struct device_node *swim = mdev->ofdev.node; struct device_node *mediabay; - struct floppy_state *fs = &floppy_states[floppy_count]; - struct resource res_reg, res_dma; + struct floppy_state *fs = &floppy_states[index]; + int rc = -EBUSY; - if (of_address_to_resource(swim, 0, &res_reg) || - of_address_to_resource(swim, 1, &res_dma)) { - printk(KERN_ERR "swim3: Can't get addresses\n"); - return -EINVAL; + /* Check & Request resources */ + if (macio_resource_count(mdev) < 2) { + printk(KERN_WARNING "ifd%d: no address for %s\n", + index, swim->full_name); + return -ENXIO; } - if (request_mem_region(res_reg.start, res_reg.end - res_reg.start + 1, - " (reg)") == NULL) { - printk(KERN_ERR "swim3: Can't request register space\n"); - return -EINVAL; + if (macio_irq_count(mdev) < 2) { + printk(KERN_WARNING "fd%d: no intrs for device %s\n", + index, swim->full_name); } - if (request_mem_region(res_dma.start, res_dma.end - res_dma.start + 1, - " (dma)") == NULL) { - release_mem_region(res_reg.start, - res_reg.end - res_reg.start + 1); - printk(KERN_ERR "swim3: Can't request DMA space\n"); - return -EINVAL; + if (macio_request_resource(mdev, 0, "swim3 (mmio)")) { + printk(KERN_ERR "fd%d: can't request mmio resource for %s\n", + index, swim->full_name); + return -EBUSY; } - - if (swim->n_intrs < 2) { - printk(KERN_INFO "swim3: expecting 2 intrs (n_intrs:%d)\n", - swim->n_intrs); - release_mem_region(res_reg.start, - res_reg.end - res_reg.start + 1); - release_mem_region(res_dma.start, - 
res_dma.end - res_dma.start + 1); - return -EINVAL; + if (macio_request_resource(mdev, 1, "swim3 (dma)")) { + printk(KERN_ERR "fd%d: can't request dma resource for %s\n", + index, swim->full_name); + macio_release_resource(mdev, 0); + return -EBUSY; } + dev_set_drvdata(&mdev->ofdev.dev, fs); - mediabay = (strcasecmp(swim->parent->type, "media-bay") == 0) ? swim->parent : NULL; + mediabay = (strcasecmp(swim->parent->type, "media-bay") == 0) ? + swim->parent : NULL; if (mediabay == NULL) pmac_call_feature(PMAC_FTR_SWIM3_ENABLE, swim, 0, 1); memset(fs, 0, sizeof(*fs)); spin_lock_init(&fs->lock); fs->state = idle; - fs->swim3 = (struct swim3 __iomem *)ioremap(res_reg.start, 0x200); - fs->dma = (struct dbdma_regs __iomem *)ioremap(res_dma.start, 0x200); - fs->swim3_intr = swim->intrs[0].line; - fs->dma_intr = swim->intrs[1].line; + fs->swim3 = (struct swim3 __iomem *) + ioremap(macio_resource_start(mdev, 0), 0x200); + if (fs->swim3 == NULL) { + printk("fd%d: couldn't map registers for %s\n", + index, swim->full_name); + rc = -ENOMEM; + goto out_release; + } + fs->dma = (struct dbdma_regs __iomem *) + ioremap(macio_resource_start(mdev, 1), 0x200); + if (fs->dma == NULL) { + printk("fd%d: couldn't map DMA for %s\n", + index, swim->full_name); + iounmap(fs->swim3); + rc = -ENOMEM; + goto out_release; + } + fs->swim3_intr = macio_irq(mdev, 0); + fs->dma_intr = macio_irq(mdev, 1);; fs->cur_cyl = -1; fs->cur_sector = -1; fs->secpercyl = 36; @@ -1132,15 +1079,16 @@ static int swim3_add_device(struct device_node *swim) st_le16(&fs->dma_cmd[1].command, DBDMA_STOP); if (request_irq(fs->swim3_intr, swim3_interrupt, 0, "SWIM3", fs)) { - printk(KERN_ERR "Couldn't get irq %d for SWIM3\n", fs->swim3_intr); + printk(KERN_ERR "fd%d: couldn't request irq %d for %s\n", + index, fs->swim3_intr, swim->full_name); pmac_call_feature(PMAC_FTR_SWIM3_ENABLE, swim, 0, 0); + goto out_unmap; return -EBUSY; } /* if (request_irq(fs->dma_intr, fd_dma_interrupt, 0, "SWIM3-dma", fs)) { printk(KERN_ERR 
"Couldn't get irq %d for SWIM3 DMA", fs->dma_intr); - pmac_call_feature(PMAC_FTR_SWIM3_ENABLE, swim, 0, 0); return -EBUSY; } */ @@ -1150,8 +1098,90 @@ static int swim3_add_device(struct device_node *swim) printk(KERN_INFO "fd%d: SWIM3 floppy controller %s\n", floppy_count, mediabay ? "in media bay" : ""); - floppy_count++; - + return 0; + + out_unmap: + iounmap(fs->dma); + iounmap(fs->swim3); + + out_release: + macio_release_resource(mdev, 0); + macio_release_resource(mdev, 1); + + return rc; +} + +static int __devinit swim3_attach(struct macio_dev *mdev, const struct of_device_id *match) +{ + int i, rc; + struct gendisk *disk; + + /* Add the drive */ + rc = swim3_add_device(mdev, floppy_count); + if (rc) + return rc; + + /* Now create the queue if not there yet */ + if (swim3_queue == NULL) { + /* If we failed, there isn't much we can do as the driver is still + * too dumb to remove the device, just bail out + */ + if (register_blkdev(FLOPPY_MAJOR, "fd")) + return 0; + swim3_queue = blk_init_queue(do_fd_request, &swim3_lock); + if (swim3_queue == NULL) { + unregister_blkdev(FLOPPY_MAJOR, "fd"); + return 0; + } + } + + /* Now register that disk. 
Same comment about failure handling */ + i = floppy_count++; + disk = disks[i] = alloc_disk(1); + if (disk == NULL) + return 0; + + disk->major = FLOPPY_MAJOR; + disk->first_minor = i; + disk->fops = &floppy_fops; + disk->private_data = &floppy_states[i]; + disk->queue = swim3_queue; + disk->flags |= GENHD_FL_REMOVABLE; + sprintf(disk->disk_name, "fd%d", i); + set_capacity(disk, 2880); + add_disk(disk); + + return 0; +} + +static struct of_device_id swim3_match[] = +{ + { + .name = "swim3", + }, + { + .compatible = "ohare-swim3" + }, + { + .compatible = "swim3" + }, +}; + +static struct macio_driver swim3_driver = +{ + .name = "swim3", + .match_table = swim3_match, + .probe = swim3_attach, +#if 0 + .suspend = swim3_suspend, + .resume = swim3_resume, +#endif +}; + + +int swim3_init(void) +{ + macio_register_driver(&swim3_driver); return 0; } diff --git a/drivers/char/agp/frontend.c b/drivers/char/agp/frontend.c index ffcf15c30e90..d9c5a9142ad1 100644 --- a/drivers/char/agp/frontend.c +++ b/drivers/char/agp/frontend.c @@ -1059,7 +1059,7 @@ ioctl_out: return ret_val; } -static struct file_operations agp_fops = +static const struct file_operations agp_fops = { .owner = THIS_MODULE, .llseek = no_llseek, diff --git a/drivers/char/applicom.c b/drivers/char/applicom.c index bcc4668835b5..10a389dafd60 100644 --- a/drivers/char/applicom.c +++ b/drivers/char/applicom.c @@ -112,7 +112,7 @@ static int ac_ioctl(struct inode *, struct file *, unsigned int, unsigned long); static irqreturn_t ac_interrupt(int, void *, struct pt_regs *); -static struct file_operations ac_fops = { +static const struct file_operations ac_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .read = ac_read, diff --git a/drivers/char/cs5535_gpio.c b/drivers/char/cs5535_gpio.c index 46d66037b917..8ce3f34cfc22 100644 --- a/drivers/char/cs5535_gpio.c +++ b/drivers/char/cs5535_gpio.c @@ -158,7 +158,7 @@ static int cs5535_gpio_open(struct inode *inode, struct file *file) return nonseekable_open(inode, file); 
} -static struct file_operations cs5535_gpio_fops = { +static const struct file_operations cs5535_gpio_fops = { .owner = THIS_MODULE, .write = cs5535_gpio_write, .read = cs5535_gpio_read, diff --git a/drivers/char/ds1286.c b/drivers/char/ds1286.c index d755cac14bc1..21c8229f5443 100644 --- a/drivers/char/ds1286.c +++ b/drivers/char/ds1286.c @@ -281,7 +281,7 @@ static unsigned int ds1286_poll(struct file *file, poll_table *wait) * The various file operations we support. */ -static struct file_operations ds1286_fops = { +static const struct file_operations ds1286_fops = { .llseek = no_llseek, .read = ds1286_read, .poll = ds1286_poll, diff --git a/drivers/char/ds1302.c b/drivers/char/ds1302.c index 625e8b517005..bcdb107aa967 100644 --- a/drivers/char/ds1302.c +++ b/drivers/char/ds1302.c @@ -282,7 +282,7 @@ get_rtc_status(char *buf) /* The various file operations we support. */ -static struct file_operations rtc_fops = { +static const struct file_operations rtc_fops = { .owner = THIS_MODULE, .ioctl = rtc_ioctl, }; diff --git a/drivers/char/ds1620.c b/drivers/char/ds1620.c index 953e670dcd09..48cb8f0e8ebf 100644 --- a/drivers/char/ds1620.c +++ b/drivers/char/ds1620.c @@ -336,7 +336,7 @@ proc_therm_ds1620_read(char *buf, char **start, off_t offset, static struct proc_dir_entry *proc_therm_ds1620; #endif -static struct file_operations ds1620_fops = { +static const struct file_operations ds1620_fops = { .owner = THIS_MODULE, .open = nonseekable_open, .read = ds1620_read, diff --git a/drivers/char/dsp56k.c b/drivers/char/dsp56k.c index 09b413618b57..9b1bf60ffbe7 100644 --- a/drivers/char/dsp56k.c +++ b/drivers/char/dsp56k.c @@ -483,7 +483,7 @@ static int dsp56k_release(struct inode *inode, struct file *file) return 0; } -static struct file_operations dsp56k_fops = { +static const struct file_operations dsp56k_fops = { .owner = THIS_MODULE, .read = dsp56k_read, .write = dsp56k_write, diff --git a/drivers/char/dtlk.c b/drivers/char/dtlk.c index da2c89f1b8bc..5e82c3bad2e3 
100644 --- a/drivers/char/dtlk.c +++ b/drivers/char/dtlk.c @@ -94,7 +94,7 @@ static int dtlk_release(struct inode *, struct file *); static int dtlk_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg); -static struct file_operations dtlk_fops = +static const struct file_operations dtlk_fops = { .owner = THIS_MODULE, .read = dtlk_read, diff --git a/drivers/char/efirtc.c b/drivers/char/efirtc.c index 0090e7a4fcd3..004141d535a2 100644 --- a/drivers/char/efirtc.c +++ b/drivers/char/efirtc.c @@ -285,7 +285,7 @@ efi_rtc_close(struct inode *inode, struct file *file) * The various file operations we support. */ -static struct file_operations efi_rtc_fops = { +static const struct file_operations efi_rtc_fops = { .owner = THIS_MODULE, .ioctl = efi_rtc_ioctl, .open = efi_rtc_open, diff --git a/drivers/char/ftape/zftape/zftape-init.c b/drivers/char/ftape/zftape/zftape-init.c index 55272566b740..164a1aa77a2f 100644 --- a/drivers/char/ftape/zftape/zftape-init.c +++ b/drivers/char/ftape/zftape/zftape-init.c @@ -86,7 +86,7 @@ static ssize_t zft_read (struct file *fp, char __user *buff, static ssize_t zft_write(struct file *fp, const char __user *buff, size_t req_len, loff_t *ppos); -static struct file_operations zft_cdev = +static const struct file_operations zft_cdev = { .owner = THIS_MODULE, .read = zft_read, diff --git a/drivers/char/genrtc.c b/drivers/char/genrtc.c index bebd7e34f792..817dc409ac20 100644 --- a/drivers/char/genrtc.c +++ b/drivers/char/genrtc.c @@ -482,7 +482,7 @@ static inline int gen_rtc_proc_init(void) { return 0; } * The various file operations we support. 
*/ -static struct file_operations gen_rtc_fops = { +static const struct file_operations gen_rtc_fops = { .owner = THIS_MODULE, #ifdef CONFIG_GEN_RTC_X .read = gen_rtc_read, diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c index e5643f3aa73f..8afba339f05a 100644 --- a/drivers/char/hpet.c +++ b/drivers/char/hpet.c @@ -553,7 +553,7 @@ hpet_ioctl_common(struct hpet_dev *devp, int cmd, unsigned long arg, int kernel) return err; } -static struct file_operations hpet_fops = { +static const struct file_operations hpet_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .read = hpet_read, diff --git a/drivers/char/hvsi.c b/drivers/char/hvsi.c index 8dc205b275e3..56612a2dca6b 100644 --- a/drivers/char/hvsi.c +++ b/drivers/char/hvsi.c @@ -1299,13 +1299,12 @@ static int __init hvsi_console_init(void) hp->inbuf_end = hp->inbuf; hp->state = HVSI_CLOSED; hp->vtermno = *vtermno; - hp->virq = virt_irq_create_mapping(irq[0]); + hp->virq = irq_create_mapping(NULL, irq[0], 0); if (hp->virq == NO_IRQ) { printk(KERN_ERR "%s: couldn't create irq mapping for 0x%x\n", - __FUNCTION__, hp->virq); + __FUNCTION__, irq[0]); continue; - } else - hp->virq = irq_offset_up(hp->virq); + } hvsi_count++; } diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 88b026639f10..154a81d328c1 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -149,7 +149,7 @@ out: } -static struct file_operations rng_chrdev_ops = { +static const struct file_operations rng_chrdev_ops = { .owner = THIS_MODULE, .open = rng_dev_open, .read = rng_dev_read, diff --git a/drivers/char/i8k.c b/drivers/char/i8k.c index f3c3aaf4560e..353d9f3cf8d7 100644 --- a/drivers/char/i8k.c +++ b/drivers/char/i8k.c @@ -80,7 +80,7 @@ static int i8k_open_fs(struct inode *inode, struct file *file); static int i8k_ioctl(struct inode *, struct file *, unsigned int, unsigned long); -static struct file_operations i8k_fops = { +static const struct file_operations i8k_fops = { .open = 
i8k_open_fs, .read = seq_read, .llseek = seq_lseek, diff --git a/drivers/char/ip2/ip2main.c b/drivers/char/ip2/ip2main.c index a4200a2b0811..518ece7ac656 100644 --- a/drivers/char/ip2/ip2main.c +++ b/drivers/char/ip2/ip2main.c @@ -233,7 +233,7 @@ static void *DevTableMem[IP2_MAX_BOARDS]; /* This is the driver descriptor for the ip2ipl device, which is used to * download the loadware to the boards. */ -static struct file_operations ip2_ipl = { +static const struct file_operations ip2_ipl = { .owner = THIS_MODULE, .read = ip2_ipl_read, .write = ip2_ipl_write, diff --git a/drivers/char/ip27-rtc.c b/drivers/char/ip27-rtc.c index 3acdac3c967e..a48da02aad2f 100644 --- a/drivers/char/ip27-rtc.c +++ b/drivers/char/ip27-rtc.c @@ -196,7 +196,7 @@ static int rtc_release(struct inode *inode, struct file *file) * The various file operations we support. */ -static struct file_operations rtc_fops = { +static const struct file_operations rtc_fops = { .owner = THIS_MODULE, .ioctl = rtc_ioctl, .open = rtc_open, diff --git a/drivers/char/ipmi/ipmi_devintf.c b/drivers/char/ipmi/ipmi_devintf.c index 2fc894fef1cb..68d7c61a864e 100644 --- a/drivers/char/ipmi/ipmi_devintf.c +++ b/drivers/char/ipmi/ipmi_devintf.c @@ -765,7 +765,7 @@ static long compat_ipmi_ioctl(struct file *filep, unsigned int cmd, } #endif -static struct file_operations ipmi_fops = { +static const struct file_operations ipmi_fops = { .owner = THIS_MODULE, .ioctl = ipmi_ioctl, #ifdef CONFIG_COMPAT diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c index 74a889c58333..accaaf1a6b69 100644 --- a/drivers/char/ipmi/ipmi_watchdog.c +++ b/drivers/char/ipmi/ipmi_watchdog.c @@ -807,7 +807,7 @@ static int ipmi_close(struct inode *ino, struct file *filep) return 0; } -static struct file_operations ipmi_wdog_fops = { +static const struct file_operations ipmi_wdog_fops = { .owner = THIS_MODULE, .read = ipmi_read, .poll = ipmi_poll, diff --git a/drivers/char/istallion.c b/drivers/char/istallion.c index 
fbce2f0669d6..84dfc4278139 100644 --- a/drivers/char/istallion.c +++ b/drivers/char/istallion.c @@ -748,7 +748,7 @@ static int stli_initpcibrd(int brdtype, struct pci_dev *devp); * will give access to the shared memory on the Stallion intelligent * board. This is also a very useful debugging tool. */ -static struct file_operations stli_fsiomem = { +static const struct file_operations stli_fsiomem = { .owner = THIS_MODULE, .read = stli_memread, .write = stli_memwrite, diff --git a/drivers/char/ite_gpio.c b/drivers/char/ite_gpio.c index 747ba45e50e5..cde562d70c4f 100644 --- a/drivers/char/ite_gpio.c +++ b/drivers/char/ite_gpio.c @@ -357,7 +357,7 @@ DEB(printk("interrupt 0x%x %d\n",ITE_GPAISR, i)); } } -static struct file_operations ite_gpio_fops = { +static const struct file_operations ite_gpio_fops = { .owner = THIS_MODULE, .ioctl = ite_gpio_ioctl, .open = ite_gpio_open, diff --git a/drivers/char/lcd.c b/drivers/char/lcd.c index 7d49b241de56..da601fd6c07a 100644 --- a/drivers/char/lcd.c +++ b/drivers/char/lcd.c @@ -598,7 +598,7 @@ static ssize_t lcd_read(struct file *file, char *buf, * The various file operations we support. 
*/ -static struct file_operations lcd_fops = { +static const struct file_operations lcd_fops = { .read = lcd_read, .ioctl = lcd_ioctl, .open = lcd_open, diff --git a/drivers/char/lp.c b/drivers/char/lp.c index 582cdbdb0c42..f875fda3b089 100644 --- a/drivers/char/lp.c +++ b/drivers/char/lp.c @@ -666,7 +666,7 @@ static int lp_ioctl(struct inode *inode, struct file *file, return retval; } -static struct file_operations lp_fops = { +static const struct file_operations lp_fops = { .owner = THIS_MODULE, .write = lp_write, .ioctl = lp_ioctl, diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 70f3954d6dfd..e97c32ceb796 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -776,7 +776,7 @@ static int open_port(struct inode * inode, struct file * filp) #define open_kmem open_mem #define open_oldmem open_mem -static struct file_operations mem_fops = { +static const struct file_operations mem_fops = { .llseek = memory_lseek, .read = read_mem, .write = write_mem, @@ -784,7 +784,7 @@ static struct file_operations mem_fops = { .open = open_mem, }; -static struct file_operations kmem_fops = { +static const struct file_operations kmem_fops = { .llseek = memory_lseek, .read = read_kmem, .write = write_kmem, @@ -792,7 +792,7 @@ static struct file_operations kmem_fops = { .open = open_kmem, }; -static struct file_operations null_fops = { +static const struct file_operations null_fops = { .llseek = null_lseek, .read = read_null, .write = write_null, @@ -800,7 +800,7 @@ static struct file_operations null_fops = { }; #if defined(CONFIG_ISA) || !defined(__mc68000__) -static struct file_operations port_fops = { +static const struct file_operations port_fops = { .llseek = memory_lseek, .read = read_port, .write = write_port, @@ -808,7 +808,7 @@ static struct file_operations port_fops = { }; #endif -static struct file_operations zero_fops = { +static const struct file_operations zero_fops = { .llseek = zero_lseek, .read = read_zero, .write = write_zero, @@ -819,14 +819,14 @@ 
static struct backing_dev_info zero_bdi = { .capabilities = BDI_CAP_MAP_COPY, }; -static struct file_operations full_fops = { +static const struct file_operations full_fops = { .llseek = full_lseek, .read = read_full, .write = write_full, }; #ifdef CONFIG_CRASH_DUMP -static struct file_operations oldmem_fops = { +static const struct file_operations oldmem_fops = { .read = read_oldmem, .open = open_oldmem, }; @@ -853,7 +853,7 @@ static ssize_t kmsg_write(struct file * file, const char __user * buf, return ret; } -static struct file_operations kmsg_fops = { +static const struct file_operations kmsg_fops = { .write = kmsg_write, }; @@ -903,7 +903,7 @@ static int memory_open(struct inode * inode, struct file * filp) return 0; } -static struct file_operations memory_fops = { +static const struct file_operations memory_fops = { .open = memory_open, /* just a selector for the real open */ }; diff --git a/drivers/char/misc.c b/drivers/char/misc.c index d5fa19da330b..62ebe09656e3 100644 --- a/drivers/char/misc.c +++ b/drivers/char/misc.c @@ -113,7 +113,7 @@ static int misc_seq_open(struct inode *inode, struct file *file) return seq_open(file, &misc_seq_ops); } -static struct file_operations misc_proc_fops = { +static const struct file_operations misc_proc_fops = { .owner = THIS_MODULE, .open = misc_seq_open, .read = seq_read, @@ -176,7 +176,7 @@ fail: */ static struct class *misc_class; -static struct file_operations misc_fops = { +static const struct file_operations misc_fops = { .owner = THIS_MODULE, .open = misc_open, }; diff --git a/drivers/char/mmtimer.c b/drivers/char/mmtimer.c index 70b774ff5aa4..1f0f2b6dae26 100644 --- a/drivers/char/mmtimer.c +++ b/drivers/char/mmtimer.c @@ -63,7 +63,7 @@ static int mmtimer_mmap(struct file *file, struct vm_area_struct *vma); */ static unsigned long mmtimer_femtoperiod = 0; -static struct file_operations mmtimer_fops = { +static const struct file_operations mmtimer_fops = { .owner = THIS_MODULE, .mmap = mmtimer_mmap, .ioctl = 
mmtimer_ioctl, diff --git a/drivers/char/mwave/mwavedd.c b/drivers/char/mwave/mwavedd.c index d3ba2f860ef0..39a2e661ff55 100644 --- a/drivers/char/mwave/mwavedd.c +++ b/drivers/char/mwave/mwavedd.c @@ -454,7 +454,7 @@ static int register_serial_portandirq(unsigned int port, int irq) } -static struct file_operations mwave_fops = { +static const struct file_operations mwave_fops = { .owner = THIS_MODULE, .read = mwave_read, .write = mwave_write, diff --git a/drivers/char/nvram.c b/drivers/char/nvram.c index 8c5f102622b6..a39f19c35a6a 100644 --- a/drivers/char/nvram.c +++ b/drivers/char/nvram.c @@ -437,7 +437,7 @@ nvram_read_proc(char *buffer, char **start, off_t offset, #endif /* CONFIG_PROC_FS */ -static struct file_operations nvram_fops = { +static const struct file_operations nvram_fops = { .owner = THIS_MODULE, .llseek = nvram_llseek, .read = nvram_read, diff --git a/drivers/char/nwbutton.c b/drivers/char/nwbutton.c index f240a104d250..7c57ebfa8640 100644 --- a/drivers/char/nwbutton.c +++ b/drivers/char/nwbutton.c @@ -183,7 +183,7 @@ static int button_read (struct file *filp, char __user *buffer, * attempts to perform these operations on the device. 
*/ -static struct file_operations button_fops = { +static const struct file_operations button_fops = { .owner = THIS_MODULE, .read = button_read, }; diff --git a/drivers/char/nwflash.c b/drivers/char/nwflash.c index 8865387d3448..206cf6f50695 100644 --- a/drivers/char/nwflash.c +++ b/drivers/char/nwflash.c @@ -642,7 +642,7 @@ static void kick_open(void) udelay(25); } -static struct file_operations flash_fops = +static const struct file_operations flash_fops = { .owner = THIS_MODULE, .llseek = flash_llseek, diff --git a/drivers/char/pc8736x_gpio.c b/drivers/char/pc8736x_gpio.c index c860de6a6fde..4005ee0aa11e 100644 --- a/drivers/char/pc8736x_gpio.c +++ b/drivers/char/pc8736x_gpio.c @@ -236,7 +236,7 @@ static int pc8736x_gpio_open(struct inode *inode, struct file *file) return nonseekable_open(inode, file); } -static struct file_operations pc8736x_gpio_fops = { +static const struct file_operations pc8736x_gpio_fops = { .owner = THIS_MODULE, .open = pc8736x_gpio_open, .write = nsc_gpio_write, diff --git a/drivers/char/pcmcia/cm4000_cs.c b/drivers/char/pcmcia/cm4000_cs.c index 31c8a21f9d87..50d20aafeb18 100644 --- a/drivers/char/pcmcia/cm4000_cs.c +++ b/drivers/char/pcmcia/cm4000_cs.c @@ -1938,7 +1938,7 @@ static void cm4000_detach(struct pcmcia_device *link) return; } -static struct file_operations cm4000_fops = { +static const struct file_operations cm4000_fops = { .owner = THIS_MODULE, .read = cmm_read, .write = cmm_write, diff --git a/drivers/char/pcmcia/cm4040_cs.c b/drivers/char/pcmcia/cm4040_cs.c index 47a8465bf95b..55cf4be42976 100644 --- a/drivers/char/pcmcia/cm4040_cs.c +++ b/drivers/char/pcmcia/cm4040_cs.c @@ -688,7 +688,7 @@ static void reader_detach(struct pcmcia_device *link) return; } -static struct file_operations reader_fops = { +static const struct file_operations reader_fops = { .owner = THIS_MODULE, .read = cm4040_read, .write = cm4040_write, diff --git a/drivers/char/ppdev.c b/drivers/char/ppdev.c index 24231d9743dc..520d2cf82bc0 100644 --- 
a/drivers/char/ppdev.c +++ b/drivers/char/ppdev.c @@ -739,7 +739,7 @@ static unsigned int pp_poll (struct file * file, poll_table * wait) static struct class *ppdev_class; -static struct file_operations pp_fops = { +static const struct file_operations pp_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .read = pp_read, diff --git a/drivers/char/random.c b/drivers/char/random.c index 164bddae047f..4c3a5ca9d8f7 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -416,7 +416,7 @@ static struct entropy_store input_pool = { .poolinfo = &poolinfo_table[0], .name = "input", .limit = 1, - .lock = SPIN_LOCK_UNLOCKED, + .lock = __SPIN_LOCK_UNLOCKED(&input_pool.lock), .pool = input_pool_data }; @@ -425,7 +425,7 @@ static struct entropy_store blocking_pool = { .name = "blocking", .limit = 1, .pull = &input_pool, - .lock = SPIN_LOCK_UNLOCKED, + .lock = __SPIN_LOCK_UNLOCKED(&blocking_pool.lock), .pool = blocking_pool_data }; @@ -433,7 +433,7 @@ static struct entropy_store nonblocking_pool = { .poolinfo = &poolinfo_table[1], .name = "nonblocking", .pull = &input_pool, - .lock = SPIN_LOCK_UNLOCKED, + .lock = __SPIN_LOCK_UNLOCKED(&nonblocking_pool.lock), .pool = nonblocking_pool_data }; diff --git a/drivers/char/raw.c b/drivers/char/raw.c index 9bf97c5e38c0..579868af4a54 100644 --- a/drivers/char/raw.c +++ b/drivers/char/raw.c @@ -30,7 +30,7 @@ struct raw_device_data { static struct class *raw_class; static struct raw_device_data raw_devices[MAX_RAW_MINORS]; static DEFINE_MUTEX(raw_mutex); -static struct file_operations raw_ctl_fops; /* forward declaration */ +static const struct file_operations raw_ctl_fops; /* forward declaration */ /* * Open/close code for raw IO. 
@@ -261,7 +261,7 @@ static ssize_t raw_file_aio_write(struct kiocb *iocb, const char __user *buf, } -static struct file_operations raw_fops = { +static const struct file_operations raw_fops = { .read = generic_file_read, .aio_read = generic_file_aio_read, .write = raw_file_write, @@ -274,7 +274,7 @@ static struct file_operations raw_fops = { .owner = THIS_MODULE, }; -static struct file_operations raw_ctl_fops = { +static const struct file_operations raw_ctl_fops = { .ioctl = raw_ctl_ioctl, .open = raw_open, .owner = THIS_MODULE, diff --git a/drivers/char/rio/rio_linux.c b/drivers/char/rio/rio_linux.c index 3afc6a47ebbc..3fa80aaf4527 100644 --- a/drivers/char/rio/rio_linux.c +++ b/drivers/char/rio/rio_linux.c @@ -243,7 +243,7 @@ static struct real_driver rio_real_driver = { * */ -static struct file_operations rio_fw_fops = { +static const struct file_operations rio_fw_fops = { .owner = THIS_MODULE, .ioctl = rio_fw_ioctl, }; diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c index aefac4ac0bf5..cc7bd1a3095b 100644 --- a/drivers/char/rtc.c +++ b/drivers/char/rtc.c @@ -877,7 +877,7 @@ int rtc_control(rtc_task_t *task, unsigned int cmd, unsigned long arg) * The various file operations we support. 
*/ -static struct file_operations rtc_fops = { +static const struct file_operations rtc_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .read = rtc_read, @@ -896,7 +896,7 @@ static struct miscdevice rtc_dev = { .fops = &rtc_fops, }; -static struct file_operations rtc_proc_fops = { +static const struct file_operations rtc_proc_fops = { .owner = THIS_MODULE, .open = rtc_proc_open, .read = seq_read, diff --git a/drivers/char/scx200_gpio.c b/drivers/char/scx200_gpio.c index 45083e5dd23b..425c58719db6 100644 --- a/drivers/char/scx200_gpio.c +++ b/drivers/char/scx200_gpio.c @@ -63,7 +63,7 @@ static int scx200_gpio_release(struct inode *inode, struct file *file) } -static struct file_operations scx200_gpio_fops = { +static const struct file_operations scx200_gpio_fops = { .owner = THIS_MODULE, .write = nsc_gpio_write, .read = nsc_gpio_read, diff --git a/drivers/char/snsc.c b/drivers/char/snsc.c index 203240b6c08f..afc6eda602f7 100644 --- a/drivers/char/snsc.c +++ b/drivers/char/snsc.c @@ -347,7 +347,7 @@ scdrv_poll(struct file *file, struct poll_table_struct *wait) return mask; } -static struct file_operations scdrv_fops = { +static const struct file_operations scdrv_fops = { .owner = THIS_MODULE, .read = scdrv_read, .write = scdrv_write, diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c index 45508a039508..d4e434d694b7 100644 --- a/drivers/char/sonypi.c +++ b/drivers/char/sonypi.c @@ -1106,7 +1106,7 @@ static int sonypi_misc_ioctl(struct inode *ip, struct file *fp, return ret; } -static struct file_operations sonypi_misc_fops = { +static const struct file_operations sonypi_misc_fops = { .owner = THIS_MODULE, .read = sonypi_misc_read, .poll = sonypi_misc_poll, diff --git a/drivers/char/stallion.c b/drivers/char/stallion.c index ed7b8eaf0367..3beb2203d24b 100644 --- a/drivers/char/stallion.c +++ b/drivers/char/stallion.c @@ -707,7 +707,7 @@ static unsigned int sc26198_baudtable[] = { * Define the driver info for a user level control device. 
Used mainly * to get at port stats - only not using the port device itself. */ -static struct file_operations stl_fsiomem = { +static const struct file_operations stl_fsiomem = { .owner = THIS_MODULE, .ioctl = stl_memioctl, }; diff --git a/drivers/char/sx.c b/drivers/char/sx.c index 45c193aa11db..e1cd2bc4b1e4 100644 --- a/drivers/char/sx.c +++ b/drivers/char/sx.c @@ -410,7 +410,7 @@ static struct real_driver sx_real_driver = { * */ -static struct file_operations sx_fw_fops = { +static const struct file_operations sx_fw_fops = { .owner = THIS_MODULE, .ioctl = sx_fw_ioctl, }; diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index a064ee9181c0..ee3ca8f1768e 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -147,12 +147,13 @@ static struct sysrq_key_op sysrq_mountro_op = { .enable_mask = SYSRQ_ENABLE_REMOUNT, }; -#ifdef CONFIG_DEBUG_MUTEXES +#ifdef CONFIG_LOCKDEP static void sysrq_handle_showlocks(int key, struct pt_regs *pt_regs, struct tty_struct *tty) { - mutex_debug_show_all_locks(); + debug_show_all_locks(); } + static struct sysrq_key_op sysrq_showlocks_op = { .handler = sysrq_handle_showlocks, .help_msg = "show-all-locks(D)", diff --git a/drivers/char/tb0219.c b/drivers/char/tb0219.c index a80c83210872..bb1bad4c18f9 100644 --- a/drivers/char/tb0219.c +++ b/drivers/char/tb0219.c @@ -255,7 +255,7 @@ static int tanbac_tb0219_release(struct inode *inode, struct file *file) return 0; } -static struct file_operations tb0219_fops = { +static const struct file_operations tb0219_fops = { .owner = THIS_MODULE, .read = tanbac_tb0219_read, .write = tanbac_tb0219_write, diff --git a/drivers/char/tipar.c b/drivers/char/tipar.c index e0633a119d29..d30dc09dbbc9 100644 --- a/drivers/char/tipar.c +++ b/drivers/char/tipar.c @@ -381,7 +381,7 @@ tipar_ioctl(struct inode *inode, struct file *file, /* ----- kernel module registering ------------------------------------ */ -static struct file_operations tipar_fops = { +static const struct file_operations 
tipar_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .read = tipar_read, diff --git a/drivers/char/tlclk.c b/drivers/char/tlclk.c index 952b829e2cb4..d2c5ba4e83b8 100644 --- a/drivers/char/tlclk.c +++ b/drivers/char/tlclk.c @@ -247,7 +247,7 @@ static ssize_t tlclk_write(struct file *filp, const char __user *buf, size_t cou return 0; } -static struct file_operations tlclk_fops = { +static const struct file_operations tlclk_fops = { .read = tlclk_read, .write = tlclk_write, .open = tlclk_open, diff --git a/drivers/char/toshiba.c b/drivers/char/toshiba.c index e2fb234dee40..dd36fd04a842 100644 --- a/drivers/char/toshiba.c +++ b/drivers/char/toshiba.c @@ -92,7 +92,7 @@ static int tosh_ioctl(struct inode *, struct file *, unsigned int, unsigned long); -static struct file_operations tosh_fops = { +static const struct file_operations tosh_fops = { .owner = THIS_MODULE, .ioctl = tosh_ioctl, }; diff --git a/drivers/char/tpm/tpm_atmel.c b/drivers/char/tpm/tpm_atmel.c index 58a258cec153..ad8ffe49256f 100644 --- a/drivers/char/tpm/tpm_atmel.c +++ b/drivers/char/tpm/tpm_atmel.c @@ -116,7 +116,7 @@ static u8 tpm_atml_status(struct tpm_chip *chip) return ioread8(chip->vendor.iobase + 1); } -static struct file_operations atmel_ops = { +static const struct file_operations atmel_ops = { .owner = THIS_MODULE, .llseek = no_llseek, .open = tpm_open, diff --git a/drivers/char/tpm/tpm_infineon.c b/drivers/char/tpm/tpm_infineon.c index adfff21beb21..1353b5a6bae8 100644 --- a/drivers/char/tpm/tpm_infineon.c +++ b/drivers/char/tpm/tpm_infineon.c @@ -338,7 +338,7 @@ static struct attribute *inf_attrs[] = { static struct attribute_group inf_attr_grp = {.attrs = inf_attrs }; -static struct file_operations inf_ops = { +static const struct file_operations inf_ops = { .owner = THIS_MODULE, .llseek = no_llseek, .open = tpm_open, diff --git a/drivers/char/tpm/tpm_nsc.c b/drivers/char/tpm/tpm_nsc.c index 4c8bc06c7d95..26287aace87d 100644 --- a/drivers/char/tpm/tpm_nsc.c +++ 
b/drivers/char/tpm/tpm_nsc.c @@ -226,7 +226,7 @@ static u8 tpm_nsc_status(struct tpm_chip *chip) return inb(chip->vendor.base + NSC_STATUS); } -static struct file_operations nsc_ops = { +static const struct file_operations nsc_ops = { .owner = THIS_MODULE, .llseek = no_llseek, .open = tpm_open, diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index abb0f2aeae66..3232b1932597 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -330,7 +330,7 @@ out_err: return rc; } -static struct file_operations tis_ops = { +static const struct file_operations tis_ops = { .owner = THIS_MODULE, .llseek = no_llseek, .open = tpm_open, diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 615e934da05f..bfdb90242a90 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -912,7 +912,7 @@ static int hung_up_tty_ioctl(struct inode * inode, struct file * file, return cmd == TIOCSPGRP ? -ENOTTY : -EIO; } -static struct file_operations tty_fops = { +static const struct file_operations tty_fops = { .llseek = no_llseek, .read = tty_read, .write = tty_write, @@ -924,7 +924,7 @@ static struct file_operations tty_fops = { }; #ifdef CONFIG_UNIX98_PTYS -static struct file_operations ptmx_fops = { +static const struct file_operations ptmx_fops = { .llseek = no_llseek, .read = tty_read, .write = tty_write, @@ -936,7 +936,7 @@ static struct file_operations ptmx_fops = { }; #endif -static struct file_operations console_fops = { +static const struct file_operations console_fops = { .llseek = no_llseek, .read = tty_read, .write = redirected_tty_write, @@ -947,7 +947,7 @@ static struct file_operations console_fops = { .fasync = tty_fasync, }; -static struct file_operations hung_up_tty_fops = { +static const struct file_operations hung_up_tty_fops = { .llseek = no_llseek, .read = hung_up_tty_read, .write = hung_up_tty_write, @@ -2336,7 +2336,7 @@ static int fionbio(struct file *file, int __user *p) static int tiocsctty(struct tty_struct *tty, 
int arg) { - task_t *p; + struct task_struct *p; if (current->signal->leader && (current->signal->session == tty->session)) diff --git a/drivers/char/vc_screen.c b/drivers/char/vc_screen.c index 45e9bd81bc0e..a9247b5213d5 100644 --- a/drivers/char/vc_screen.c +++ b/drivers/char/vc_screen.c @@ -465,7 +465,7 @@ vcs_open(struct inode *inode, struct file *filp) return 0; } -static struct file_operations vcs_fops = { +static const struct file_operations vcs_fops = { .llseek = vcs_lseek, .read = vcs_read, .write = vcs_write, diff --git a/drivers/char/viotape.c b/drivers/char/viotape.c index 7d42c8ec8dbc..b72b2049aaae 100644 --- a/drivers/char/viotape.c +++ b/drivers/char/viotape.c @@ -292,7 +292,7 @@ static int proc_viotape_open(struct inode *inode, struct file *file) return single_open(file, proc_viotape_show, NULL); } -static struct file_operations proc_viotape_operations = { +static const struct file_operations proc_viotape_operations = { .open = proc_viotape_open, .read = seq_read, .llseek = seq_lseek, diff --git a/drivers/char/vr41xx_giu.c b/drivers/char/vr41xx_giu.c index 073da48c092e..1b9b1f1d4c49 100644 --- a/drivers/char/vr41xx_giu.c +++ b/drivers/char/vr41xx_giu.c @@ -605,7 +605,7 @@ static int gpio_release(struct inode *inode, struct file *file) return 0; } -static struct file_operations gpio_fops = { +static const struct file_operations gpio_fops = { .owner = THIS_MODULE, .read = gpio_read, .write = gpio_write, diff --git a/drivers/char/vt.c b/drivers/char/vt.c index 3ef823d7d255..da7e66a2a38b 100644 --- a/drivers/char/vt.c +++ b/drivers/char/vt.c @@ -886,6 +886,7 @@ void vc_disallocate(unsigned int currcons) if (vc_cons_allocated(currcons)) { struct vc_data *vc = vc_cons[currcons].d; vc->vc_sw->con_deinit(vc); + module_put(vc->vc_sw->owner); if (vc->vc_kmalloced) kfree(vc->vc_screenbuf); if (currcons >= MIN_NR_CONSOLES) diff --git a/drivers/char/watchdog/acquirewdt.c b/drivers/char/watchdog/acquirewdt.c index 7289f4af93d0..c77fe3cf2852 100644 --- 
a/drivers/char/watchdog/acquirewdt.c +++ b/drivers/char/watchdog/acquirewdt.c @@ -231,7 +231,7 @@ static int acq_notify_sys(struct notifier_block *this, unsigned long code, * Kernel Interfaces */ -static struct file_operations acq_fops = { +static const struct file_operations acq_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .write = acq_write, diff --git a/drivers/char/watchdog/advantechwdt.c b/drivers/char/watchdog/advantechwdt.c index 194a3fd36b91..8069be445edc 100644 --- a/drivers/char/watchdog/advantechwdt.c +++ b/drivers/char/watchdog/advantechwdt.c @@ -227,7 +227,7 @@ advwdt_notify_sys(struct notifier_block *this, unsigned long code, * Kernel Interfaces */ -static struct file_operations advwdt_fops = { +static const struct file_operations advwdt_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .write = advwdt_write, diff --git a/drivers/char/watchdog/alim1535_wdt.c b/drivers/char/watchdog/alim1535_wdt.c index 8338ca300e2e..c5c94e4c9495 100644 --- a/drivers/char/watchdog/alim1535_wdt.c +++ b/drivers/char/watchdog/alim1535_wdt.c @@ -362,7 +362,7 @@ static int __init ali_find_watchdog(void) * Kernel Interfaces */ -static struct file_operations ali_fops = { +static const struct file_operations ali_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .write = ali_write, diff --git a/drivers/char/watchdog/alim7101_wdt.c b/drivers/char/watchdog/alim7101_wdt.c index c05ac188a4d7..ffd7684f999b 100644 --- a/drivers/char/watchdog/alim7101_wdt.c +++ b/drivers/char/watchdog/alim7101_wdt.c @@ -281,7 +281,7 @@ static int fop_ioctl(struct inode *inode, struct file *file, unsigned int cmd, u } } -static struct file_operations wdt_fops = { +static const struct file_operations wdt_fops = { .owner= THIS_MODULE, .llseek= no_llseek, .write= fop_write, diff --git a/drivers/char/watchdog/at91_wdt.c b/drivers/char/watchdog/at91_wdt.c index f61dedc3c96c..cc266715ea32 100644 --- a/drivers/char/watchdog/at91_wdt.c +++ b/drivers/char/watchdog/at91_wdt.c @@ -183,7 +183,7 @@ 
static ssize_t at91_wdt_write(struct file *file, const char *data, size_t len, l /* ......................................................................... */ -static struct file_operations at91wdt_fops = { +static const struct file_operations at91wdt_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .ioctl = at91_wdt_ioctl, diff --git a/drivers/char/watchdog/booke_wdt.c b/drivers/char/watchdog/booke_wdt.c index 537f5c6729bf..e3cefc538b40 100644 --- a/drivers/char/watchdog/booke_wdt.c +++ b/drivers/char/watchdog/booke_wdt.c @@ -145,7 +145,7 @@ static int booke_wdt_open (struct inode *inode, struct file *file) return 0; } -static struct file_operations booke_wdt_fops = { +static const struct file_operations booke_wdt_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .write = booke_wdt_write, diff --git a/drivers/char/watchdog/cpu5wdt.c b/drivers/char/watchdog/cpu5wdt.c index 3e8410b5a65e..04c7e49918db 100644 --- a/drivers/char/watchdog/cpu5wdt.c +++ b/drivers/char/watchdog/cpu5wdt.c @@ -198,7 +198,7 @@ static ssize_t cpu5wdt_write(struct file *file, const char __user *buf, size_t c return count; } -static struct file_operations cpu5wdt_fops = { +static const struct file_operations cpu5wdt_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .ioctl = cpu5wdt_ioctl, diff --git a/drivers/char/watchdog/ep93xx_wdt.c b/drivers/char/watchdog/ep93xx_wdt.c index 9021dbb78299..77c8a955ae9e 100644 --- a/drivers/char/watchdog/ep93xx_wdt.c +++ b/drivers/char/watchdog/ep93xx_wdt.c @@ -187,7 +187,7 @@ static int ep93xx_wdt_release(struct inode *inode, struct file *file) return 0; } -static struct file_operations ep93xx_wdt_fops = { +static const struct file_operations ep93xx_wdt_fops = { .owner = THIS_MODULE, .write = ep93xx_wdt_write, .ioctl = ep93xx_wdt_ioctl, diff --git a/drivers/char/watchdog/eurotechwdt.c b/drivers/char/watchdog/eurotechwdt.c index ea670de4fab7..62dbccb2f6df 100644 --- a/drivers/char/watchdog/eurotechwdt.c +++ b/drivers/char/watchdog/eurotechwdt.c @@ 
-356,7 +356,7 @@ static int eurwdt_notify_sys(struct notifier_block *this, unsigned long code, */ -static struct file_operations eurwdt_fops = { +static const struct file_operations eurwdt_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .write = eurwdt_write, diff --git a/drivers/char/watchdog/i6300esb.c b/drivers/char/watchdog/i6300esb.c index 93785f13242e..870539eabbf3 100644 --- a/drivers/char/watchdog/i6300esb.c +++ b/drivers/char/watchdog/i6300esb.c @@ -337,7 +337,7 @@ static int esb_notify_sys (struct notifier_block *this, unsigned long code, void * Kernel Interfaces */ -static struct file_operations esb_fops = { +static const struct file_operations esb_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .write = esb_write, diff --git a/drivers/char/watchdog/i8xx_tco.c b/drivers/char/watchdog/i8xx_tco.c index bfbdbbf3c2f2..8385dd36eefe 100644 --- a/drivers/char/watchdog/i8xx_tco.c +++ b/drivers/char/watchdog/i8xx_tco.c @@ -378,7 +378,7 @@ static int i8xx_tco_notify_sys (struct notifier_block *this, unsigned long code, * Kernel Interfaces */ -static struct file_operations i8xx_tco_fops = { +static const struct file_operations i8xx_tco_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .write = i8xx_tco_write, diff --git a/drivers/char/watchdog/ib700wdt.c b/drivers/char/watchdog/ib700wdt.c index a2e53c715b36..fd95f7327798 100644 --- a/drivers/char/watchdog/ib700wdt.c +++ b/drivers/char/watchdog/ib700wdt.c @@ -255,7 +255,7 @@ ibwdt_notify_sys(struct notifier_block *this, unsigned long code, * Kernel Interfaces */ -static struct file_operations ibwdt_fops = { +static const struct file_operations ibwdt_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .write = ibwdt_write, diff --git a/drivers/char/watchdog/ibmasr.c b/drivers/char/watchdog/ibmasr.c index b0741cbdc139..26ceee7a4df0 100644 --- a/drivers/char/watchdog/ibmasr.c +++ b/drivers/char/watchdog/ibmasr.c @@ -322,7 +322,7 @@ static int asr_release(struct inode *inode, struct file *file) return 0; } 
-static struct file_operations asr_fops = { +static const struct file_operations asr_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .write = asr_write, diff --git a/drivers/char/watchdog/indydog.c b/drivers/char/watchdog/indydog.c index d387979b2434..dacc1c20a310 100644 --- a/drivers/char/watchdog/indydog.c +++ b/drivers/char/watchdog/indydog.c @@ -154,7 +154,7 @@ static int indydog_notify_sys(struct notifier_block *this, unsigned long code, v return NOTIFY_DONE; } -static struct file_operations indydog_fops = { +static const struct file_operations indydog_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .write = indydog_write, diff --git a/drivers/char/watchdog/ixp2000_wdt.c b/drivers/char/watchdog/ixp2000_wdt.c index aa29a7d68759..692908819e26 100644 --- a/drivers/char/watchdog/ixp2000_wdt.c +++ b/drivers/char/watchdog/ixp2000_wdt.c @@ -168,7 +168,7 @@ ixp2000_wdt_release(struct inode *inode, struct file *file) } -static struct file_operations ixp2000_wdt_fops = +static const struct file_operations ixp2000_wdt_fops = { .owner = THIS_MODULE, .llseek = no_llseek, diff --git a/drivers/char/watchdog/ixp4xx_wdt.c b/drivers/char/watchdog/ixp4xx_wdt.c index e6a3fe83fa01..9db5cf2c38c3 100644 --- a/drivers/char/watchdog/ixp4xx_wdt.c +++ b/drivers/char/watchdog/ixp4xx_wdt.c @@ -162,7 +162,7 @@ ixp4xx_wdt_release(struct inode *inode, struct file *file) } -static struct file_operations ixp4xx_wdt_fops = +static const struct file_operations ixp4xx_wdt_fops = { .owner = THIS_MODULE, .llseek = no_llseek, diff --git a/drivers/char/watchdog/machzwd.c b/drivers/char/watchdog/machzwd.c index b67b4878ae0f..23734e07fb22 100644 --- a/drivers/char/watchdog/machzwd.c +++ b/drivers/char/watchdog/machzwd.c @@ -388,7 +388,7 @@ static int zf_notify_sys(struct notifier_block *this, unsigned long code, -static struct file_operations zf_fops = { +static const struct file_operations zf_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .write = zf_write, diff --git 
a/drivers/char/watchdog/mixcomwd.c b/drivers/char/watchdog/mixcomwd.c index 433c27f98159..ae943324d251 100644 --- a/drivers/char/watchdog/mixcomwd.c +++ b/drivers/char/watchdog/mixcomwd.c @@ -190,7 +190,7 @@ static int mixcomwd_ioctl(struct inode *inode, struct file *file, return 0; } -static struct file_operations mixcomwd_fops= +static const struct file_operations mixcomwd_fops= { .owner = THIS_MODULE, .llseek = no_llseek, diff --git a/drivers/char/watchdog/mpc83xx_wdt.c b/drivers/char/watchdog/mpc83xx_wdt.c index dac1381af364..a480903ee1a5 100644 --- a/drivers/char/watchdog/mpc83xx_wdt.c +++ b/drivers/char/watchdog/mpc83xx_wdt.c @@ -129,7 +129,7 @@ static int mpc83xx_wdt_ioctl(struct inode *inode, struct file *file, } } -static struct file_operations mpc83xx_wdt_fops = { +static const struct file_operations mpc83xx_wdt_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .write = mpc83xx_wdt_write, diff --git a/drivers/char/watchdog/mpc8xx_wdt.c b/drivers/char/watchdog/mpc8xx_wdt.c index 11f0ccd4c4d4..35dd9e6e1140 100644 --- a/drivers/char/watchdog/mpc8xx_wdt.c +++ b/drivers/char/watchdog/mpc8xx_wdt.c @@ -132,7 +132,7 @@ static int mpc8xx_wdt_ioctl(struct inode *inode, struct file *file, return 0; } -static struct file_operations mpc8xx_wdt_fops = { +static const struct file_operations mpc8xx_wdt_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .write = mpc8xx_wdt_write, diff --git a/drivers/char/watchdog/mpcore_wdt.c b/drivers/char/watchdog/mpcore_wdt.c index c2d492c852fc..54b3c56ead0d 100644 --- a/drivers/char/watchdog/mpcore_wdt.c +++ b/drivers/char/watchdog/mpcore_wdt.c @@ -297,7 +297,7 @@ static void mpcore_wdt_shutdown(struct platform_device *dev) /* * Kernel Interfaces */ -static struct file_operations mpcore_wdt_fops = { +static const struct file_operations mpcore_wdt_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .write = mpcore_wdt_write, diff --git a/drivers/char/watchdog/mv64x60_wdt.c b/drivers/char/watchdog/mv64x60_wdt.c index 
20a6cbb0fbb8..5c8fab345b40 100644 --- a/drivers/char/watchdog/mv64x60_wdt.c +++ b/drivers/char/watchdog/mv64x60_wdt.c @@ -166,7 +166,7 @@ static int mv64x60_wdt_ioctl(struct inode *inode, struct file *file, return 0; } -static struct file_operations mv64x60_wdt_fops = { +static const struct file_operations mv64x60_wdt_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .write = mv64x60_wdt_write, diff --git a/drivers/char/watchdog/pcwd.c b/drivers/char/watchdog/pcwd.c index 6d44ca68312d..cd7d1b6a5d9f 100644 --- a/drivers/char/watchdog/pcwd.c +++ b/drivers/char/watchdog/pcwd.c @@ -740,7 +740,7 @@ static int pcwd_notify_sys(struct notifier_block *this, unsigned long code, void * Kernel Interfaces */ -static struct file_operations pcwd_fops = { +static const struct file_operations pcwd_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .write = pcwd_write, @@ -755,7 +755,7 @@ static struct miscdevice pcwd_miscdev = { .fops = &pcwd_fops, }; -static struct file_operations pcwd_temp_fops = { +static const struct file_operations pcwd_temp_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .read = pcwd_temp_read, diff --git a/drivers/char/watchdog/pcwd_pci.c b/drivers/char/watchdog/pcwd_pci.c index 1f40ecefbf72..c7cfd6dbfe1b 100644 --- a/drivers/char/watchdog/pcwd_pci.c +++ b/drivers/char/watchdog/pcwd_pci.c @@ -625,7 +625,7 @@ static int pcipcwd_notify_sys(struct notifier_block *this, unsigned long code, v * Kernel Interfaces */ -static struct file_operations pcipcwd_fops = { +static const struct file_operations pcipcwd_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .write = pcipcwd_write, @@ -640,7 +640,7 @@ static struct miscdevice pcipcwd_miscdev = { .fops = &pcipcwd_fops, }; -static struct file_operations pcipcwd_temp_fops = { +static const struct file_operations pcipcwd_temp_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .read = pcipcwd_temp_read, diff --git a/drivers/char/watchdog/pcwd_usb.c b/drivers/char/watchdog/pcwd_usb.c index 
92bf8c1a0f0d..b7ae73dcdd08 100644 --- a/drivers/char/watchdog/pcwd_usb.c +++ b/drivers/char/watchdog/pcwd_usb.c @@ -523,7 +523,7 @@ static int usb_pcwd_notify_sys(struct notifier_block *this, unsigned long code, * Kernel Interfaces */ -static struct file_operations usb_pcwd_fops = { +static const struct file_operations usb_pcwd_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .write = usb_pcwd_write, @@ -538,7 +538,7 @@ static struct miscdevice usb_pcwd_miscdev = { .fops = &usb_pcwd_fops, }; -static struct file_operations usb_pcwd_temperature_fops = { +static const struct file_operations usb_pcwd_temperature_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .read = usb_pcwd_temperature_read, diff --git a/drivers/char/watchdog/s3c2410_wdt.c b/drivers/char/watchdog/s3c2410_wdt.c index f267dad26071..be978e8ed754 100644 --- a/drivers/char/watchdog/s3c2410_wdt.c +++ b/drivers/char/watchdog/s3c2410_wdt.c @@ -319,7 +319,7 @@ static int s3c2410wdt_ioctl(struct inode *inode, struct file *file, /* kernel interface */ -static struct file_operations s3c2410wdt_fops = { +static const struct file_operations s3c2410wdt_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .write = s3c2410wdt_write, diff --git a/drivers/char/watchdog/sa1100_wdt.c b/drivers/char/watchdog/sa1100_wdt.c index b22e95c5470c..1fc16d995788 100644 --- a/drivers/char/watchdog/sa1100_wdt.c +++ b/drivers/char/watchdog/sa1100_wdt.c @@ -135,7 +135,7 @@ static int sa1100dog_ioctl(struct inode *inode, struct file *file, return ret; } -static struct file_operations sa1100dog_fops = +static const struct file_operations sa1100dog_fops = { .owner = THIS_MODULE, .llseek = no_llseek, diff --git a/drivers/char/watchdog/sbc60xxwdt.c b/drivers/char/watchdog/sbc60xxwdt.c index ed0bd55fbfc1..4663c2fd53cd 100644 --- a/drivers/char/watchdog/sbc60xxwdt.c +++ b/drivers/char/watchdog/sbc60xxwdt.c @@ -282,7 +282,7 @@ static int fop_ioctl(struct inode *inode, struct file *file, unsigned int cmd, } } -static struct 
file_operations wdt_fops = { +static const struct file_operations wdt_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .write = fop_write, diff --git a/drivers/char/watchdog/sbc8360.c b/drivers/char/watchdog/sbc8360.c index 6562aa910ace..1035be5b5019 100644 --- a/drivers/char/watchdog/sbc8360.c +++ b/drivers/char/watchdog/sbc8360.c @@ -305,7 +305,7 @@ static int sbc8360_notify_sys(struct notifier_block *this, unsigned long code, * Kernel Interfaces */ -static struct file_operations sbc8360_fops = { +static const struct file_operations sbc8360_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .write = sbc8360_write, diff --git a/drivers/char/watchdog/sbc_epx_c3.c b/drivers/char/watchdog/sbc_epx_c3.c index 09867fadc720..bfc475dabe6d 100644 --- a/drivers/char/watchdog/sbc_epx_c3.c +++ b/drivers/char/watchdog/sbc_epx_c3.c @@ -154,7 +154,7 @@ static int epx_c3_notify_sys(struct notifier_block *this, unsigned long code, return NOTIFY_DONE; } -static struct file_operations epx_c3_fops = { +static const struct file_operations epx_c3_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .write = epx_c3_write, diff --git a/drivers/char/watchdog/sc1200wdt.c b/drivers/char/watchdog/sc1200wdt.c index 78ef6333c181..7c3cf293a5af 100644 --- a/drivers/char/watchdog/sc1200wdt.c +++ b/drivers/char/watchdog/sc1200wdt.c @@ -292,7 +292,7 @@ static struct notifier_block sc1200wdt_notifier = .notifier_call = sc1200wdt_notify_sys, }; -static struct file_operations sc1200wdt_fops = +static const struct file_operations sc1200wdt_fops = { .owner = THIS_MODULE, .llseek = no_llseek, diff --git a/drivers/char/watchdog/sc520_wdt.c b/drivers/char/watchdog/sc520_wdt.c index 4ee9974ad8cb..2c7c9db71be8 100644 --- a/drivers/char/watchdog/sc520_wdt.c +++ b/drivers/char/watchdog/sc520_wdt.c @@ -336,7 +336,7 @@ static int fop_ioctl(struct inode *inode, struct file *file, unsigned int cmd, } } -static struct file_operations wdt_fops = { +static const struct file_operations wdt_fops = { .owner = 
THIS_MODULE, .llseek = no_llseek, .write = fop_write, diff --git a/drivers/char/watchdog/scx200_wdt.c b/drivers/char/watchdog/scx200_wdt.c index c0b4754e8de0..c561299a5537 100644 --- a/drivers/char/watchdog/scx200_wdt.c +++ b/drivers/char/watchdog/scx200_wdt.c @@ -194,7 +194,7 @@ static int scx200_wdt_ioctl(struct inode *inode, struct file *file, } } -static struct file_operations scx200_wdt_fops = { +static const struct file_operations scx200_wdt_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .write = scx200_wdt_write, diff --git a/drivers/char/watchdog/shwdt.c b/drivers/char/watchdog/shwdt.c index 803701b675c0..1355038f1044 100644 --- a/drivers/char/watchdog/shwdt.c +++ b/drivers/char/watchdog/shwdt.c @@ -344,7 +344,7 @@ static int sh_wdt_notify_sys(struct notifier_block *this, return NOTIFY_DONE; } -static struct file_operations sh_wdt_fops = { +static const struct file_operations sh_wdt_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .write = sh_wdt_write, diff --git a/drivers/char/watchdog/softdog.c b/drivers/char/watchdog/softdog.c index 79ce5c655428..ef8da517545a 100644 --- a/drivers/char/watchdog/softdog.c +++ b/drivers/char/watchdog/softdog.c @@ -243,7 +243,7 @@ static int softdog_notify_sys(struct notifier_block *this, unsigned long code, * Kernel Interfaces */ -static struct file_operations softdog_fops = { +static const struct file_operations softdog_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .write = softdog_write, diff --git a/drivers/char/watchdog/w83627hf_wdt.c b/drivers/char/watchdog/w83627hf_wdt.c index d15ca9a3986f..13f16d41c2fd 100644 --- a/drivers/char/watchdog/w83627hf_wdt.c +++ b/drivers/char/watchdog/w83627hf_wdt.c @@ -274,7 +274,7 @@ wdt_notify_sys(struct notifier_block *this, unsigned long code, * Kernel Interfaces */ -static struct file_operations wdt_fops = { +static const struct file_operations wdt_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .write = wdt_write, diff --git a/drivers/char/watchdog/w83877f_wdt.c 
b/drivers/char/watchdog/w83877f_wdt.c index 52a8bd0a5988..ccf6c0915945 100644 --- a/drivers/char/watchdog/w83877f_wdt.c +++ b/drivers/char/watchdog/w83877f_wdt.c @@ -299,7 +299,7 @@ static int fop_ioctl(struct inode *inode, struct file *file, unsigned int cmd, } } -static struct file_operations wdt_fops = { +static const struct file_operations wdt_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .write = fop_write, diff --git a/drivers/char/watchdog/w83977f_wdt.c b/drivers/char/watchdog/w83977f_wdt.c index c31849e4c5c2..98f4e17db70a 100644 --- a/drivers/char/watchdog/w83977f_wdt.c +++ b/drivers/char/watchdog/w83977f_wdt.c @@ -449,7 +449,7 @@ static int wdt_notify_sys(struct notifier_block *this, unsigned long code, return NOTIFY_DONE; } -static struct file_operations wdt_fops= +static const struct file_operations wdt_fops= { .owner = THIS_MODULE, .llseek = no_llseek, diff --git a/drivers/char/watchdog/wafer5823wdt.c b/drivers/char/watchdog/wafer5823wdt.c index 7cf6c9bbf486..2bb6a9d6ad28 100644 --- a/drivers/char/watchdog/wafer5823wdt.c +++ b/drivers/char/watchdog/wafer5823wdt.c @@ -222,7 +222,7 @@ static int wafwdt_notify_sys(struct notifier_block *this, unsigned long code, vo * Kernel Interfaces */ -static struct file_operations wafwdt_fops = { +static const struct file_operations wafwdt_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .write = wafwdt_write, diff --git a/drivers/char/watchdog/wdrtas.c b/drivers/char/watchdog/wdrtas.c index 3a462c34b92a..5c38cdf41731 100644 --- a/drivers/char/watchdog/wdrtas.c +++ b/drivers/char/watchdog/wdrtas.c @@ -520,7 +520,7 @@ wdrtas_reboot(struct notifier_block *this, unsigned long code, void *ptr) /*** initialization stuff */ -static struct file_operations wdrtas_fops = { +static const struct file_operations wdrtas_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .write = wdrtas_write, @@ -535,7 +535,7 @@ static struct miscdevice wdrtas_miscdev = { .fops = &wdrtas_fops, }; -static struct file_operations 
wdrtas_temp_fops = { +static const struct file_operations wdrtas_temp_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .read = wdrtas_temp_read, diff --git a/drivers/char/watchdog/wdt.c b/drivers/char/watchdog/wdt.c index a1d972c8f44c..70be81e39a61 100644 --- a/drivers/char/watchdog/wdt.c +++ b/drivers/char/watchdog/wdt.c @@ -494,7 +494,7 @@ static int wdt_notify_sys(struct notifier_block *this, unsigned long code, */ -static struct file_operations wdt_fops = { +static const struct file_operations wdt_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .write = wdt_write, @@ -510,7 +510,7 @@ static struct miscdevice wdt_miscdev = { }; #ifdef CONFIG_WDT_501 -static struct file_operations wdt_temp_fops = { +static const struct file_operations wdt_temp_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .read = wdt_temp_read, diff --git a/drivers/char/watchdog/wdt285.c b/drivers/char/watchdog/wdt285.c index 52825a1f1779..6555fb844f23 100644 --- a/drivers/char/watchdog/wdt285.c +++ b/drivers/char/watchdog/wdt285.c @@ -178,7 +178,7 @@ watchdog_ioctl(struct inode *inode, struct file *file, unsigned int cmd, return ret; } -static struct file_operations watchdog_fops = { +static const struct file_operations watchdog_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .write = watchdog_write, diff --git a/drivers/char/watchdog/wdt977.c b/drivers/char/watchdog/wdt977.c index 3cde2b9bb763..a0935bc775f8 100644 --- a/drivers/char/watchdog/wdt977.c +++ b/drivers/char/watchdog/wdt977.c @@ -418,7 +418,7 @@ static int wdt977_notify_sys(struct notifier_block *this, unsigned long code, return NOTIFY_DONE; } -static struct file_operations wdt977_fops= +static const struct file_operations wdt977_fops= { .owner = THIS_MODULE, .llseek = no_llseek, diff --git a/drivers/char/watchdog/wdt_pci.c b/drivers/char/watchdog/wdt_pci.c index 7529ecdbabae..5918ca2c9c35 100644 --- a/drivers/char/watchdog/wdt_pci.c +++ b/drivers/char/watchdog/wdt_pci.c @@ -543,7 +543,7 @@ static int 
wdtpci_notify_sys(struct notifier_block *this, unsigned long code, */ -static struct file_operations wdtpci_fops = { +static const struct file_operations wdtpci_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .write = wdtpci_write, @@ -559,7 +559,7 @@ static struct miscdevice wdtpci_miscdev = { }; #ifdef CONFIG_WDT_501_PCI -static struct file_operations wdtpci_temp_fops = { +static const struct file_operations wdtpci_temp_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .read = wdtpci_temp_read, diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index 6ca3476d02c7..adbe9f76a505 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -838,7 +838,7 @@ static ide_startstop_t idefloppy_pc_intr (ide_drive_t *drive) "transferred\n", pc->actually_transferred); clear_bit(PC_DMA_IN_PROGRESS, &pc->flags); - local_irq_enable(); + local_irq_enable_in_hardirq(); if (status.b.check || test_bit(PC_DMA_ERROR, &pc->flags)) { /* Error detected */ diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 7dba9992ad30..fb6795236e76 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -693,7 +693,7 @@ static ide_startstop_t drive_cmd_intr (ide_drive_t *drive) u8 stat = hwif->INB(IDE_STATUS_REG); int retries = 10; - local_irq_enable(); + local_irq_enable_in_hardirq(); if ((stat & DRQ_STAT) && args && args[3]) { u8 io_32bit = drive->io_32bit; drive->io_32bit = 0; @@ -1286,7 +1286,7 @@ static void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq) if (masked_irq != IDE_NO_IRQ && hwif->irq != masked_irq) disable_irq_nosync(hwif->irq); spin_unlock(&ide_lock); - local_irq_enable(); + local_irq_enable_in_hardirq(); /* allow other IRQs while we start this request */ startstop = start_request(drive, rq); spin_lock_irq(&ide_lock); @@ -1631,7 +1631,7 @@ irqreturn_t ide_intr (int irq, void *dev_id, struct pt_regs *regs) spin_unlock(&ide_lock); if (drive->unmask) - local_irq_enable(); + local_irq_enable_in_hardirq(); /* service this 
interrupt, may set handler for next interrupt */ startstop = handler(drive); spin_lock_irq(&ide_lock); @@ -1705,7 +1705,7 @@ int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t actio { unsigned long flags; ide_hwgroup_t *hwgroup = HWGROUP(drive); - DECLARE_COMPLETION(wait); + DECLARE_COMPLETION_ONSTACK(wait); int where = ELEVATOR_INSERT_BACK, err; int must_wait = (action == ide_wait || action == ide_head_wait); diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index 04547eb0833f..97a9244312fc 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -222,7 +222,7 @@ ide_startstop_t task_no_data_intr (ide_drive_t *drive) ide_hwif_t *hwif = HWIF(drive); u8 stat; - local_irq_enable(); + local_irq_enable_in_hardirq(); if (!OK_STAT(stat = hwif->INB(IDE_STATUS_REG),READY_STAT,BAD_STAT)) { return ide_error(drive, "task_no_data_intr", stat); /* calls ide_end_drive_cmd */ diff --git a/drivers/ieee1394/hosts.c b/drivers/ieee1394/hosts.c index 2c669287f5bd..4feead4a35c5 100644 --- a/drivers/ieee1394/hosts.c +++ b/drivers/ieee1394/hosts.c @@ -107,6 +107,14 @@ static int alloc_hostnum_cb(struct hpsb_host *host, void *__data) */ static DEFINE_MUTEX(host_num_alloc); +/* + * The pending_packet_queue is special in that it's processed + * from hardirq context too (such as hpsb_bus_reset()). 
Hence + * split the lock class from the usual networking skb-head + * lock class by using a separate key for it: + */ +static struct lock_class_key pending_packet_queue_key; + struct hpsb_host *hpsb_alloc_host(struct hpsb_host_driver *drv, size_t extra, struct device *dev) { @@ -128,6 +136,8 @@ struct hpsb_host *hpsb_alloc_host(struct hpsb_host_driver *drv, size_t extra, h->driver = drv; skb_queue_head_init(&h->pending_packet_queue); + lockdep_set_class(&h->pending_packet_queue.lock, + &pending_packet_queue_key); INIT_LIST_HEAD(&h->addr_space); for (i = 2; i < 16; i++) diff --git a/drivers/input/serio/i8042-sparcio.h b/drivers/input/serio/i8042-sparcio.h index 7d9fafea9615..54adba2d8ed5 100644 --- a/drivers/input/serio/i8042-sparcio.h +++ b/drivers/input/serio/i8042-sparcio.h @@ -88,7 +88,7 @@ static struct of_device_id sparc_i8042_match[] = { }, {}, }; -MODULE_DEVICE_TABLE(of, i8042_match); +MODULE_DEVICE_TABLE(of, sparc_i8042_match); static struct of_platform_driver sparc_i8042_driver = { .name = "i8042", diff --git a/drivers/input/serio/libps2.c b/drivers/input/serio/libps2.c index 79c97f94bcbd..61a6f977846f 100644 --- a/drivers/input/serio/libps2.c +++ b/drivers/input/serio/libps2.c @@ -177,7 +177,7 @@ int ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command) return -1; } - mutex_lock(&ps2dev->cmd_mutex); + mutex_lock_nested(&ps2dev->cmd_mutex, SINGLE_DEPTH_NESTING); serio_pause_rx(ps2dev->serio); ps2dev->flags = command == PS2_CMD_GETID ? 
PS2_FLAG_WAITID : 0; diff --git a/drivers/macintosh/macio-adb.c b/drivers/macintosh/macio-adb.c index 314fc0830d90..4b08852c35ee 100644 --- a/drivers/macintosh/macio-adb.c +++ b/drivers/macintosh/macio-adb.c @@ -90,22 +90,12 @@ int macio_init(void) { struct device_node *adbs; struct resource r; + unsigned int irq; adbs = find_compatible_devices("adb", "chrp,adb0"); if (adbs == 0) return -ENXIO; -#if 0 - { int i = 0; - - printk("macio_adb_init: node = %p, addrs =", adbs->node); - while(!of_address_to_resource(adbs, i, &r)) - printk(" %x(%x)", r.start, r.end - r.start); - printk(", intrs ="); - for (i = 0; i < adbs->n_intrs; ++i) - printk(" %x", adbs->intrs[i].line); - printk("\n"); } -#endif if (of_address_to_resource(adbs, 0, &r)) return -ENXIO; adb = ioremap(r.start, sizeof(struct adb_regs)); @@ -117,10 +107,9 @@ int macio_init(void) out_8(&adb->active_lo.r, 0xff); out_8(&adb->autopoll.r, APE); - if (request_irq(adbs->intrs[0].line, macio_adb_interrupt, - 0, "ADB", (void *)0)) { - printk(KERN_ERR "ADB: can't get irq %d\n", - adbs->intrs[0].line); + irq = irq_of_parse_and_map(adbs, 0); + if (request_irq(irq, macio_adb_interrupt, 0, "ADB", (void *)0)) { + printk(KERN_ERR "ADB: can't get irq %d\n", irq); return -EAGAIN; } out_8(&adb->intr_enb.r, DFB | TAG); diff --git a/drivers/macintosh/macio_asic.c b/drivers/macintosh/macio_asic.c index 40ae7b6a939d..80c0c665b5f6 100644 --- a/drivers/macintosh/macio_asic.c +++ b/drivers/macintosh/macio_asic.c @@ -280,75 +280,128 @@ static void macio_release_dev(struct device *dev) static int macio_resource_quirks(struct device_node *np, struct resource *res, int index) { - if (res->flags & IORESOURCE_MEM) { - /* Grand Central has too large resource 0 on some machines */ - if (index == 0 && !strcmp(np->name, "gc")) - res->end = res->start + 0x1ffff; + /* Only quirks for memory resources for now */ + if ((res->flags & IORESOURCE_MEM) == 0) + return 0; + + /* Grand Central has too large resource 0 on some machines */ + if (index == 0 
&& !strcmp(np->name, "gc")) + res->end = res->start + 0x1ffff; - /* Airport has bogus resource 2 */ - if (index >= 2 && !strcmp(np->name, "radio")) - return 1; + /* Airport has bogus resource 2 */ + if (index >= 2 && !strcmp(np->name, "radio")) + return 1; #ifndef CONFIG_PPC64 - /* DBDMAs may have bogus sizes */ - if ((res->start & 0x0001f000) == 0x00008000) - res->end = res->start + 0xff; + /* DBDMAs may have bogus sizes */ + if ((res->start & 0x0001f000) == 0x00008000) + res->end = res->start + 0xff; #endif /* CONFIG_PPC64 */ - /* ESCC parent eats child resources. We could have added a - * level of hierarchy, but I don't really feel the need - * for it - */ - if (!strcmp(np->name, "escc")) - return 1; - - /* ESCC has bogus resources >= 3 */ - if (index >= 3 && !(strcmp(np->name, "ch-a") && - strcmp(np->name, "ch-b"))) - return 1; - - /* Media bay has too many resources, keep only first one */ - if (index > 0 && !strcmp(np->name, "media-bay")) - return 1; - - /* Some older IDE resources have bogus sizes */ - if (!(strcmp(np->name, "IDE") && strcmp(np->name, "ATA") && - strcmp(np->type, "ide") && strcmp(np->type, "ata"))) { - if (index == 0 && (res->end - res->start) > 0xfff) - res->end = res->start + 0xfff; - if (index == 1 && (res->end - res->start) > 0xff) - res->end = res->start + 0xff; - } + /* ESCC parent eats child resources. 
We could have added a + * level of hierarchy, but I don't really feel the need + * for it + */ + if (!strcmp(np->name, "escc")) + return 1; + + /* ESCC has bogus resources >= 3 */ + if (index >= 3 && !(strcmp(np->name, "ch-a") && + strcmp(np->name, "ch-b"))) + return 1; + + /* Media bay has too many resources, keep only first one */ + if (index > 0 && !strcmp(np->name, "media-bay")) + return 1; + + /* Some older IDE resources have bogus sizes */ + if (!(strcmp(np->name, "IDE") && strcmp(np->name, "ATA") && + strcmp(np->type, "ide") && strcmp(np->type, "ata"))) { + if (index == 0 && (res->end - res->start) > 0xfff) + res->end = res->start + 0xfff; + if (index == 1 && (res->end - res->start) > 0xff) + res->end = res->start + 0xff; } return 0; } +static void macio_create_fixup_irq(struct macio_dev *dev, int index, + unsigned int line) +{ + unsigned int irq; -static void macio_setup_interrupts(struct macio_dev *dev) + irq = irq_create_mapping(NULL, line, 0); + if (irq != NO_IRQ) { + dev->interrupt[index].start = irq; + dev->interrupt[index].flags = IORESOURCE_IRQ; + dev->interrupt[index].name = dev->ofdev.dev.bus_id; + } + if (dev->n_interrupts <= index) + dev->n_interrupts = index + 1; +} + +static void macio_add_missing_resources(struct macio_dev *dev) { struct device_node *np = dev->ofdev.node; - int i,j; + unsigned int irq_base; + + /* Gatwick has some missing interrupts on child nodes */ + if (dev->bus->chip->type != macio_gatwick) + return; - /* For now, we use pre-parsed entries in the device-tree for - * interrupt routing and addresses, but we should change that - * to dynamically parsed entries and so get rid of most of the - * clutter in struct device_node + /* irq_base is always 64 on gatwick. 
I have no cleaner way to get + * that value from here at this point */ - for (i = j = 0; i < np->n_intrs; i++) { + irq_base = 64; + + /* Fix SCC */ + if (strcmp(np->name, "ch-a") == 0) { + macio_create_fixup_irq(dev, 0, 15 + irq_base); + macio_create_fixup_irq(dev, 1, 4 + irq_base); + macio_create_fixup_irq(dev, 2, 5 + irq_base); + printk(KERN_INFO "macio: fixed SCC irqs on gatwick\n"); + } + + /* Fix media-bay */ + if (strcmp(np->name, "media-bay") == 0) { + macio_create_fixup_irq(dev, 0, 29 + irq_base); + printk(KERN_INFO "macio: fixed media-bay irq on gatwick\n"); + } + + /* Fix left media bay childs */ + if (dev->media_bay != NULL && strcmp(np->name, "floppy") == 0) { + macio_create_fixup_irq(dev, 0, 19 + irq_base); + macio_create_fixup_irq(dev, 1, 1 + irq_base); + printk(KERN_INFO "macio: fixed left floppy irqs\n"); + } + if (dev->media_bay != NULL && strcasecmp(np->name, "ata4") == 0) { + macio_create_fixup_irq(dev, 0, 14 + irq_base); + macio_create_fixup_irq(dev, 0, 3 + irq_base); + printk(KERN_INFO "macio: fixed left ide irqs\n"); + } +} + +static void macio_setup_interrupts(struct macio_dev *dev) +{ + struct device_node *np = dev->ofdev.node; + unsigned int irq; + int i = 0, j = 0; + + for (;;) { struct resource *res = &dev->interrupt[j]; if (j >= MACIO_DEV_COUNT_IRQS) break; - res->start = np->intrs[i].line; - res->flags = IORESOURCE_IO; - if (np->intrs[j].sense) - res->flags |= IORESOURCE_IRQ_LOWLEVEL; - else - res->flags |= IORESOURCE_IRQ_HIGHEDGE; + irq = irq_of_parse_and_map(np, i++); + if (irq == NO_IRQ) + break; + res->start = irq; + res->flags = IORESOURCE_IRQ; res->name = dev->ofdev.dev.bus_id; - if (macio_resource_quirks(np, res, i)) + if (macio_resource_quirks(np, res, i - 1)) { memset(res, 0, sizeof(struct resource)); - else + continue; + } else j++; } dev->n_interrupts = j; @@ -445,6 +498,7 @@ static struct macio_dev * macio_add_one_device(struct macio_chip *chip, /* Setup interrupts & resources */ macio_setup_interrupts(dev); 
macio_setup_resources(dev, parent_res); + macio_add_missing_resources(dev); /* Register with core */ if (of_device_register(&dev->ofdev) != 0) { diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c index ff6d9bfdc3d2..f139a74696fe 100644 --- a/drivers/macintosh/smu.c +++ b/drivers/macintosh/smu.c @@ -497,8 +497,7 @@ int __init smu_init (void) smu->doorbell = *data; if (smu->doorbell < 0x50) smu->doorbell += 0x50; - if (np->n_intrs > 0) - smu->db_irq = np->intrs[0].line; + smu->db_irq = irq_of_parse_and_map(np, 0); of_node_put(np); @@ -515,8 +514,7 @@ int __init smu_init (void) smu->msg = *data; if (smu->msg < 0x50) smu->msg += 0x50; - if (np->n_intrs > 0) - smu->msg_irq = np->intrs[0].line; + smu->msg_irq = irq_of_parse_and_map(np, 0); of_node_put(np); } while(0); diff --git a/drivers/macintosh/via-cuda.c b/drivers/macintosh/via-cuda.c index 6501db50fb83..69d5452fd22f 100644 --- a/drivers/macintosh/via-cuda.c +++ b/drivers/macintosh/via-cuda.c @@ -34,13 +34,6 @@ static volatile unsigned char __iomem *via; static DEFINE_SPINLOCK(cuda_lock); -#ifdef CONFIG_MAC -#define CUDA_IRQ IRQ_MAC_ADB -#define eieio() -#else -#define CUDA_IRQ vias->intrs[0].line -#endif - /* VIA registers - spaced 0x200 bytes apart */ #define RS 0x200 /* skip between registers */ #define B 0 /* B-side data */ @@ -189,11 +182,24 @@ int __init find_via_cuda(void) static int __init via_cuda_start(void) { + unsigned int irq; + if (via == NULL) return -ENODEV; - if (request_irq(CUDA_IRQ, cuda_interrupt, 0, "ADB", cuda_interrupt)) { - printk(KERN_ERR "cuda_init: can't get irq %d\n", CUDA_IRQ); +#ifdef CONFIG_MAC + irq = IRQ_MAC_ADB; +#else /* CONFIG_MAC */ + irq = irq_of_parse_and_map(vias, 0); + if (irq == NO_IRQ) { + printk(KERN_ERR "via-cuda: can't map interrupts for %s\n", + vias->full_name); + return -ENODEV; + } +#endif /* CONFIG_MAC */ + + if (request_irq(irq, cuda_interrupt, 0, "ADB", cuda_interrupt)) { + printk(KERN_ERR "via-cuda: can't request irq %d\n", irq); return -EAGAIN; } 
diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c index c1193d34ec9e..06ca80bfd6b9 100644 --- a/drivers/macintosh/via-pmu.c +++ b/drivers/macintosh/via-pmu.c @@ -64,10 +64,6 @@ #include <asm/backlight.h> #endif -#ifdef CONFIG_PPC32 -#include <asm/open_pic.h> -#endif - #include "via-pmu-event.h" /* Some compile options */ @@ -151,7 +147,7 @@ static int pmu_fully_inited = 0; static int pmu_has_adb; static struct device_node *gpio_node; static unsigned char __iomem *gpio_reg = NULL; -static int gpio_irq = -1; +static int gpio_irq = NO_IRQ; static int gpio_irq_enabled = -1; static volatile int pmu_suspended = 0; static spinlock_t pmu_lock; @@ -403,22 +399,21 @@ static int __init pmu_init(void) */ static int __init via_pmu_start(void) { + unsigned int irq; + if (vias == NULL) return -ENODEV; batt_req.complete = 1; -#ifndef CONFIG_PPC_MERGE - if (pmu_kind == PMU_KEYLARGO_BASED) - openpic_set_irq_priority(vias->intrs[0].line, - OPENPIC_PRIORITY_DEFAULT + 1); -#endif - - if (request_irq(vias->intrs[0].line, via_pmu_interrupt, 0, "VIA-PMU", - (void *)0)) { - printk(KERN_ERR "VIA-PMU: can't get irq %d\n", - vias->intrs[0].line); - return -EAGAIN; + irq = irq_of_parse_and_map(vias, 0); + if (irq == NO_IRQ) { + printk(KERN_ERR "via-pmu: can't map interrupt\n"); + return -ENODEV; + } + if (request_irq(irq, via_pmu_interrupt, 0, "VIA-PMU", (void *)0)) { + printk(KERN_ERR "via-pmu: can't request irq %d\n", irq); + return -ENODEV; } if (pmu_kind == PMU_KEYLARGO_BASED) { @@ -426,10 +421,10 @@ static int __init via_pmu_start(void) if (gpio_node == NULL) gpio_node = of_find_node_by_name(NULL, "pmu-interrupt"); - if (gpio_node && gpio_node->n_intrs > 0) - gpio_irq = gpio_node->intrs[0].line; + if (gpio_node) + gpio_irq = irq_of_parse_and_map(gpio_node, 0); - if (gpio_irq != -1) { + if (gpio_irq != NO_IRQ) { if (request_irq(gpio_irq, gpio1_interrupt, 0, "GPIO1 ADB", (void *)0)) printk(KERN_ERR "pmu: can't get irq %d" diff --git a/drivers/md/md.c b/drivers/md/md.c 
index 2fe32c261922..e4e161372a3e 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1404,7 +1404,7 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev) struct block_device *bdev; char b[BDEVNAME_SIZE]; - bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); + bdev = open_partition_by_devnum(dev, FMODE_READ|FMODE_WRITE); if (IS_ERR(bdev)) { printk(KERN_ERR "md: could not open %s.\n", __bdevname(dev, b)); @@ -1414,7 +1414,7 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev) if (err) { printk(KERN_ERR "md: could not bd_claim %s.\n", bdevname(bdev, b)); - blkdev_put(bdev); + blkdev_put_partition(bdev); return err; } rdev->bdev = bdev; @@ -1428,7 +1428,7 @@ static void unlock_rdev(mdk_rdev_t *rdev) if (!bdev) MD_BUG(); bd_release(bdev); - blkdev_put(bdev); + blkdev_put_partition(bdev); } void md_autodetect_dev(dev_t dev); diff --git a/drivers/mmc/mmc.c b/drivers/mmc/mmc.c index 247ff2f23ac9..33525bdf2ab6 100644 --- a/drivers/mmc/mmc.c +++ b/drivers/mmc/mmc.c @@ -128,7 +128,7 @@ static void mmc_wait_done(struct mmc_request *mrq) int mmc_wait_for_req(struct mmc_host *host, struct mmc_request *mrq) { - DECLARE_COMPLETION(complete); + DECLARE_COMPLETION_ONSTACK(complete); mrq->done_data = &complete; mrq->done = mmc_wait_done; diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c index 8ab03b4a885e..2819de79442c 100644 --- a/drivers/net/3c59x.c +++ b/drivers/net/3c59x.c @@ -1897,7 +1897,7 @@ vortex_timer(unsigned long data) printk(KERN_DEBUG "dev->watchdog_timeo=%d\n", dev->watchdog_timeo); } - disable_irq(dev->irq); + disable_irq_lockdep(dev->irq); old_window = ioread16(ioaddr + EL3_CMD) >> 13; EL3WINDOW(4); media_status = ioread16(ioaddr + Wn4_Media); @@ -1978,7 +1978,7 @@ leave_media_alone: dev->name, media_tbl[dev->if_port].name); EL3WINDOW(old_window); - enable_irq(dev->irq); + enable_irq_lockdep(dev->irq); mod_timer(&vp->timer, RUN_AT(next_tick)); if (vp->deferred) iowrite16(FakeIntr, ioaddr + EL3_CMD); diff --git a/drivers/net/8390.c b/drivers/net/8390.c index 
86be96af9c8f..d2935ae39814 100644 --- a/drivers/net/8390.c +++ b/drivers/net/8390.c @@ -249,7 +249,7 @@ void ei_tx_timeout(struct net_device *dev) /* Ugly but a reset can be slow, yet must be protected */ - disable_irq_nosync(dev->irq); + disable_irq_nosync_lockdep(dev->irq); spin_lock(&ei_local->page_lock); /* Try to restart the card. Perhaps the user has fixed something. */ @@ -257,7 +257,7 @@ void ei_tx_timeout(struct net_device *dev) NS8390_init(dev, 1); spin_unlock(&ei_local->page_lock); - enable_irq(dev->irq); + enable_irq_lockdep(dev->irq); netif_wake_queue(dev); } diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c index 3c90003f4230..037d870712ff 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -2735,21 +2735,21 @@ static void nv_do_nic_poll(unsigned long data) if (!using_multi_irqs(dev)) { if (np->msi_flags & NV_MSI_X_ENABLED) - disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector); + disable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector); else - disable_irq(dev->irq); + disable_irq_lockdep(dev->irq); mask = np->irqmask; } else { if (np->nic_poll_irq & NVREG_IRQ_RX_ALL) { - disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector); + disable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector); mask |= NVREG_IRQ_RX_ALL; } if (np->nic_poll_irq & NVREG_IRQ_TX_ALL) { - disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector); + disable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector); mask |= NVREG_IRQ_TX_ALL; } if (np->nic_poll_irq & NVREG_IRQ_OTHER) { - disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector); + disable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector); mask |= NVREG_IRQ_OTHER; } } @@ -2761,23 +2761,23 @@ static void nv_do_nic_poll(unsigned long data) pci_push(base); if (!using_multi_irqs(dev)) { - nv_nic_irq((int) 0, (void *) data, (struct pt_regs *) NULL); + nv_nic_irq(0, dev, NULL); if (np->msi_flags & NV_MSI_X_ENABLED) - 
enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector); + enable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector); else - enable_irq(dev->irq); + enable_irq_lockdep(dev->irq); } else { if (np->nic_poll_irq & NVREG_IRQ_RX_ALL) { - nv_nic_irq_rx((int) 0, (void *) data, (struct pt_regs *) NULL); - enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector); + nv_nic_irq_rx(0, dev, NULL); + enable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector); } if (np->nic_poll_irq & NVREG_IRQ_TX_ALL) { - nv_nic_irq_tx((int) 0, (void *) data, (struct pt_regs *) NULL); - enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector); + nv_nic_irq_tx(0, dev, NULL); + enable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector); } if (np->nic_poll_irq & NVREG_IRQ_OTHER) { - nv_nic_irq_other((int) 0, (void *) data, (struct pt_regs *) NULL); - enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector); + nv_nic_irq_other(0, dev, NULL); + enable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector); } } } diff --git a/drivers/net/mace.c b/drivers/net/mace.c index f2c0bf89f0c7..29e4b5aa6ead 100644 --- a/drivers/net/mace.c +++ b/drivers/net/mace.c @@ -242,12 +242,12 @@ static int __devinit mace_probe(struct macio_dev *mdev, const struct of_device_i } rc = request_irq(mp->tx_dma_intr, mace_txdma_intr, 0, "MACE-txdma", dev); if (rc) { - printk(KERN_ERR "MACE: can't get irq %d\n", mace->intrs[1].line); + printk(KERN_ERR "MACE: can't get irq %d\n", mp->tx_dma_intr); goto err_free_irq; } rc = request_irq(mp->rx_dma_intr, mace_rxdma_intr, 0, "MACE-rxdma", dev); if (rc) { - printk(KERN_ERR "MACE: can't get irq %d\n", mace->intrs[2].line); + printk(KERN_ERR "MACE: can't get irq %d\n", mp->rx_dma_intr); goto err_free_tx_irq; } diff --git a/drivers/net/wireless/hostap/hostap_hw.c b/drivers/net/wireless/hostap/hostap_hw.c index b764cfda6e84..dafaa5ff5aa6 100644 --- a/drivers/net/wireless/hostap/hostap_hw.c +++ b/drivers/net/wireless/hostap/hostap_hw.c @@ -3095,6 +3095,14 @@ static void 
prism2_clear_set_tim_queue(local_info_t *local) } +/* + * HostAP uses two layers of net devices, where the inner + * layer gets called all the time from the outer layer. + * This is a natural nesting, which needs a split lock type. + */ +static struct lock_class_key hostap_netdev_xmit_lock_key; + + static struct net_device * prism2_init_local_data(struct prism2_helper_functions *funcs, int card_idx, struct device *sdev) @@ -3259,6 +3267,8 @@ while (0) SET_NETDEV_DEV(dev, sdev); if (ret >= 0) ret = register_netdevice(dev); + + lockdep_set_class(&dev->_xmit_lock, &hostap_netdev_xmit_lock_key); rtnl_unlock(); if (ret < 0) { printk(KERN_WARNING "%s: register netdevice failed!\n", diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c index 3a4a644c2686..212268881857 100644 --- a/drivers/pnp/pnpacpi/rsparser.c +++ b/drivers/pnp/pnpacpi/rsparser.c @@ -74,7 +74,7 @@ static void decode_irq_flags(int flag, int *triggering, int *polarity) static void pnpacpi_parse_allocated_irqresource(struct pnp_resource_table *res, u32 gsi, - int triggering, int polarity) + int triggering, int polarity, int shareable) { int i = 0; int irq; @@ -95,6 +95,9 @@ pnpacpi_parse_allocated_irqresource(struct pnp_resource_table *res, u32 gsi, return; } + if (shareable) + res->irq_resource[i].flags |= IORESOURCE_IRQ_SHAREABLE; + res->irq_resource[i].start = irq; res->irq_resource[i].end = irq; pcibios_penalize_isa_irq(irq, 1); @@ -194,7 +197,8 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res, pnpacpi_parse_allocated_irqresource(res_table, res->data.irq.interrupts[i], res->data.irq.triggering, - res->data.irq.polarity); + res->data.irq.polarity, + res->data.irq.sharable); } break; @@ -255,7 +259,8 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res, pnpacpi_parse_allocated_irqresource(res_table, res->data.extended_irq.interrupts[i], res->data.extended_irq.triggering, - res->data.extended_irq.polarity); + 
res->data.extended_irq.polarity, + res->data.extended_irq.sharable); } break; diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c index 4138564402b8..985d1613baaa 100644 --- a/drivers/s390/char/sclp.c +++ b/drivers/s390/char/sclp.c @@ -383,6 +383,7 @@ void sclp_sync_wait(void) { unsigned long psw_mask; + unsigned long flags; unsigned long cr0, cr0_sync; u64 timeout; @@ -395,9 +396,11 @@ sclp_sync_wait(void) sclp_tod_from_jiffies(sclp_request_timer.expires - jiffies); } + local_irq_save(flags); /* Prevent bottom half from executing once we force interrupts open */ local_bh_disable(); /* Enable service-signal interruption, disable timer interrupts */ + trace_hardirqs_on(); __ctl_store(cr0, 0, 0); cr0_sync = cr0; cr0_sync |= 0x00000200; @@ -415,11 +418,10 @@ sclp_sync_wait(void) barrier(); cpu_relax(); } - /* Restore interrupt settings */ - asm volatile ("SSM 0(%0)" - : : "a" (&psw_mask) : "memory"); + local_irq_disable(); __ctl_load(cr0, 0, 0); - __local_bh_enable(); + _local_bh_enable(); + local_irq_restore(flags); } EXPORT_SYMBOL(sclp_sync_wait); diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index a3423267467f..6fec90eab00e 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -147,7 +147,7 @@ cio_tpi(void) sch->driver->irq(&sch->dev); spin_unlock(&sch->lock); irq_exit (); - __local_bh_enable(); + _local_bh_enable(); return 1; } diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c index 36733b9823c6..8e8963f15731 100644 --- a/drivers/s390/net/qeth_main.c +++ b/drivers/s390/net/qeth_main.c @@ -84,6 +84,8 @@ static debug_info_t *qeth_dbf_qerr = NULL; DEFINE_PER_CPU(char[256], qeth_dbf_txt_buf); +static struct lock_class_key qdio_out_skb_queue_key; + /** * some more definitions and declarations */ @@ -3229,6 +3231,9 @@ qeth_alloc_qdio_buffers(struct qeth_card *card) &card->qdio.out_qs[i]->qdio_bufs[j]; skb_queue_head_init(&card->qdio.out_qs[i]->bufs[j]. 
skb_list); + lockdep_set_class( + &card->qdio.out_qs[i]->bufs[j].skb_list.lock, + &qdio_out_skb_queue_key); INIT_LIST_HEAD(&card->qdio.out_qs[i]->bufs[j].ctx_list); } } @@ -5272,6 +5277,7 @@ qeth_free_vlan_buffer(struct qeth_card *card, struct qeth_qdio_out_buffer *buf, struct sk_buff_head tmp_list; skb_queue_head_init(&tmp_list); + lockdep_set_class(&tmp_list.lock, &qdio_out_skb_queue_key); for(i = 0; i < QETH_MAX_BUFFER_ELEMENTS(card); ++i){ while ((skb = skb_dequeue(&buf->skb_list))){ if (vlan_tx_tag_present(skb) && diff --git a/drivers/s390/s390mach.c b/drivers/s390/s390mach.c index 432136f96e64..ffb3677e354f 100644 --- a/drivers/s390/s390mach.c +++ b/drivers/s390/s390mach.c @@ -378,6 +378,8 @@ s390_do_machine_check(struct pt_regs *regs) struct mcck_struct *mcck; int umode; + lockdep_off(); + mci = (struct mci *) &S390_lowcore.mcck_interruption_code; mcck = &__get_cpu_var(cpu_mcck); umode = user_mode(regs); @@ -482,6 +484,7 @@ s390_do_machine_check(struct pt_regs *regs) mcck->warning = 1; set_thread_flag(TIF_MCCK_PENDING); } + lockdep_on(); } /* diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index 909731b99d26..8ec8da0beaa8 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -2168,9 +2168,9 @@ zfcp_erp_adapter_strategy_open_fsf_xconfig(struct zfcp_erp_action *erp_action) atomic_clear_mask(ZFCP_STATUS_ADAPTER_HOST_CON_INIT, &adapter->status); ZFCP_LOG_DEBUG("Doing exchange config data\n"); - write_lock(&adapter->erp_lock); + write_lock_irq(&adapter->erp_lock); zfcp_erp_action_to_running(erp_action); - write_unlock(&adapter->erp_lock); + write_unlock_irq(&adapter->erp_lock); zfcp_erp_timeout_init(erp_action); if (zfcp_fsf_exchange_config_data(erp_action)) { retval = ZFCP_ERP_FAILED; @@ -2236,9 +2236,9 @@ zfcp_erp_adapter_strategy_open_fsf_xport(struct zfcp_erp_action *erp_action) adapter = erp_action->adapter; atomic_clear_mask(ZFCP_STATUS_ADAPTER_XPORT_OK, &adapter->status); - write_lock(&adapter->erp_lock); 
+ write_lock_irq(&adapter->erp_lock); zfcp_erp_action_to_running(erp_action); - write_unlock(&adapter->erp_lock); + write_unlock_irq(&adapter->erp_lock); zfcp_erp_timeout_init(erp_action); ret = zfcp_fsf_exchange_port_data(erp_action, adapter, NULL); diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c index 345a191926a4..49ea5add4abc 100644 --- a/drivers/s390/scsi/zfcp_qdio.c +++ b/drivers/s390/scsi/zfcp_qdio.c @@ -427,6 +427,7 @@ int zfcp_qdio_reqid_check(struct zfcp_adapter *adapter, void *sbale_addr) { struct zfcp_fsf_req *fsf_req; + unsigned long flags; /* invalid (per convention used in this driver) */ if (unlikely(!sbale_addr)) { @@ -438,15 +439,15 @@ zfcp_qdio_reqid_check(struct zfcp_adapter *adapter, void *sbale_addr) fsf_req = (struct zfcp_fsf_req *) sbale_addr; /* serialize with zfcp_fsf_req_dismiss_all */ - spin_lock(&adapter->fsf_req_list_lock); + spin_lock_irqsave(&adapter->fsf_req_list_lock, flags); if (list_empty(&adapter->fsf_req_list_head)) { - spin_unlock(&adapter->fsf_req_list_lock); + spin_unlock_irqrestore(&adapter->fsf_req_list_lock, flags); return 0; } list_del(&fsf_req->list); atomic_dec(&adapter->fsf_reqs_active); - spin_unlock(&adapter->fsf_req_list_lock); - + spin_unlock_irqrestore(&adapter->fsf_req_list_lock, flags); + if (unlikely(adapter != fsf_req->adapter)) { ZFCP_LOG_NORMAL("bug: invalid reqid (fsf_req=%p, " "fsf_req->adapter=%p, adapter=%p)\n", diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index 82caba464291..1c960ac1617f 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -1001,7 +1001,7 @@ unsigned ata_exec_internal(struct ata_device *dev, struct ata_queued_cmd *qc; unsigned int tag, preempted_tag; u32 preempted_sactive, preempted_qc_active; - DECLARE_COMPLETION(wait); + DECLARE_COMPLETION_ONSTACK(wait); unsigned long flags; unsigned int err_mask; int rc; diff --git a/drivers/serial/8250_pnp.c b/drivers/serial/8250_pnp.c index 739bc84f91e9..632f62d6ec7e 100644 
--- a/drivers/serial/8250_pnp.c +++ b/drivers/serial/8250_pnp.c @@ -431,6 +431,8 @@ serial_pnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id) #endif port.flags |= UPF_SKIP_TEST | UPF_BOOT_AUTOCONF; + if (pnp_irq_flags(dev, 0) & IORESOURCE_IRQ_SHAREABLE) + port.flags |= UPF_SHARE_IRQ; port.uartclk = 1843200; port.dev = &dev->dev; diff --git a/drivers/serial/pmac_zilog.c b/drivers/serial/pmac_zilog.c index 459c0231aef3..bfd2a22759eb 100644 --- a/drivers/serial/pmac_zilog.c +++ b/drivers/serial/pmac_zilog.c @@ -1443,8 +1443,8 @@ static int __init pmz_init_port(struct uart_pmac_port *uap) uap->flags &= ~PMACZILOG_FLAG_HAS_DMA; goto no_dma; } - uap->tx_dma_irq = np->intrs[1].line; - uap->rx_dma_irq = np->intrs[2].line; + uap->tx_dma_irq = irq_of_parse_and_map(np, 1); + uap->rx_dma_irq = irq_of_parse_and_map(np, 2); } no_dma: @@ -1491,7 +1491,7 @@ no_dma: * Init remaining bits of "port" structure */ uap->port.iotype = UPIO_MEM; - uap->port.irq = np->intrs[0].line; + uap->port.irq = irq_of_parse_and_map(np, 0); uap->port.uartclk = ZS_CLOCK; uap->port.fifosize = 1; uap->port.ops = &pmz_pops; diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c index c54af8774393..95831808334c 100644 --- a/drivers/serial/serial_core.c +++ b/drivers/serial/serial_core.c @@ -49,6 +49,12 @@ */ static DEFINE_MUTEX(port_mutex); +/* + * lockdep: port->lock is initialized in two places, but we + * want only one lock-class: + */ +static struct lock_class_key port_lock_key; + #define HIGH_BITS_OFFSET ((sizeof(long)-sizeof(int))*8) #define uart_users(state) ((state)->count + ((state)->info ? (state)->info->blocked_open : 0)) @@ -1865,6 +1871,7 @@ uart_set_options(struct uart_port *port, struct console *co, * early. 
*/ spin_lock_init(&port->lock); + lockdep_set_class(&port->lock, &port_lock_key); memset(&termios, 0, sizeof(struct termios)); @@ -2247,8 +2254,10 @@ int uart_add_one_port(struct uart_driver *drv, struct uart_port *port) * If this port is a console, then the spinlock is already * initialised. */ - if (!(uart_console(port) && (port->cons->flags & CON_ENABLED))) + if (!(uart_console(port) && (port->cons->flags & CON_ENABLED))) { spin_lock_init(&port->lock); + lockdep_set_class(&port->lock, &port_lock_key); + } uart_configure_port(drv, state, port); diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index ed1cdf6ac8f3..146298ad7371 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -510,7 +510,7 @@ static void spi_complete(void *arg) */ int spi_sync(struct spi_device *spi, struct spi_message *message) { - DECLARE_COMPLETION(done); + DECLARE_COMPLETION_ONSTACK(done); int status; message->complete = spi_complete; diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c index e47e3a8ed6e4..f48c3dbc367a 100644 --- a/drivers/usb/core/inode.c +++ b/drivers/usb/core/inode.c @@ -200,7 +200,7 @@ static void update_sb(struct super_block *sb) if (!root) return; - mutex_lock(&root->d_inode->i_mutex); + mutex_lock_nested(&root->d_inode->i_mutex, I_MUTEX_PARENT); list_for_each_entry(bus, &root->d_subdirs, d_u.d_child) { if (bus->d_inode) { @@ -527,7 +527,7 @@ static void fs_remove_file (struct dentry *dentry) if (!parent || !parent->d_inode) return; - mutex_lock(&parent->d_inode->i_mutex); + mutex_lock_nested(&parent->d_inode->i_mutex, I_MUTEX_PARENT); if (usbfs_positive(dentry)) { if (dentry->d_inode) { if (S_ISDIR(dentry->d_inode->i_mode)) diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig index 17de4c84db69..3badb48d662b 100644 --- a/drivers/video/Kconfig +++ b/drivers/video/Kconfig @@ -1557,6 +1557,21 @@ config FB_S3C2410_DEBUG Turn on debugging messages. 
Note that you can set/unset at run time through sysfs +config FB_PNX4008_DUM + tristate "Display Update Module support on Philips PNX4008 board" + depends on FB && ARCH_PNX4008 + ---help--- + Say Y here to enable support for PNX4008 Display Update Module (DUM) + +config FB_PNX4008_DUM_RGB + tristate "RGB Framebuffer support on Philips PNX4008 board" + depends on FB_PNX4008_DUM + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + ---help--- + Say Y here to enable support for PNX4008 RGB Framebuffer + config FB_VIRTUAL tristate "Virtual Frame Buffer support (ONLY FOR TESTING!)" depends on FB diff --git a/drivers/video/Makefile b/drivers/video/Makefile index c335e9bc3b20..6283d015f8f5 100644 --- a/drivers/video/Makefile +++ b/drivers/video/Makefile @@ -94,6 +94,8 @@ obj-$(CONFIG_FB_TX3912) += tx3912fb.o obj-$(CONFIG_FB_S1D13XXX) += s1d13xxxfb.o obj-$(CONFIG_FB_IMX) += imxfb.o obj-$(CONFIG_FB_S3C2410) += s3c2410fb.o +obj-$(CONFIG_FB_PNX4008_DUM) += pnx4008/ +obj-$(CONFIG_FB_PNX4008_DUM_RGB) += pnx4008/ # Platform or fallback drivers go here obj-$(CONFIG_FB_VESA) += vesafb.o diff --git a/drivers/video/offb.c b/drivers/video/offb.c index bfeb11bd4712..71ce1fa45cf4 100644 --- a/drivers/video/offb.c +++ b/drivers/video/offb.c @@ -97,14 +97,43 @@ static int offb_setcolreg(u_int regno, u_int red, u_int green, u_int blue, u_int transp, struct fb_info *info) { struct offb_par *par = (struct offb_par *) info->par; + int i, depth; + u32 *pal = info->pseudo_palette; - if (!par->cmap_adr || regno > 255) + depth = info->var.bits_per_pixel; + if (depth == 16) + depth = (info->var.green.length == 5) ? 
15 : 16; + + if (regno > 255 || + (depth == 16 && regno > 63) || + (depth == 15 && regno > 31)) return 1; + if (regno < 16) { + switch (depth) { + case 15: + pal[regno] = (regno << 10) | (regno << 5) | regno; + break; + case 16: + pal[regno] = (regno << 11) | (regno << 5) | regno; + break; + case 24: + pal[regno] = (regno << 16) | (regno << 8) | regno; + break; + case 32: + i = (regno << 8) | regno; + pal[regno] = (i << 16) | i; + break; + } + } + red >>= 8; green >>= 8; blue >>= 8; + if (!par->cmap_adr) + return 0; + switch (par->cmap_type) { case cmap_m64: writeb(regno, par->cmap_adr); @@ -141,20 +170,6 @@ static int offb_setcolreg(u_int regno, u_int red, u_int green, u_int blue, break; } - if (regno < 16) - switch (info->var.bits_per_pixel) { - case 16: - ((u16 *) (info->pseudo_palette))[regno] = - (regno << 10) | (regno << 5) | regno; - break; - case 32: - { - int i = (regno << 8) | regno; - ((u32 *) (info->pseudo_palette))[regno] = - (i << 16) | i; - break; - } - } return 0; } @@ -223,81 +238,9 @@ int __init offb_init(void) { struct device_node *dp = NULL, *boot_disp = NULL; -#if defined(CONFIG_BOOTX_TEXT) && defined(CONFIG_PPC32) - struct device_node *macos_display = NULL; -#endif if (fb_get_options("offb", NULL)) return -ENODEV; -#if defined(CONFIG_BOOTX_TEXT) && defined(CONFIG_PPC32) - /* If we're booted from BootX... */ - if (boot_infos != 0) { - unsigned long addr = - (unsigned long) boot_infos->dispDeviceBase; - u32 *addrp; - u64 daddr, dsize; - unsigned int flags; - - /* find the device node corresponding to the macos display */ - while ((dp = of_find_node_by_type(dp, "display"))) { - int i; - - /* - * Look for an AAPL,address property first. 
- */ - unsigned int na; - unsigned int *ap = - (unsigned int *)get_property(dp, "AAPL,address", - &na); - if (ap != 0) { - for (na /= sizeof(unsigned int); na > 0; - --na, ++ap) - if (*ap <= addr && - addr < *ap + 0x1000000) { - macos_display = dp; - goto foundit; - } - } - - /* - * See if the display address is in one of the address - * ranges for this display. - */ - i = 0; - for (;;) { - addrp = of_get_address(dp, i++, &dsize, &flags); - if (addrp == NULL) - break; - if (!(flags & IORESOURCE_MEM)) - continue; - daddr = of_translate_address(dp, addrp); - if (daddr == OF_BAD_ADDR) - continue; - if (daddr <= addr && addr < (daddr + dsize)) { - macos_display = dp; - goto foundit; - } - } - foundit: - if (macos_display) { - printk(KERN_INFO "MacOS display is %s\n", - dp->full_name); - break; - } - } - - /* initialize it */ - offb_init_fb(macos_display ? macos_display-> - name : "MacOS display", - macos_display ? macos_display-> - full_name : "MacOS display", - boot_infos->dispDeviceRect[2], - boot_infos->dispDeviceRect[3], - boot_infos->dispDeviceDepth, - boot_infos->dispDeviceRowBytes, addr, NULL); - } -#endif /* defined(CONFIG_BOOTX_TEXT) && defined(CONFIG_PPC32) */ - for (dp = NULL; (dp = of_find_node_by_type(dp, "display"));) { if (get_property(dp, "linux,opened", NULL) && get_property(dp, "linux,boot-display", NULL)) { @@ -317,94 +260,93 @@ int __init offb_init(void) static void __init offb_init_nodriver(struct device_node *dp) { - int *pp, i; unsigned int len; - int width = 640, height = 480, depth = 8, pitch; - unsigned int flags, rsize, *up; - u64 address = OF_BAD_ADDR; - u32 *addrp; + int i, width = 640, height = 480, depth = 8, pitch = 640; + unsigned int flags, rsize, addr_prop = 0; + unsigned long max_size = 0; + u64 rstart, address = OF_BAD_ADDR; + u32 *pp, *addrp, *up; u64 asize; - if ((pp = (int *) get_property(dp, "depth", &len)) != NULL - && len == sizeof(int)) + pp = (u32 *)get_property(dp, "linux,bootx-depth", &len); + if (pp == NULL) + pp = (u32 
*)get_property(dp, "depth", &len); + if (pp && len == sizeof(u32)) depth = *pp; - if ((pp = (int *) get_property(dp, "width", &len)) != NULL - && len == sizeof(int)) + + pp = (u32 *)get_property(dp, "linux,bootx-width", &len); + if (pp == NULL) + pp = (u32 *)get_property(dp, "width", &len); + if (pp && len == sizeof(u32)) width = *pp; - if ((pp = (int *) get_property(dp, "height", &len)) != NULL - && len == sizeof(int)) + + pp = (u32 *)get_property(dp, "linux,bootx-height", &len); + if (pp == NULL) + pp = (u32 *)get_property(dp, "height", &len); + if (pp && len == sizeof(u32)) height = *pp; - if ((pp = (int *) get_property(dp, "linebytes", &len)) != NULL - && len == sizeof(int)) { + + pp = (u32 *)get_property(dp, "linux,bootx-linebytes", &len); + if (pp == NULL) + pp = (u32 *)get_property(dp, "linebytes", &len); + if (pp && len == sizeof(u32)) pitch = *pp; - if (pitch == 1) - pitch = 0x1000; - } else - pitch = width; - - rsize = (unsigned long)pitch * (unsigned long)height * - (unsigned long)(depth / 8); - - /* Try to match device to a PCI device in order to get a properly - * translated address rather then trying to decode the open firmware - * stuff in various incorrect ways - */ -#ifdef CONFIG_PCI - /* First try to locate the PCI device if any */ - { - struct pci_dev *pdev = NULL; - - for_each_pci_dev(pdev) { - if (dp == pci_device_to_OF_node(pdev)) - break; - } - if (pdev) { - for (i = 0; i < 6 && address == OF_BAD_ADDR; i++) { - if ((pci_resource_flags(pdev, i) & - IORESOURCE_MEM) && - (pci_resource_len(pdev, i) >= rsize)) - address = pci_resource_start(pdev, i); - } - pci_dev_put(pdev); - } - } -#endif /* CONFIG_PCI */ - - /* This one is dodgy, we may drop it ... 
*/ - if (address == OF_BAD_ADDR && - (up = (unsigned *) get_property(dp, "address", &len)) != NULL && - len == sizeof(unsigned int)) - address = (u64) * up; - - if (address == OF_BAD_ADDR) { - for (i = 0; (addrp = of_get_address(dp, i, &asize, &flags)) - != NULL; i++) { - if (!(flags & IORESOURCE_MEM)) - continue; - if (asize >= pitch * height * depth / 8) - break; - } - if (addrp == NULL) { - printk(KERN_ERR - "no framebuffer address found for %s\n", - dp->full_name); - return; - } - address = of_translate_address(dp, addrp); - if (address == OF_BAD_ADDR) { - printk(KERN_ERR - "can't translate framebuffer address for %s\n", - dp->full_name); - return; + else + pitch = width * ((depth + 7) / 8); + + rsize = (unsigned long)pitch * (unsigned long)height; + + /* Ok, now we try to figure out the address of the framebuffer. + * + * Unfortunately, Open Firmware doesn't provide a standard way to do + * so. All we can do is a dodgy heuristic that happens to work in + * practice. On most machines, the "address" property contains what + * we need, though not on Matrox cards found in IBM machines. What I've + * found that appears to give good results is to go through the PCI + * ranges and pick one that is both big enough and if possible encloses + * the "address" property. 
If none match, we pick the biggest + */ + up = (u32 *)get_property(dp, "linux,bootx-addr", &len); + if (up == NULL) + up = (u32 *)get_property(dp, "address", &len); + if (up && len == sizeof(u32)) + addr_prop = *up; + + for (i = 0; (addrp = of_get_address(dp, i, &asize, &flags)) + != NULL; i++) { + int match_addrp = 0; + + if (!(flags & IORESOURCE_MEM)) + continue; + if (asize < rsize) + continue; + rstart = of_translate_address(dp, addrp); + if (rstart == OF_BAD_ADDR) + continue; + if (addr_prop && (rstart <= addr_prop) && + ((rstart + asize) >= (addr_prop + rsize))) + match_addrp = 1; + if (match_addrp) { + address = addr_prop; + break; } + if (rsize > max_size) { + max_size = rsize; + address = OF_BAD_ADDR; + } + if (address == OF_BAD_ADDR) + address = rstart; + } + if (address == OF_BAD_ADDR && addr_prop) + address = (u64)addr_prop; + if (address != OF_BAD_ADDR) { /* kludge for valkyrie */ if (strcmp(dp->name, "valkyrie") == 0) address += 0x1000; + offb_init_fb(dp->name, dp->full_name, width, height, depth, + pitch, address, dp); } - offb_init_fb(dp->name, dp->full_name, width, height, depth, - pitch, address, dp); - } static void __init offb_init_fb(const char *name, const char *full_name, @@ -412,7 +354,7 @@ static void __init offb_init_fb(const char *name, const char *full_name, int pitch, unsigned long address, struct device_node *dp) { - unsigned long res_size = pitch * height * depth / 8; + unsigned long res_size = pitch * height * (depth + 7) / 8; struct offb_par *par = &default_par; unsigned long res_start = address; struct fb_fix_screeninfo *fix; @@ -426,7 +368,7 @@ static void __init offb_init_fb(const char *name, const char *full_name, printk(KERN_INFO "Using unsupported %dx%d %s at %lx, depth=%d, pitch=%d\n", width, height, name, address, depth, pitch); - if (depth != 8 && depth != 16 && depth != 32) { + if (depth != 8 && depth != 15 && depth != 16 && depth != 32) { printk(KERN_ERR "%s: can't use depth = %d\n", full_name, depth); 
release_mem_region(res_start, res_size); @@ -502,7 +444,6 @@ static void __init offb_init_fb(const char *name, const char *full_name, : */ FB_VISUAL_TRUECOLOR; var->xoffset = var->yoffset = 0; - var->bits_per_pixel = depth; switch (depth) { case 8: var->bits_per_pixel = 8; @@ -515,7 +456,7 @@ static void __init offb_init_fb(const char *name, const char *full_name, var->transp.offset = 0; var->transp.length = 0; break; - case 16: /* RGB 555 */ + case 15: /* RGB 555 */ var->bits_per_pixel = 16; var->red.offset = 10; var->red.length = 5; @@ -526,6 +467,17 @@ static void __init offb_init_fb(const char *name, const char *full_name, var->transp.offset = 0; var->transp.length = 0; break; + case 16: /* RGB 565 */ + var->bits_per_pixel = 16; + var->red.offset = 11; + var->red.length = 5; + var->green.offset = 5; + var->green.length = 6; + var->blue.offset = 0; + var->blue.length = 5; + var->transp.offset = 0; + var->transp.length = 0; + break; case 32: /* RGB 888 */ var->bits_per_pixel = 32; var->red.offset = 16; diff --git a/drivers/video/pnx4008/Makefile b/drivers/video/pnx4008/Makefile new file mode 100644 index 000000000000..636aaccf01fd --- /dev/null +++ b/drivers/video/pnx4008/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for the new PNX4008 framebuffer device driver +# + +obj-$(CONFIG_FB_PNX4008_DUM) += sdum.o +obj-$(CONFIG_FB_PNX4008_DUM_RGB) += pnxrgbfb.o + diff --git a/drivers/video/pnx4008/dum.h b/drivers/video/pnx4008/dum.h new file mode 100644 index 000000000000..d80a614d89ed --- /dev/null +++ b/drivers/video/pnx4008/dum.h @@ -0,0 +1,211 @@ +/* + * linux/drivers/video/pnx4008/dum.h + * + * Internal header for SDUM + * + * 2005 (c) Koninklijke Philips N.V. This file is licensed under + * the terms of the GNU General Public License version 2. This program + * is licensed "as is" without any warranty of any kind, whether express + * or implied. 
+ */ + +#ifndef __PNX008_DUM_H__ +#define __PNX008_DUM_H__ + +#include <asm/arch/platform.h> + +#define PNX4008_DUMCONF_VA_BASE IO_ADDRESS(PNX4008_DUMCONF_BASE) +#define PNX4008_DUM_MAIN_VA_BASE IO_ADDRESS(PNX4008_DUM_MAINCFG_BASE) + +/* DUM CFG ADDRESSES */ +#define DUM_CH_BASE_ADR (PNX4008_DUMCONF_VA_BASE + 0x00) +#define DUM_CH_MIN_ADR (PNX4008_DUMCONF_VA_BASE + 0x00) +#define DUM_CH_MAX_ADR (PNX4008_DUMCONF_VA_BASE + 0x04) +#define DUM_CH_CONF_ADR (PNX4008_DUMCONF_VA_BASE + 0x08) +#define DUM_CH_STAT_ADR (PNX4008_DUMCONF_VA_BASE + 0x0C) +#define DUM_CH_CTRL_ADR (PNX4008_DUMCONF_VA_BASE + 0x10) + +#define CH_MARG (0x100 / sizeof(u32)) +#define DUM_CH_MIN(i) (*((volatile u32 *)DUM_CH_MIN_ADR + (i) * CH_MARG)) +#define DUM_CH_MAX(i) (*((volatile u32 *)DUM_CH_MAX_ADR + (i) * CH_MARG)) +#define DUM_CH_CONF(i) (*((volatile u32 *)DUM_CH_CONF_ADR + (i) * CH_MARG)) +#define DUM_CH_STAT(i) (*((volatile u32 *)DUM_CH_STAT_ADR + (i) * CH_MARG)) +#define DUM_CH_CTRL(i) (*((volatile u32 *)DUM_CH_CTRL_ADR + (i) * CH_MARG)) + +#define DUM_CONF_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x00) +#define DUM_CTRL_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x04) +#define DUM_STAT_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x08) +#define DUM_DECODE_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x0C) +#define DUM_COM_BASE_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x10) +#define DUM_SYNC_C_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x14) +#define DUM_CLK_DIV_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x18) +#define DUM_DIRTY_LOW_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x20) +#define DUM_DIRTY_HIGH_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x24) +#define DUM_FORMAT_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x28) +#define DUM_WTCFG1_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x30) +#define DUM_RTCFG1_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x34) +#define DUM_WTCFG2_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x38) +#define DUM_RTCFG2_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x3C) +#define DUM_TCFG_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x40) +#define DUM_OUTP_FORMAT1_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x44) +#define DUM_OUTP_FORMAT2_ADR 
(PNX4008_DUM_MAIN_VA_BASE + 0x48) +#define DUM_SYNC_MODE_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x4C) +#define DUM_SYNC_OUT_C_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x50) + +#define DUM_CONF (*(volatile u32 *)(DUM_CONF_ADR)) +#define DUM_CTRL (*(volatile u32 *)(DUM_CTRL_ADR)) +#define DUM_STAT (*(volatile u32 *)(DUM_STAT_ADR)) +#define DUM_DECODE (*(volatile u32 *)(DUM_DECODE_ADR)) +#define DUM_COM_BASE (*(volatile u32 *)(DUM_COM_BASE_ADR)) +#define DUM_SYNC_C (*(volatile u32 *)(DUM_SYNC_C_ADR)) +#define DUM_CLK_DIV (*(volatile u32 *)(DUM_CLK_DIV_ADR)) +#define DUM_DIRTY_LOW (*(volatile u32 *)(DUM_DIRTY_LOW_ADR)) +#define DUM_DIRTY_HIGH (*(volatile u32 *)(DUM_DIRTY_HIGH_ADR)) +#define DUM_FORMAT (*(volatile u32 *)(DUM_FORMAT_ADR)) +#define DUM_WTCFG1 (*(volatile u32 *)(DUM_WTCFG1_ADR)) +#define DUM_RTCFG1 (*(volatile u32 *)(DUM_RTCFG1_ADR)) +#define DUM_WTCFG2 (*(volatile u32 *)(DUM_WTCFG2_ADR)) +#define DUM_RTCFG2 (*(volatile u32 *)(DUM_RTCFG2_ADR)) +#define DUM_TCFG (*(volatile u32 *)(DUM_TCFG_ADR)) +#define DUM_OUTP_FORMAT1 (*(volatile u32 *)(DUM_OUTP_FORMAT1_ADR)) +#define DUM_OUTP_FORMAT2 (*(volatile u32 *)(DUM_OUTP_FORMAT2_ADR)) +#define DUM_SYNC_MODE (*(volatile u32 *)(DUM_SYNC_MODE_ADR)) +#define DUM_SYNC_OUT_C (*(volatile u32 *)(DUM_SYNC_OUT_C_ADR)) + +/* DUM SLAVE ADDRESSES */ +#define DUM_SLAVE_WRITE_ADR (PNX4008_DUM_MAINCFG_BASE + 0x0000000) +#define DUM_SLAVE_READ1_I_ADR (PNX4008_DUM_MAINCFG_BASE + 0x1000000) +#define DUM_SLAVE_READ1_R_ADR (PNX4008_DUM_MAINCFG_BASE + 0x1000004) +#define DUM_SLAVE_READ2_I_ADR (PNX4008_DUM_MAINCFG_BASE + 0x1000008) +#define DUM_SLAVE_READ2_R_ADR (PNX4008_DUM_MAINCFG_BASE + 0x100000C) + +#define DUM_SLAVE_WRITE_W ((volatile u32 *)(DUM_SLAVE_WRITE_ADR)) +#define DUM_SLAVE_WRITE_HW ((volatile u16 *)(DUM_SLAVE_WRITE_ADR)) +#define DUM_SLAVE_READ1_I ((volatile u8 *)(DUM_SLAVE_READ1_I_ADR)) +#define DUM_SLAVE_READ1_R ((volatile u16 *)(DUM_SLAVE_READ1_R_ADR)) +#define DUM_SLAVE_READ2_I ((volatile u8 *)(DUM_SLAVE_READ2_I_ADR)) +#define 
DUM_SLAVE_READ2_R ((volatile u16 *)(DUM_SLAVE_READ2_R_ADR)) + +/* Sony display register addresses */ +#define DISP_0_REG (0x00) +#define DISP_1_REG (0x01) +#define DISP_CAL_REG (0x20) +#define DISP_ID_REG (0x2A) +#define DISP_XMIN_L_REG (0x30) +#define DISP_XMIN_H_REG (0x31) +#define DISP_YMIN_REG (0x32) +#define DISP_XMAX_L_REG (0x34) +#define DISP_XMAX_H_REG (0x35) +#define DISP_YMAX_REG (0x36) +#define DISP_SYNC_EN_REG (0x38) +#define DISP_SYNC_RISE_L_REG (0x3C) +#define DISP_SYNC_RISE_H_REG (0x3D) +#define DISP_SYNC_FALL_L_REG (0x3E) +#define DISP_SYNC_FALL_H_REG (0x3F) +#define DISP_PIXEL_REG (0x0B) +#define DISP_DUMMY1_REG (0x28) +#define DISP_DUMMY2_REG (0x29) +#define DISP_TIMING_REG (0x98) +#define DISP_DUMP_REG (0x99) + +/* Sony display constants */ +#define SONY_ID1 (0x22) +#define SONY_ID2 (0x23) + +/* Philips display register addresses */ +#define PH_DISP_ORIENT_REG (0x003) +#define PH_DISP_YPOINT_REG (0x200) +#define PH_DISP_XPOINT_REG (0x201) +#define PH_DISP_PIXEL_REG (0x202) +#define PH_DISP_YMIN_REG (0x406) +#define PH_DISP_YMAX_REG (0x407) +#define PH_DISP_XMIN_REG (0x408) +#define PH_DISP_XMAX_REG (0x409) + +/* Misc constants */ +#define NO_VALID_DISPLAY_FOUND (0) +#define DISPLAY2_IS_NOT_CONNECTED (0) + +/* register values */ +#define V_BAC_ENABLE (BIT(0)) +#define V_BAC_DISABLE_IDLE (BIT(1)) +#define V_BAC_DISABLE_TRIG (BIT(2)) +#define V_DUM_RESET (BIT(3)) +#define V_MUX_RESET (BIT(4)) +#define BAC_ENABLED (BIT(0)) +#define BAC_DISABLED 0 + +/* Sony LCD commands */ +#define V_LCD_STANDBY_OFF ((BIT(25)) | (0 << 16) | DISP_0_REG) +#define V_LCD_USE_9BIT_BUS ((BIT(25)) | (2 << 16) | DISP_1_REG) +#define V_LCD_SYNC_RISE_L ((BIT(25)) | (0 << 16) | DISP_SYNC_RISE_L_REG) +#define V_LCD_SYNC_RISE_H ((BIT(25)) | (0 << 16) | DISP_SYNC_RISE_H_REG) +#define V_LCD_SYNC_FALL_L ((BIT(25)) | (160 << 16) | DISP_SYNC_FALL_L_REG) +#define V_LCD_SYNC_FALL_H ((BIT(25)) | (0 << 16) | DISP_SYNC_FALL_H_REG) +#define V_LCD_SYNC_ENABLE ((BIT(25)) | (128 << 16) | 
DISP_SYNC_EN_REG) +#define V_LCD_DISPLAY_ON ((BIT(25)) | (64 << 16) | DISP_0_REG) + +enum { + PAD_NONE, + PAD_512, + PAD_1024 +}; + +enum { + RGB888, + RGB666, + RGB565, + BGR565, + ARGB1555, + ABGR1555, + ARGB4444, + ABGR4444 +}; + +struct dum_setup { + int sync_neg_edge; + int round_robin; + int mux_int; + int synced_dirty_flag_int; + int dirty_flag_int; + int error_int; + int pf_empty_int; + int sf_empty_int; + int bac_dis_int; + u32 dirty_base_adr; + u32 command_base_adr; + u32 sync_clk_div; + int sync_output; + u32 sync_restart_val; + u32 set_sync_high; + u32 set_sync_low; +}; + +struct dum_ch_setup { + int disp_no; + u32 xmin; + u32 ymin; + u32 xmax; + u32 ymax; + int xmirror; + int ymirror; + int rotate; + u32 minadr; + u32 maxadr; + u32 dirtybuffer; + int pad; + int format; + int hwdirty; + int slave_trans; +}; + +struct disp_window { + u32 xmin_l; + u32 xmin_h; + u32 ymin; + u32 xmax_l; + u32 xmax_h; + u32 ymax; +}; + +#endif /* #ifndef __PNX008_DUM_H__ */ diff --git a/drivers/video/pnx4008/fbcommon.h b/drivers/video/pnx4008/fbcommon.h new file mode 100644 index 000000000000..4ebc87dafafb --- /dev/null +++ b/drivers/video/pnx4008/fbcommon.h @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2005 Philips Semiconductors + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. 
If not, write to + * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA, or http://www.gnu.org/licenses/gpl.html +*/ + +#define QCIF_W (176) +#define QCIF_H (144) + +#define CIF_W (352) +#define CIF_H (288) + +#define LCD_X_RES 208 +#define LCD_Y_RES 320 +#define LCD_X_PAD 256 +#define LCD_BBP 4 /* Bytes Per Pixel */ + +#define DISP_MAX_X_SIZE (320) +#define DISP_MAX_Y_SIZE (208) + +#define RETURNVAL_BASE (0x400) + +enum fb_ioctl_returntype { + ENORESOURCESLEFT = RETURNVAL_BASE, + ERESOURCESNOTFREED, + EPROCNOTOWNER, + EFBNOTOWNER, + ECOPYFAILED, + EIOREMAPFAILED, +}; diff --git a/drivers/video/pnx4008/pnxrgbfb.c b/drivers/video/pnx4008/pnxrgbfb.c new file mode 100644 index 000000000000..7d9453c91a42 --- /dev/null +++ b/drivers/video/pnx4008/pnxrgbfb.c @@ -0,0 +1,213 @@ +/* + * drivers/video/pnx4008/pnxrgbfb.c + * + * PNX4008's framebuffer support + * + * Author: Grigory Tolstolytkin <gtolstolytkin@ru.mvista.com> + * Based on Philips Semiconductors's code + * + * Copyright (c) 2005 MontaVista Software, Inc. + * Copyright (c) 2005 Philips Semiconductors + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. 
+ */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/delay.h> +#include <linux/interrupt.h> +#include <linux/fb.h> +#include <linux/init.h> +#include <linux/platform_device.h> + +#include <asm/uaccess.h> +#include "sdum.h" +#include "fbcommon.h" + +static u32 colreg[16]; + +static struct fb_var_screeninfo rgbfb_var __initdata = { + .xres = LCD_X_RES, + .yres = LCD_Y_RES, + .xres_virtual = LCD_X_RES, + .yres_virtual = LCD_Y_RES, + .bits_per_pixel = 32, + .red.offset = 16, + .red.length = 8, + .green.offset = 8, + .green.length = 8, + .blue.offset = 0, + .blue.length = 8, + .left_margin = 0, + .right_margin = 0, + .upper_margin = 0, + .lower_margin = 0, + .vmode = FB_VMODE_NONINTERLACED, +}; +static struct fb_fix_screeninfo rgbfb_fix __initdata = { + .id = "RGBFB", + .line_length = LCD_X_RES * LCD_BBP, + .type = FB_TYPE_PACKED_PIXELS, + .visual = FB_VISUAL_TRUECOLOR, + .xpanstep = 0, + .ypanstep = 0, + .ywrapstep = 0, + .accel = FB_ACCEL_NONE, +}; + +static int channel_owned; + +static int no_cursor(struct fb_info *info, struct fb_cursor *cursor) +{ + return 0; +} + +static int rgbfb_setcolreg(u_int regno, u_int red, u_int green, u_int blue, + u_int transp, struct fb_info *info) +{ + if (regno > 15) + return 1; + + colreg[regno] = ((red & 0xff00) << 8) | (green & 0xff00) | + ((blue & 0xff00) >> 8); + return 0; +} + +static int rgbfb_mmap(struct fb_info *info, struct vm_area_struct *vma) +{ + return pnx4008_sdum_mmap(info, vma, NULL); +} + +static struct fb_ops rgbfb_ops = { + .fb_mmap = rgbfb_mmap, + .fb_setcolreg = rgbfb_setcolreg, + .fb_fillrect = cfb_fillrect, + .fb_copyarea = cfb_copyarea, + .fb_imageblit = cfb_imageblit, +}; + +static int rgbfb_remove(struct platform_device *pdev) +{ + struct fb_info *info = platform_get_drvdata(pdev); + + if (info) { + unregister_framebuffer(info); + 
fb_dealloc_cmap(&info->cmap); + framebuffer_release(info); + platform_set_drvdata(pdev, NULL); + kfree(info); + } + + pnx4008_free_dum_channel(channel_owned, pdev->id); + pnx4008_set_dum_exit_notification(pdev->id); + + return 0; +} + +static int __devinit rgbfb_probe(struct platform_device *pdev) +{ + struct fb_info *info; + struct dumchannel_uf chan_uf; + int ret; + char *option; + + info = framebuffer_alloc(sizeof(u32) * 16, &pdev->dev); + if (!info) { + ret = -ENOMEM; + goto err; + } + + pnx4008_get_fb_addresses(FB_TYPE_RGB, (void **)&info->screen_base, + (dma_addr_t *) &rgbfb_fix.smem_start, + &rgbfb_fix.smem_len); + + if ((ret = pnx4008_alloc_dum_channel(pdev->id)) < 0) + goto err0; + else { + channel_owned = ret; + chan_uf.channelnr = channel_owned; + chan_uf.dirty = (u32 *) NULL; + chan_uf.source = (u32 *) rgbfb_fix.smem_start; + chan_uf.x_offset = 0; + chan_uf.y_offset = 0; + chan_uf.width = LCD_X_RES; + chan_uf.height = LCD_Y_RES; + + if ((ret = pnx4008_put_dum_channel_uf(chan_uf, pdev->id))< 0) + goto err1; + + if ((ret = + pnx4008_set_dum_channel_sync(channel_owned, CONF_SYNC_ON, + pdev->id)) < 0) + goto err1; + + if ((ret = + pnx4008_set_dum_channel_dirty_detect(channel_owned, + CONF_DIRTYDETECTION_ON, + pdev->id)) < 0) + goto err1; + } + + if (!fb_get_options("pnxrgbfb", &option) && !strcmp(option, "nocursor")) + rgbfb_ops.fb_cursor = no_cursor; + + info->node = -1; + info->flags = FBINFO_FLAG_DEFAULT; + info->fbops = &rgbfb_ops; + info->fix = rgbfb_fix; + info->var = rgbfb_var; + info->screen_size = rgbfb_fix.smem_len; + info->pseudo_palette = info->par; + info->par = NULL; + + ret = fb_alloc_cmap(&info->cmap, 256, 0); + if (ret < 0) + goto err2; + + ret = register_framebuffer(info); + if (ret < 0) + goto err3; + platform_set_drvdata(pdev, info); + + return 0; + +err3: + fb_dealloc_cmap(&info->cmap); +err2: + framebuffer_release(info); +err1: + pnx4008_free_dum_channel(channel_owned, pdev->id); +err0: + kfree(info); +err: + return ret; +} + +static 
struct platform_driver rgbfb_driver = { + .driver = { + .name = "rgbfb", + }, + .probe = rgbfb_probe, + .remove = rgbfb_remove, +}; + +static int __init rgbfb_init(void) +{ + return platform_driver_register(&rgbfb_driver); +} + +static void __exit rgbfb_exit(void) +{ + platform_driver_unregister(&rgbfb_driver); +} + +module_init(rgbfb_init); +module_exit(rgbfb_exit); + +MODULE_LICENSE("GPL"); diff --git a/drivers/video/pnx4008/sdum.c b/drivers/video/pnx4008/sdum.c new file mode 100644 index 000000000000..51f0ecc2a511 --- /dev/null +++ b/drivers/video/pnx4008/sdum.c @@ -0,0 +1,872 @@ +/* + * drivers/video/pnx4008/sdum.c + * + * Display Update Master support + * + * Authors: Grigory Tolstolytkin <gtolstolytkin@ru.mvista.com> + * Vitaly Wool <vitalywool@gmail.com> + * Based on Philips Semiconductors's code + * + * Copyright (c) 2005-2006 MontaVista Software, Inc. + * Copyright (c) 2005 Philips Semiconductors + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. 
+ */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/tty.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/delay.h> +#include <linux/interrupt.h> +#include <linux/platform_device.h> +#include <linux/fb.h> +#include <linux/init.h> +#include <linux/dma-mapping.h> +#include <linux/clk.h> +#include <asm/uaccess.h> +#include <asm/arch/gpio.h> + +#include "sdum.h" +#include "fbcommon.h" +#include "dum.h" + +/* Framebuffers we have */ + +static struct pnx4008_fb_addr { + int fb_type; + long addr_offset; + long fb_length; +} fb_addr[] = { + [0] = { + FB_TYPE_YUV, 0, 0xB0000 + }, + [1] = { + FB_TYPE_RGB, 0xB0000, 0x50000 + }, +}; + +static struct dum_data { + u32 lcd_phys_start; + u32 lcd_virt_start; + u32 slave_phys_base; + u32 *slave_virt_base; + int fb_owning_channel[MAX_DUM_CHANNELS]; + struct dumchannel_uf chan_uf_store[MAX_DUM_CHANNELS]; +} dum_data; + +/* Different local helper functions */ + +static u32 nof_pixels_dx(struct dum_ch_setup *ch_setup) +{ + return (ch_setup->xmax - ch_setup->xmin + 1); +} + +static u32 nof_pixels_dy(struct dum_ch_setup *ch_setup) +{ + return (ch_setup->ymax - ch_setup->ymin + 1); +} + +static u32 nof_pixels_dxy(struct dum_ch_setup *ch_setup) +{ + return (nof_pixels_dx(ch_setup) * nof_pixels_dy(ch_setup)); +} + +static u32 nof_bytes(struct dum_ch_setup *ch_setup) +{ + u32 r = nof_pixels_dxy(ch_setup); + switch (ch_setup->format) { + case RGB888: + case RGB666: + r *= 4; + break; + + default: + r *= 2; + break; + } + return r; +} + +static u32 build_command(int disp_no, u32 reg, u32 val) +{ + return ((disp_no << 26) | BIT(25) | (val << 16) | (disp_no << 10) | + (reg << 0)); +} + +static u32 build_double_index(int disp_no, u32 val) +{ + return ((disp_no << 26) | (val << 16) | (disp_no << 10) | (val << 0)); +} + +static void build_disp_window(struct dum_ch_setup * ch_setup, struct disp_window * dw) +{ + dw->ymin = 
ch_setup->ymin; + dw->ymax = ch_setup->ymax; + dw->xmin_l = ch_setup->xmin & 0xFF; + dw->xmin_h = (ch_setup->xmin & BIT(8)) >> 8; + dw->xmax_l = ch_setup->xmax & 0xFF; + dw->xmax_h = (ch_setup->xmax & BIT(8)) >> 8; +} + +static int put_channel(struct dumchannel chan) +{ + int i = chan.channelnr; + + if (i < 0 || i > MAX_DUM_CHANNELS) + return -EINVAL; + else { + DUM_CH_MIN(i) = chan.dum_ch_min; + DUM_CH_MAX(i) = chan.dum_ch_max; + DUM_CH_CONF(i) = chan.dum_ch_conf; + DUM_CH_CTRL(i) = chan.dum_ch_ctrl; + } + + return 0; +} + +static void clear_channel(int channr) +{ + struct dumchannel chan; + + chan.channelnr = channr; + chan.dum_ch_min = 0; + chan.dum_ch_max = 0; + chan.dum_ch_conf = 0; + chan.dum_ch_ctrl = 0; + + put_channel(chan); +} + +static int put_cmd_string(struct cmdstring cmds) +{ + u16 *cmd_str_virtaddr; + u32 *cmd_ptr0_virtaddr; + u32 cmd_str_physaddr; + + int i = cmds.channelnr; + + if (i < 0 || i > MAX_DUM_CHANNELS) + return -EINVAL; + else if ((cmd_ptr0_virtaddr = + (int *)ioremap_nocache(DUM_COM_BASE, + sizeof(int) * MAX_DUM_CHANNELS)) == + NULL) + return -EIOREMAPFAILED; + else { + cmd_str_physaddr = ioread32(&cmd_ptr0_virtaddr[cmds.channelnr]); + if ((cmd_str_virtaddr = + (u16 *) ioremap_nocache(cmd_str_physaddr, + sizeof(cmds))) == NULL) { + iounmap(cmd_ptr0_virtaddr); + return -EIOREMAPFAILED; + } else { + int t; + for (t = 0; t < 8; t++) + iowrite16(*((u16 *)&cmds.prestringlen + t), + cmd_str_virtaddr + t); + + for (t = 0; t < cmds.prestringlen / 2; t++) + iowrite16(*((u16 *)&cmds.precmd + t), + cmd_str_virtaddr + t + 8); + + for (t = 0; t < cmds.poststringlen / 2; t++) + iowrite16(*((u16 *)&cmds.postcmd + t), + cmd_str_virtaddr + t + 8 + + cmds.prestringlen / 2); + + iounmap(cmd_ptr0_virtaddr); + iounmap(cmd_str_virtaddr); + } + } + + return 0; +} + +static u32 dum_ch_setup(int ch_no, struct dum_ch_setup * ch_setup) +{ + struct cmdstring cmds_c; + struct cmdstring *cmds = &cmds_c; + struct disp_window dw; + int standard; + u32 orientation = 0; 
+ struct dumchannel chan = { 0 }; + int ret; + + if ((ch_setup->xmirror) || (ch_setup->ymirror) || (ch_setup->rotate)) { + standard = 0; + + orientation = BIT(1); /* always set 9-bit-bus */ + if (ch_setup->xmirror) + orientation |= BIT(4); + if (ch_setup->ymirror) + orientation |= BIT(3); + if (ch_setup->rotate) + orientation |= BIT(0); + } else + standard = 1; + + cmds->channelnr = ch_no; + + /* build command string header */ + if (standard) { + cmds->prestringlen = 32; + cmds->poststringlen = 0; + } else { + cmds->prestringlen = 48; + cmds->poststringlen = 16; + } + + cmds->format = + (u16) ((ch_setup->disp_no << 4) | (BIT(3)) | (ch_setup->format)); + cmds->reserved = 0x0; + cmds->startaddr_low = (ch_setup->minadr & 0xFFFF); + cmds->startaddr_high = (ch_setup->minadr >> 16); + + if ((ch_setup->minadr == 0) && (ch_setup->maxadr == 0) + && (ch_setup->xmin == 0) + && (ch_setup->ymin == 0) && (ch_setup->xmax == 0) + && (ch_setup->ymax == 0)) { + cmds->pixdatlen_low = 0; + cmds->pixdatlen_high = 0; + } else { + u32 nbytes = nof_bytes(ch_setup); + cmds->pixdatlen_low = (nbytes & 0xFFFF); + cmds->pixdatlen_high = (nbytes >> 16); + } + + if (ch_setup->slave_trans) + cmds->pixdatlen_high |= BIT(15); + + /* build pre-string */ + build_disp_window(ch_setup, &dw); + + if (standard) { + cmds->precmd[0] = + build_command(ch_setup->disp_no, DISP_XMIN_L_REG, 0x99); + cmds->precmd[1] = + build_command(ch_setup->disp_no, DISP_XMIN_L_REG, + dw.xmin_l); + cmds->precmd[2] = + build_command(ch_setup->disp_no, DISP_XMIN_H_REG, + dw.xmin_h); + cmds->precmd[3] = + build_command(ch_setup->disp_no, DISP_YMIN_REG, dw.ymin); + cmds->precmd[4] = + build_command(ch_setup->disp_no, DISP_XMAX_L_REG, + dw.xmax_l); + cmds->precmd[5] = + build_command(ch_setup->disp_no, DISP_XMAX_H_REG, + dw.xmax_h); + cmds->precmd[6] = + build_command(ch_setup->disp_no, DISP_YMAX_REG, dw.ymax); + cmds->precmd[7] = + build_double_index(ch_setup->disp_no, DISP_PIXEL_REG); + } else { + if (dw.xmin_l == ch_no) + 
cmds->precmd[0] = + build_command(ch_setup->disp_no, DISP_XMIN_L_REG, + 0x99); + else + cmds->precmd[0] = + build_command(ch_setup->disp_no, DISP_XMIN_L_REG, + ch_no); + + cmds->precmd[1] = + build_command(ch_setup->disp_no, DISP_XMIN_L_REG, + dw.xmin_l); + cmds->precmd[2] = + build_command(ch_setup->disp_no, DISP_XMIN_H_REG, + dw.xmin_h); + cmds->precmd[3] = + build_command(ch_setup->disp_no, DISP_YMIN_REG, dw.ymin); + cmds->precmd[4] = + build_command(ch_setup->disp_no, DISP_XMAX_L_REG, + dw.xmax_l); + cmds->precmd[5] = + build_command(ch_setup->disp_no, DISP_XMAX_H_REG, + dw.xmax_h); + cmds->precmd[6] = + build_command(ch_setup->disp_no, DISP_YMAX_REG, dw.ymax); + cmds->precmd[7] = + build_command(ch_setup->disp_no, DISP_1_REG, orientation); + cmds->precmd[8] = + build_double_index(ch_setup->disp_no, DISP_PIXEL_REG); + cmds->precmd[9] = + build_double_index(ch_setup->disp_no, DISP_PIXEL_REG); + cmds->precmd[0xA] = + build_double_index(ch_setup->disp_no, DISP_PIXEL_REG); + cmds->precmd[0xB] = + build_double_index(ch_setup->disp_no, DISP_PIXEL_REG); + cmds->postcmd[0] = + build_command(ch_setup->disp_no, DISP_1_REG, BIT(1)); + cmds->postcmd[1] = + build_command(ch_setup->disp_no, DISP_DUMMY1_REG, 1); + cmds->postcmd[2] = + build_command(ch_setup->disp_no, DISP_DUMMY1_REG, 2); + cmds->postcmd[3] = + build_command(ch_setup->disp_no, DISP_DUMMY1_REG, 3); + } + + if ((ret = put_cmd_string(cmds_c)) != 0) { + return ret; + } + + chan.channelnr = cmds->channelnr; + chan.dum_ch_min = ch_setup->dirtybuffer + ch_setup->minadr; + chan.dum_ch_max = ch_setup->dirtybuffer + ch_setup->maxadr; + chan.dum_ch_conf = 0x002; + chan.dum_ch_ctrl = 0x04; + + put_channel(chan); + + return 0; +} + +static u32 display_open(int ch_no, int auto_update, u32 * dirty_buffer, + u32 * frame_buffer, u32 xpos, u32 ypos, u32 w, u32 h) +{ + + struct dum_ch_setup k; + int ret; + + /* keep width & height within display area */ + if ((xpos + w) > DISP_MAX_X_SIZE) + w = DISP_MAX_X_SIZE - xpos; + + if 
((ypos + h) > DISP_MAX_Y_SIZE) + h = DISP_MAX_Y_SIZE - ypos; + + /* assume 1 display only */ + k.disp_no = 0; + k.xmin = xpos; + k.ymin = ypos; + k.xmax = xpos + (w - 1); + k.ymax = ypos + (h - 1); + + /* adjust min and max values if necessary */ + if (k.xmin > DISP_MAX_X_SIZE - 1) + k.xmin = DISP_MAX_X_SIZE - 1; + if (k.ymin > DISP_MAX_Y_SIZE - 1) + k.ymin = DISP_MAX_Y_SIZE - 1; + + if (k.xmax > DISP_MAX_X_SIZE - 1) + k.xmax = DISP_MAX_X_SIZE - 1; + if (k.ymax > DISP_MAX_Y_SIZE - 1) + k.ymax = DISP_MAX_Y_SIZE - 1; + + k.xmirror = 0; + k.ymirror = 0; + k.rotate = 0; + k.minadr = (u32) frame_buffer; + k.maxadr = (u32) frame_buffer + (((w - 1) << 10) | ((h << 2) - 2)); + k.pad = PAD_1024; + k.dirtybuffer = (u32) dirty_buffer; + k.format = RGB888; + k.hwdirty = 0; + k.slave_trans = 0; + + ret = dum_ch_setup(ch_no, &k); + + return ret; +} + +static void lcd_reset(void) +{ + u32 *dum_pio_base = (u32 *)IO_ADDRESS(PNX4008_PIO_BASE); + + udelay(1); + iowrite32(BIT(19), &dum_pio_base[2]); + udelay(1); + iowrite32(BIT(19), &dum_pio_base[1]); + udelay(1); +} + +static int dum_init(struct platform_device *pdev) +{ + struct clk *clk; + + /* enable DUM clock */ + clk = clk_get(&pdev->dev, "dum_ck"); + if (IS_ERR(clk)) { + printk(KERN_ERR "pnx4008_dum: Unable to access DUM clock\n"); + return PTR_ERR(clk); + } + + clk_set_rate(clk, 1); + clk_put(clk); + + DUM_CTRL = V_DUM_RESET; + + /* set priority to "round-robin". 
All other params to "false" */ + DUM_CONF = BIT(9); + + /* Display 1 */ + DUM_WTCFG1 = PNX4008_DUM_WT_CFG; + DUM_RTCFG1 = PNX4008_DUM_RT_CFG; + DUM_TCFG = PNX4008_DUM_T_CFG; + + return 0; +} + +static void dum_chan_init(void) +{ + int i = 0, ch = 0; + u32 *cmdptrs; + u32 *cmdstrings; + + DUM_COM_BASE = + CMDSTRING_BASEADDR + BYTES_PER_CMDSTRING * NR_OF_CMDSTRINGS; + + if ((cmdptrs = + (u32 *) ioremap_nocache(DUM_COM_BASE, + sizeof(u32) * NR_OF_CMDSTRINGS)) == NULL) + return; + + for (ch = 0; ch < NR_OF_CMDSTRINGS; ch++) + iowrite32(CMDSTRING_BASEADDR + BYTES_PER_CMDSTRING * ch, + cmdptrs + ch); + + for (ch = 0; ch < MAX_DUM_CHANNELS; ch++) + clear_channel(ch); + + /* Clear the cmdstrings */ + cmdstrings = + (u32 *)ioremap_nocache(*cmdptrs, + BYTES_PER_CMDSTRING * NR_OF_CMDSTRINGS); + + if (!cmdstrings) + goto out; + + for (i = 0; i < NR_OF_CMDSTRINGS * BYTES_PER_CMDSTRING / sizeof(u32); + i++) + iowrite32(0, cmdstrings + i); + + iounmap((u32 *)cmdstrings); + +out: + iounmap((u32 *)cmdptrs); +} + +static void lcd_init(void) +{ + lcd_reset(); + + DUM_OUTP_FORMAT1 = 0; /* RGB666 */ + + udelay(1); + iowrite32(V_LCD_STANDBY_OFF, dum_data.slave_virt_base); + udelay(1); + iowrite32(V_LCD_USE_9BIT_BUS, dum_data.slave_virt_base); + udelay(1); + iowrite32(V_LCD_SYNC_RISE_L, dum_data.slave_virt_base); + udelay(1); + iowrite32(V_LCD_SYNC_RISE_H, dum_data.slave_virt_base); + udelay(1); + iowrite32(V_LCD_SYNC_FALL_L, dum_data.slave_virt_base); + udelay(1); + iowrite32(V_LCD_SYNC_FALL_H, dum_data.slave_virt_base); + udelay(1); + iowrite32(V_LCD_SYNC_ENABLE, dum_data.slave_virt_base); + udelay(1); + iowrite32(V_LCD_DISPLAY_ON, dum_data.slave_virt_base); + udelay(1); +} + +/* Interface exported to framebuffer drivers */ + +int pnx4008_get_fb_addresses(int fb_type, void **virt_addr, + dma_addr_t *phys_addr, int *fb_length) +{ + int i; + int ret = -1; + for (i = 0; i < ARRAY_SIZE(fb_addr); i++) + if (fb_addr[i].fb_type == fb_type) { + *virt_addr = (void *)(dum_data.lcd_virt_start + + 
fb_addr[i].addr_offset); + *phys_addr = + dum_data.lcd_phys_start + fb_addr[i].addr_offset; + *fb_length = fb_addr[i].fb_length; + ret = 0; + break; + } + + return ret; +} + +EXPORT_SYMBOL(pnx4008_get_fb_addresses); + +int pnx4008_alloc_dum_channel(int dev_id) +{ + int i = 0; + + while ((i < MAX_DUM_CHANNELS) && (dum_data.fb_owning_channel[i] != -1)) + i++; + + if (i == MAX_DUM_CHANNELS) + return -ENORESOURCESLEFT; + else { + dum_data.fb_owning_channel[i] = dev_id; + return i; + } +} + +EXPORT_SYMBOL(pnx4008_alloc_dum_channel); + +int pnx4008_free_dum_channel(int channr, int dev_id) +{ + if (channr < 0 || channr > MAX_DUM_CHANNELS) + return -EINVAL; + else if (dum_data.fb_owning_channel[channr] != dev_id) + return -EFBNOTOWNER; + else { + clear_channel(channr); + dum_data.fb_owning_channel[channr] = -1; + } + + return 0; +} + +EXPORT_SYMBOL(pnx4008_free_dum_channel); + +int pnx4008_put_dum_channel_uf(struct dumchannel_uf chan_uf, int dev_id) +{ + int i = chan_uf.channelnr; + int ret; + + if (i < 0 || i > MAX_DUM_CHANNELS) + return -EINVAL; + else if (dum_data.fb_owning_channel[i] != dev_id) + return -EFBNOTOWNER; + else if ((ret = + display_open(chan_uf.channelnr, 0, chan_uf.dirty, + chan_uf.source, chan_uf.y_offset, + chan_uf.x_offset, chan_uf.height, + chan_uf.width)) != 0) + return ret; + else { + dum_data.chan_uf_store[i].dirty = chan_uf.dirty; + dum_data.chan_uf_store[i].source = chan_uf.source; + dum_data.chan_uf_store[i].x_offset = chan_uf.x_offset; + dum_data.chan_uf_store[i].y_offset = chan_uf.y_offset; + dum_data.chan_uf_store[i].width = chan_uf.width; + dum_data.chan_uf_store[i].height = chan_uf.height; + } + + return 0; +} + +EXPORT_SYMBOL(pnx4008_put_dum_channel_uf); + +int pnx4008_set_dum_channel_sync(int channr, int val, int dev_id) +{ + if (channr < 0 || channr > MAX_DUM_CHANNELS) + return -EINVAL; + else if (dum_data.fb_owning_channel[channr] != dev_id) + return -EFBNOTOWNER; + else { + if (val == CONF_SYNC_ON) { + DUM_CH_CONF(channr) |= 
CONF_SYNCENABLE; + DUM_CH_CONF(channr) |= DUM_CHANNEL_CFG_SYNC_MASK | + DUM_CHANNEL_CFG_SYNC_MASK_SET; + } else if (val == CONF_SYNC_OFF) + DUM_CH_CONF(channr) &= ~CONF_SYNCENABLE; + else + return -EINVAL; + } + + return 0; +} + +EXPORT_SYMBOL(pnx4008_set_dum_channel_sync); + +int pnx4008_set_dum_channel_dirty_detect(int channr, int val, int dev_id) +{ + if (channr < 0 || channr > MAX_DUM_CHANNELS) + return -EINVAL; + else if (dum_data.fb_owning_channel[channr] != dev_id) + return -EFBNOTOWNER; + else { + if (val == CONF_DIRTYDETECTION_ON) + DUM_CH_CONF(channr) |= CONF_DIRTYENABLE; + else if (val == CONF_DIRTYDETECTION_OFF) + DUM_CH_CONF(channr) &= ~CONF_DIRTYENABLE; + else + return -EINVAL; + } + + return 0; +} + +EXPORT_SYMBOL(pnx4008_set_dum_channel_dirty_detect); + +#if 0 /* Functions not used currently, but likely to be used in future */ + +static int get_channel(struct dumchannel *p_chan) +{ + int i = p_chan->channelnr; + + if (i < 0 || i > MAX_DUM_CHANNELS) + return -EINVAL; + else { + p_chan->dum_ch_min = DUM_CH_MIN(i); + p_chan->dum_ch_max = DUM_CH_MAX(i); + p_chan->dum_ch_conf = DUM_CH_CONF(i); + p_chan->dum_ch_stat = DUM_CH_STAT(i); + p_chan->dum_ch_ctrl = 0; /* WriteOnly control register */ + } + + return 0; +} + +int pnx4008_get_dum_channel_uf(struct dumchannel_uf *p_chan_uf, int dev_id) +{ + int i = p_chan_uf->channelnr; + + if (i < 0 || i > MAX_DUM_CHANNELS) + return -EINVAL; + else if (dum_data.fb_owning_channel[i] != dev_id) + return -EFBNOTOWNER; + else { + p_chan_uf->dirty = dum_data.chan_uf_store[i].dirty; + p_chan_uf->source = dum_data.chan_uf_store[i].source; + p_chan_uf->x_offset = dum_data.chan_uf_store[i].x_offset; + p_chan_uf->y_offset = dum_data.chan_uf_store[i].y_offset; + p_chan_uf->width = dum_data.chan_uf_store[i].width; + p_chan_uf->height = dum_data.chan_uf_store[i].height; + } + + return 0; +} + +EXPORT_SYMBOL(pnx4008_get_dum_channel_uf); + +int pnx4008_get_dum_channel_config(int channr, int dev_id) +{ + int ret; + struct 
dumchannel chan; + + if (channr < 0 || channr > MAX_DUM_CHANNELS) + return -EINVAL; + else if (dum_data.fb_owning_channel[channr] != dev_id) + return -EFBNOTOWNER; + else { + chan.channelnr = channr; + if ((ret = get_channel(&chan)) != 0) + return ret; + } + + return (chan.dum_ch_conf & DUM_CHANNEL_CFG_MASK); +} + +EXPORT_SYMBOL(pnx4008_get_dum_channel_config); + +int pnx4008_force_update_dum_channel(int channr, int dev_id) +{ + if (channr < 0 || channr > MAX_DUM_CHANNELS) + return -EINVAL; + + else if (dum_data.fb_owning_channel[channr] != dev_id) + return -EFBNOTOWNER; + else + DUM_CH_CTRL(channr) = CTRL_SETDIRTY; + + return 0; +} + +EXPORT_SYMBOL(pnx4008_force_update_dum_channel); + +#endif + +int pnx4008_sdum_mmap(struct fb_info *info, struct vm_area_struct *vma, + struct device *dev) +{ + unsigned long off = vma->vm_pgoff << PAGE_SHIFT; + + if (off < info->fix.smem_len) { + vma->vm_pgoff += 1; + return dma_mmap_writecombine(dev, vma, + (void *)dum_data.lcd_virt_start, + dum_data.lcd_phys_start, + FB_DMA_SIZE); + } + return -EINVAL; +} + +EXPORT_SYMBOL(pnx4008_sdum_mmap); + +int pnx4008_set_dum_exit_notification(int dev_id) +{ + int i; + + for (i = 0; i < MAX_DUM_CHANNELS; i++) + if (dum_data.fb_owning_channel[i] == dev_id) + return -ERESOURCESNOTFREED; + + return 0; +} + +EXPORT_SYMBOL(pnx4008_set_dum_exit_notification); + +/* Platform device driver for DUM */ + +static int sdum_suspend(struct platform_device *pdev, pm_message_t state) +{ + int retval = 0; + struct clk *clk; + + clk = clk_get(0, "dum_ck"); + if (!IS_ERR(clk)) { + clk_set_rate(clk, 0); + clk_put(clk); + } else + retval = PTR_ERR(clk); + + /* disable BAC */ + DUM_CTRL = V_BAC_DISABLE_IDLE; + + /* LCD standby & turn off display */ + lcd_reset(); + + return retval; +} + +static int sdum_resume(struct platform_device *pdev) +{ + int retval = 0; + struct clk *clk; + + clk = clk_get(0, "dum_ck"); + if (!IS_ERR(clk)) { + clk_set_rate(clk, 1); + clk_put(clk); + } else + retval = PTR_ERR(clk); + + /* 
wait for BAC disable */ + DUM_CTRL = V_BAC_DISABLE_TRIG; + + while (DUM_CTRL & BAC_ENABLED) + udelay(10); + + /* re-init LCD */ + lcd_init(); + + /* enable BAC and reset MUX */ + DUM_CTRL = V_BAC_ENABLE; + udelay(1); + DUM_CTRL = V_MUX_RESET; + return 0; +} + +static int __devinit sdum_probe(struct platform_device *pdev) +{ + int ret = 0, i = 0; + + /* map frame buffer */ + dum_data.lcd_virt_start = (u32) dma_alloc_writecombine(&pdev->dev, + FB_DMA_SIZE, + &dum_data.lcd_phys_start, + GFP_KERNEL); + + if (!dum_data.lcd_virt_start) { + ret = -ENOMEM; + goto out_3; + } + + /* map slave registers */ + dum_data.slave_phys_base = PNX4008_DUM_SLAVE_BASE; + dum_data.slave_virt_base = + (u32 *) ioremap_nocache(dum_data.slave_phys_base, sizeof(u32)); + + if (dum_data.slave_virt_base == NULL) { + ret = -ENOMEM; + goto out_2; + } + + /* initialize DUM and LCD display */ + ret = dum_init(pdev); + if (ret) + goto out_1; + + dum_chan_init(); + lcd_init(); + + DUM_CTRL = V_BAC_ENABLE; + udelay(1); + DUM_CTRL = V_MUX_RESET; + + /* set decode address and sync clock divider */ + DUM_DECODE = dum_data.lcd_phys_start & DUM_DECODE_MASK; + DUM_CLK_DIV = PNX4008_DUM_CLK_DIV; + + for (i = 0; i < MAX_DUM_CHANNELS; i++) + dum_data.fb_owning_channel[i] = -1; + + /*setup wakeup interrupt */ + start_int_set_rising_edge(SE_DISP_SYNC_INT); + start_int_ack(SE_DISP_SYNC_INT); + start_int_umask(SE_DISP_SYNC_INT); + + return 0; + +out_1: + iounmap((void *)dum_data.slave_virt_base); +out_2: + dma_free_writecombine(&pdev->dev, FB_DMA_SIZE, + (void *)dum_data.lcd_virt_start, + dum_data.lcd_phys_start); +out_3: + return ret; +} + +static int sdum_remove(struct platform_device *pdev) +{ + struct clk *clk; + + start_int_mask(SE_DISP_SYNC_INT); + + clk = clk_get(0, "dum_ck"); + if (!IS_ERR(clk)) { + clk_set_rate(clk, 0); + clk_put(clk); + } + + iounmap((void *)dum_data.slave_virt_base); + + dma_free_writecombine(&pdev->dev, FB_DMA_SIZE, + (void *)dum_data.lcd_virt_start, + dum_data.lcd_phys_start); + + 
return 0; +} + +static struct platform_driver sdum_driver = { + .driver = { + .name = "sdum", + }, + .probe = sdum_probe, + .remove = sdum_remove, + .suspend = sdum_suspend, + .resume = sdum_resume, +}; + +int __init sdum_init(void) +{ + return platform_driver_register(&sdum_driver); +} + +static void __exit sdum_exit(void) +{ + platform_driver_unregister(&sdum_driver); +}; + +module_init(sdum_init); +module_exit(sdum_exit); + +MODULE_LICENSE("GPL"); diff --git a/drivers/video/pnx4008/sdum.h b/drivers/video/pnx4008/sdum.h new file mode 100644 index 000000000000..e8c5dcdd8813 --- /dev/null +++ b/drivers/video/pnx4008/sdum.h @@ -0,0 +1,139 @@ +/* + * Copyright (C) 2005 Philips Semiconductors + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA, or http://www.gnu.org/licenses/gpl.html +*/ + +#define MAX_DUM_CHANNELS 64 + +#define RGB_MEM_WINDOW(x) (0x10000000 + (x)*0x00100000) + +#define QCIF_OFFSET(x) (((x) == 0) ? 0x00000: ((x) == 1) ? 0x30000: -1) +#define CIF_OFFSET(x) (((x) == 0) ? 0x00000: ((x) == 1) ? 
0x60000: -1) + +#define CTRL_SETDIRTY (0x00000001) +#define CONF_DIRTYENABLE (0x00000020) +#define CONF_SYNCENABLE (0x00000004) + +#define DIRTY_ENABLED(conf) ((conf) & 0x0020) +#define SYNC_ENABLED(conf) ((conf) & 0x0004) + +/* Display 1 & 2 Write Timing Configuration */ +#define PNX4008_DUM_WT_CFG 0x00372000 + +/* Display 1 & 2 Read Timing Configuration */ +#define PNX4008_DUM_RT_CFG 0x00003A47 + +/* DUM Transit State Timing Configuration */ +#define PNX4008_DUM_T_CFG 0x1D /* 29 HCLK cycles */ + +/* DUM Sync count clock divider */ +#define PNX4008_DUM_CLK_DIV 0x02DD + +/* Memory size for framebuffer, allocated through dma_alloc_writecombine(). + * Must be PAGE aligned + */ +#define FB_DMA_SIZE (PAGE_ALIGN(SZ_1M + PAGE_SIZE)) + +#define OFFSET_RGBBUFFER (0xB0000) +#define OFFSET_YUVBUFFER (0x00000) + +#define YUVBUFFER (lcd_video_start + OFFSET_YUVBUFFER) +#define RGBBUFFER (lcd_video_start + OFFSET_RGBBUFFER) + +#define CMDSTRING_BASEADDR (0x00C000) /* iram */ +#define BYTES_PER_CMDSTRING (0x80) +#define NR_OF_CMDSTRINGS (64) + +#define MAX_NR_PRESTRINGS (0x40) +#define MAX_NR_POSTSTRINGS (0x40) + +/* various mask definitions */ +#define DUM_CLK_ENABLE 0x01 +#define DUM_CLK_DISABLE 0 +#define DUM_DECODE_MASK 0x1FFFFFFF +#define DUM_CHANNEL_CFG_MASK 0x01FF +#define DUM_CHANNEL_CFG_SYNC_MASK 0xFFFE00FF +#define DUM_CHANNEL_CFG_SYNC_MASK_SET 0x0CA00 + +#define SDUM_RETURNVAL_BASE (0x500) + +#define CONF_SYNC_OFF (0x602) +#define CONF_SYNC_ON (0x603) + +#define CONF_DIRTYDETECTION_OFF (0x600) +#define CONF_DIRTYDETECTION_ON (0x601) + +/* Set the corresponding bit. 
*/ +#define BIT(n) (0x1U << (n)) + +struct dumchannel_uf { + int channelnr; + u32 *dirty; + u32 *source; + u32 x_offset; + u32 y_offset; + u32 width; + u32 height; +}; + +enum { + FB_TYPE_YUV, + FB_TYPE_RGB +}; + +struct cmdstring { + int channelnr; + uint16_t prestringlen; + uint16_t poststringlen; + uint16_t format; + uint16_t reserved; + uint16_t startaddr_low; + uint16_t startaddr_high; + uint16_t pixdatlen_low; + uint16_t pixdatlen_high; + u32 precmd[MAX_NR_PRESTRINGS]; + u32 postcmd[MAX_NR_POSTSTRINGS]; + +}; + +struct dumchannel { + int channelnr; + int dum_ch_min; + int dum_ch_max; + int dum_ch_conf; + int dum_ch_stat; + int dum_ch_ctrl; +}; + +int pnx4008_alloc_dum_channel(int dev_id); +int pnx4008_free_dum_channel(int channr, int dev_id); + +int pnx4008_get_dum_channel_uf(struct dumchannel_uf *pChan_uf, int dev_id); +int pnx4008_put_dum_channel_uf(struct dumchannel_uf chan_uf, int dev_id); + +int pnx4008_set_dum_channel_sync(int channr, int val, int dev_id); +int pnx4008_set_dum_channel_dirty_detect(int channr, int val, int dev_id); + +int pnx4008_force_dum_update_channel(int channr, int dev_id); + +int pnx4008_get_dum_channel_config(int channr, int dev_id); + +int pnx4008_sdum_mmap(struct fb_info *info, struct vm_area_struct *vma, struct device *dev); +int pnx4008_set_dum_exit_notification(int dev_id); + +int pnx4008_get_fb_addresses(int fb_type, void **virt_addr, + dma_addr_t * phys_addr, int *fb_length); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index d0434406eaeb..f42e64210ee5 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -84,7 +84,7 @@ static struct linux_binfmt elf_format = { .min_coredump = ELF_EXEC_PAGESIZE }; -#define BAD_ADDR(x) ((unsigned long)(x) > TASK_SIZE) +#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE) static int set_brk(unsigned long start, unsigned long end) { @@ -394,7 +394,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, * <= p_memsize so it's only necessary to check p_memsz. 
*/ k = load_addr + eppnt->p_vaddr; - if (k > TASK_SIZE || + if (BAD_ADDR(k) || eppnt->p_filesz > eppnt->p_memsz || eppnt->p_memsz > TASK_SIZE || TASK_SIZE - eppnt->p_memsz < k) { @@ -887,7 +887,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) * allowed task size. Note that p_filesz must always be * <= p_memsz so it is only necessary to check p_memsz. */ - if (k > TASK_SIZE || elf_ppnt->p_filesz > elf_ppnt->p_memsz || + if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz || elf_ppnt->p_memsz > TASK_SIZE || TASK_SIZE - elf_ppnt->p_memsz < k) { /* set_brk can never work. Avoid overflows. */ @@ -941,10 +941,9 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) interpreter, &interp_load_addr); if (BAD_ADDR(elf_entry)) { - printk(KERN_ERR "Unable to load interpreter %.128s\n", - elf_interpreter); force_sig(SIGSEGV, current); - retval = -ENOEXEC; /* Nobody gets to see this, but.. */ + retval = IS_ERR((void *)elf_entry) ? + (int)elf_entry : -EINVAL; goto out_free_dentry; } reloc_func_desc = interp_load_addr; @@ -955,8 +954,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) } else { elf_entry = loc->elf_ex.e_entry; if (BAD_ADDR(elf_entry)) { - send_sig(SIGSEGV, current, 0); - retval = -ENOEXEC; /* Nobody gets to see this, but.. 
*/ + force_sig(SIGSEGV, current); + retval = -EINVAL; goto out_free_dentry; } } diff --git a/fs/block_dev.c b/fs/block_dev.c index 9633a490dab0..37534573960b 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -739,7 +739,7 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder, if (!bo) return -ENOMEM; - mutex_lock(&bdev->bd_mutex); + mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_PARTITION); res = bd_claim(bdev, holder); if (res || !add_bd_holder(bdev, bo)) free_bd_holder(bo); @@ -764,7 +764,7 @@ static void bd_release_from_kobject(struct block_device *bdev, if (!kobj) return; - mutex_lock(&bdev->bd_mutex); + mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_PARTITION); bd_release(bdev); if ((bo = del_bd_holder(bdev, kobj))) free_bd_holder(bo); @@ -822,6 +822,22 @@ struct block_device *open_by_devnum(dev_t dev, unsigned mode) EXPORT_SYMBOL(open_by_devnum); +static int +blkdev_get_partition(struct block_device *bdev, mode_t mode, unsigned flags); + +struct block_device *open_partition_by_devnum(dev_t dev, unsigned mode) +{ + struct block_device *bdev = bdget(dev); + int err = -ENOMEM; + int flags = mode & FMODE_WRITE ? O_RDWR : O_RDONLY; + if (bdev) + err = blkdev_get_partition(bdev, mode, flags); + return err ? ERR_PTR(err) : bdev; +} + +EXPORT_SYMBOL(open_partition_by_devnum); + + /* * This routine checks whether a removable media has been changed, * and invalidates all buffer-cache-entries in that case. 
This @@ -868,7 +884,11 @@ void bd_set_size(struct block_device *bdev, loff_t size) } EXPORT_SYMBOL(bd_set_size); -static int do_open(struct block_device *bdev, struct file *file) +static int +blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags); + +static int +do_open(struct block_device *bdev, struct file *file, unsigned int subclass) { struct module *owner = NULL; struct gendisk *disk; @@ -885,7 +905,8 @@ static int do_open(struct block_device *bdev, struct file *file) } owner = disk->fops->owner; - mutex_lock(&bdev->bd_mutex); + mutex_lock_nested(&bdev->bd_mutex, subclass); + if (!bdev->bd_openers) { bdev->bd_disk = disk; bdev->bd_contains = bdev; @@ -912,11 +933,11 @@ static int do_open(struct block_device *bdev, struct file *file) ret = -ENOMEM; if (!whole) goto out_first; - ret = blkdev_get(whole, file->f_mode, file->f_flags); + ret = blkdev_get_whole(whole, file->f_mode, file->f_flags); if (ret) goto out_first; bdev->bd_contains = whole; - mutex_lock(&whole->bd_mutex); + mutex_lock_nested(&whole->bd_mutex, BD_MUTEX_WHOLE); whole->bd_part_count++; p = disk->part[part - 1]; bdev->bd_inode->i_data.backing_dev_info = @@ -944,7 +965,8 @@ static int do_open(struct block_device *bdev, struct file *file) if (bdev->bd_invalidated) rescan_partitions(bdev->bd_disk, bdev); } else { - mutex_lock(&bdev->bd_contains->bd_mutex); + mutex_lock_nested(&bdev->bd_contains->bd_mutex, + BD_MUTEX_PARTITION); bdev->bd_contains->bd_part_count++; mutex_unlock(&bdev->bd_contains->bd_mutex); } @@ -985,11 +1007,49 @@ int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags) fake_file.f_dentry = &fake_dentry; fake_dentry.d_inode = bdev->bd_inode; - return do_open(bdev, &fake_file); + return do_open(bdev, &fake_file, BD_MUTEX_NORMAL); } EXPORT_SYMBOL(blkdev_get); +static int +blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags) +{ + /* + * This crockload is due to bad choice of ->open() type. + * It will go away. 
+ * For now, block device ->open() routine must _not_ + * examine anything in 'inode' argument except ->i_rdev. + */ + struct file fake_file = {}; + struct dentry fake_dentry = {}; + fake_file.f_mode = mode; + fake_file.f_flags = flags; + fake_file.f_dentry = &fake_dentry; + fake_dentry.d_inode = bdev->bd_inode; + + return do_open(bdev, &fake_file, BD_MUTEX_WHOLE); +} + +static int +blkdev_get_partition(struct block_device *bdev, mode_t mode, unsigned flags) +{ + /* + * This crockload is due to bad choice of ->open() type. + * It will go away. + * For now, block device ->open() routine must _not_ + * examine anything in 'inode' argument except ->i_rdev. + */ + struct file fake_file = {}; + struct dentry fake_dentry = {}; + fake_file.f_mode = mode; + fake_file.f_flags = flags; + fake_file.f_dentry = &fake_dentry; + fake_dentry.d_inode = bdev->bd_inode; + + return do_open(bdev, &fake_file, BD_MUTEX_PARTITION); +} + static int blkdev_open(struct inode * inode, struct file * filp) { struct block_device *bdev; @@ -1005,7 +1065,7 @@ static int blkdev_open(struct inode * inode, struct file * filp) bdev = bd_acquire(inode); - res = do_open(bdev, filp); + res = do_open(bdev, filp, BD_MUTEX_NORMAL); if (res) return res; @@ -1019,13 +1079,13 @@ static int blkdev_open(struct inode * inode, struct file * filp) return res; } -int blkdev_put(struct block_device *bdev) +static int __blkdev_put(struct block_device *bdev, unsigned int subclass) { int ret = 0; struct inode *bd_inode = bdev->bd_inode; struct gendisk *disk = bdev->bd_disk; - mutex_lock(&bdev->bd_mutex); + mutex_lock_nested(&bdev->bd_mutex, subclass); lock_kernel(); if (!--bdev->bd_openers) { sync_blockdev(bdev); @@ -1035,7 +1095,8 @@ int blkdev_put(struct block_device *bdev) if (disk->fops->release) ret = disk->fops->release(bd_inode, NULL); } else { - mutex_lock(&bdev->bd_contains->bd_mutex); + mutex_lock_nested(&bdev->bd_contains->bd_mutex, + subclass + 1); bdev->bd_contains->bd_part_count--; 
mutex_unlock(&bdev->bd_contains->bd_mutex); } @@ -1051,9 +1112,8 @@ int blkdev_put(struct block_device *bdev) } bdev->bd_disk = NULL; bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; - if (bdev != bdev->bd_contains) { - blkdev_put(bdev->bd_contains); - } + if (bdev != bdev->bd_contains) + __blkdev_put(bdev->bd_contains, subclass + 1); bdev->bd_contains = NULL; } unlock_kernel(); @@ -1062,8 +1122,20 @@ int blkdev_put(struct block_device *bdev) return ret; } +int blkdev_put(struct block_device *bdev) +{ + return __blkdev_put(bdev, BD_MUTEX_NORMAL); +} + EXPORT_SYMBOL(blkdev_put); +int blkdev_put_partition(struct block_device *bdev) +{ + return __blkdev_put(bdev, BD_MUTEX_PARTITION); +} + +EXPORT_SYMBOL(blkdev_put_partition); + static int blkdev_close(struct inode * inode, struct file * filp) { struct block_device *bdev = I_BDEV(filp->f_mapping->host); diff --git a/fs/dcache.c b/fs/dcache.c index c6e3535be192..1b4a3a34ec57 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -38,7 +38,7 @@ int sysctl_vfs_cache_pressure __read_mostly = 100; EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock); -static seqlock_t rename_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED; +static __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); EXPORT_SYMBOL(dcache_lock); @@ -1339,10 +1339,10 @@ void d_move(struct dentry * dentry, struct dentry * target) */ if (target < dentry) { spin_lock(&target->d_lock); - spin_lock(&dentry->d_lock); + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); } else { spin_lock(&dentry->d_lock); - spin_lock(&target->d_lock); + spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED); } /* Move the dentry to the target hash queue, if on different bucket */ diff --git a/fs/direct-io.c b/fs/direct-io.c index 538fb0418fba..5981e17f46f0 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -220,7 +220,8 @@ static void dio_complete(struct dio *dio, loff_t offset, ssize_t bytes) if (dio->end_io 
&& dio->result) dio->end_io(dio->iocb, offset, bytes, dio->map_bh.b_private); if (dio->lock_type == DIO_LOCKING) - up_read(&dio->inode->i_alloc_sem); + /* lockdep: non-owner release */ + up_read_non_owner(&dio->inode->i_alloc_sem); } /* @@ -1261,7 +1262,8 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, } if (dio_lock_type == DIO_LOCKING) - down_read(&inode->i_alloc_sem); + /* lockdep: not the owner will release it */ + down_read_non_owner(&inode->i_alloc_sem); } /* diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 9c677bbd0b08..19ffb043abbc 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -120,7 +120,7 @@ struct epoll_filefd { */ struct wake_task_node { struct list_head llink; - task_t *task; + struct task_struct *task; wait_queue_head_t *wq; }; @@ -413,7 +413,7 @@ static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq) { int wake_nests = 0; unsigned long flags; - task_t *this_task = current; + struct task_struct *this_task = current; struct list_head *lsthead = &psw->wake_task_list, *lnk; struct wake_task_node *tncur; struct wake_task_node tnode; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 9f43879d6d68..f2702cda9779 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1157,7 +1157,7 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type, struct buffer_head tmp_bh; struct buffer_head *bh; - mutex_lock(&inode->i_mutex); + mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); while (towrite > 0) { tocopy = sb->s_blocksize - offset < towrite ? 
sb->s_blocksize - offset : towrite; diff --git a/fs/ext3/super.c b/fs/ext3/super.c index f2dd71336612..813d589cc6c0 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2614,7 +2614,7 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type, struct buffer_head *bh; handle_t *handle = journal_current_handle(); - mutex_lock(&inode->i_mutex); + mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); while (towrite > 0) { tocopy = sb->s_blocksize - offset < towrite ? sb->s_blocksize - offset : towrite; diff --git a/fs/namei.c b/fs/namei.c index c784e8bb57a3..c9750d755aff 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1423,7 +1423,7 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) struct dentry *p; if (p1 == p2) { - mutex_lock(&p1->d_inode->i_mutex); + mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); return NULL; } @@ -1431,22 +1431,22 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) for (p = p1; p->d_parent != p; p = p->d_parent) { if (p->d_parent == p2) { - mutex_lock(&p2->d_inode->i_mutex); - mutex_lock(&p1->d_inode->i_mutex); + mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_CHILD); return p; } } for (p = p2; p->d_parent != p; p = p->d_parent) { if (p->d_parent == p1) { - mutex_lock(&p1->d_inode->i_mutex); - mutex_lock(&p2->d_inode->i_mutex); + mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD); return p; } } - mutex_lock(&p1->d_inode->i_mutex); - mutex_lock(&p2->d_inode->i_mutex); + mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD); return NULL; } @@ -1751,7 +1751,7 @@ struct dentry *lookup_create(struct nameidata *nd, int is_dir) { struct dentry *dentry = ERR_PTR(-EEXIST); - mutex_lock(&nd->dentry->d_inode->i_mutex); + mutex_lock_nested(&nd->dentry->d_inode->i_mutex, I_MUTEX_PARENT); /* * Yucky last component or no last 
component at all? * (foo/., foo/.., /////) @@ -2008,7 +2008,7 @@ static long do_rmdir(int dfd, const char __user *pathname) error = -EBUSY; goto exit1; } - mutex_lock(&nd.dentry->d_inode->i_mutex); + mutex_lock_nested(&nd.dentry->d_inode->i_mutex, I_MUTEX_PARENT); dentry = lookup_hash(&nd); error = PTR_ERR(dentry); if (!IS_ERR(dentry)) { @@ -2082,7 +2082,7 @@ static long do_unlinkat(int dfd, const char __user *pathname) error = -EISDIR; if (nd.last_type != LAST_NORM) goto exit1; - mutex_lock(&nd.dentry->d_inode->i_mutex); + mutex_lock_nested(&nd.dentry->d_inode->i_mutex, I_MUTEX_PARENT); dentry = lookup_hash(&nd); error = PTR_ERR(dentry); if (!IS_ERR(dentry)) { diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 4c86b7e1d1eb..d313f356e66a 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -367,6 +367,12 @@ static void ntfs_destroy_extent_inode(ntfs_inode *ni) kmem_cache_free(ntfs_inode_cache, ni); } +/* + * The attribute runlist lock has separate locking rules from the + * normal runlist lock, so split the two lock-classes: + */ +static struct lock_class_key attr_list_rl_lock_class; + /** * __ntfs_init_inode - initialize ntfs specific part of an inode * @sb: super block of mounted volume @@ -394,6 +400,8 @@ void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni) ni->attr_list_size = 0; ni->attr_list = NULL; ntfs_init_runlist(&ni->attr_list_rl); + lockdep_set_class(&ni->attr_list_rl.lock, + &attr_list_rl_lock_class); ni->itype.index.bmp_ino = NULL; ni->itype.index.block_size = 0; ni->itype.index.vcn_size = 0; @@ -405,6 +413,13 @@ void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni) ni->ext.base_ntfs_ino = NULL; } +/* + * Extent inodes get MFT-mapped in a nested way, while the base inode + * is still mapped. 
Teach this nesting to the lock validator by creating + * a separate class for nested inode's mrec_lock's: + */ +static struct lock_class_key extent_inode_mrec_lock_key; + inline ntfs_inode *ntfs_new_extent_inode(struct super_block *sb, unsigned long mft_no) { @@ -413,6 +428,7 @@ inline ntfs_inode *ntfs_new_extent_inode(struct super_block *sb, ntfs_debug("Entering."); if (likely(ni != NULL)) { __ntfs_init_inode(sb, ni); + lockdep_set_class(&ni->mrec_lock, &extent_inode_mrec_lock_key); ni->mft_no = mft_no; ni->type = AT_UNUSED; ni->name = NULL; @@ -1722,6 +1738,15 @@ err_out: return err; } +/* + * The MFT inode has special locking, so teach the lock validator + * about this by splitting off the locking rules of the MFT from + * the locking rules of other inodes. The MFT inode can never be + * accessed from the VFS side (or even internally), only by the + * map_mft functions. + */ +static struct lock_class_key mft_ni_runlist_lock_key, mft_ni_mrec_lock_key; + /** * ntfs_read_inode_mount - special read_inode for mount time use only * @vi: inode to read @@ -2148,6 +2173,14 @@ int ntfs_read_inode_mount(struct inode *vi) ntfs_attr_put_search_ctx(ctx); ntfs_debug("Done."); ntfs_free(m); + + /* + * Split the locking rules of the MFT inode from the + * locking rules of other inodes: + */ + lockdep_set_class(&ni->runlist.lock, &mft_ni_runlist_lock_key); + lockdep_set_class(&ni->mrec_lock, &mft_ni_mrec_lock_key); + return 0; em_put_err_out: diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 0e14acea3f8b..74e0ee8fce72 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -1724,6 +1724,14 @@ upcase_failed: return FALSE; } +/* + * The lcn and mft bitmap inodes are NTFS-internal inodes with + * their own special locking rules: + */ +static struct lock_class_key + lcnbmp_runlist_lock_key, lcnbmp_mrec_lock_key, + mftbmp_runlist_lock_key, mftbmp_mrec_lock_key; + /** * load_system_files - open the system files using normal functions * @vol: ntfs super block describing device whose 
system files to load @@ -1780,6 +1788,10 @@ static BOOL load_system_files(ntfs_volume *vol) ntfs_error(sb, "Failed to load $MFT/$BITMAP attribute."); goto iput_mirr_err_out; } + lockdep_set_class(&NTFS_I(vol->mftbmp_ino)->runlist.lock, + &mftbmp_runlist_lock_key); + lockdep_set_class(&NTFS_I(vol->mftbmp_ino)->mrec_lock, + &mftbmp_mrec_lock_key); /* Read upcase table and setup @vol->upcase and @vol->upcase_len. */ if (!load_and_init_upcase(vol)) goto iput_mftbmp_err_out; @@ -1802,6 +1814,11 @@ static BOOL load_system_files(ntfs_volume *vol) iput(vol->lcnbmp_ino); goto bitmap_failed; } + lockdep_set_class(&NTFS_I(vol->lcnbmp_ino)->runlist.lock, + &lcnbmp_runlist_lock_key); + lockdep_set_class(&NTFS_I(vol->lcnbmp_ino)->mrec_lock, + &lcnbmp_mrec_lock_key); + NInoSetSparseDisabled(NTFS_I(vol->lcnbmp_ino)); if ((vol->nr_clusters + 7) >> 3 > i_size_read(vol->lcnbmp_ino)) { iput(vol->lcnbmp_ino); @@ -2743,6 +2760,17 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) struct inode *tmp_ino; int blocksize, result; + /* + * We do a pretty difficult piece of bootstrap by reading the + * MFT (and other metadata) from disk into memory. We'll only + * release this metadata during umount, so the locking patterns + * observed during bootstrap do not count. So turn off the + * observation of locking patterns (strictly for this context + * only) while mounting NTFS. [The validator is still active + * otherwise, even for this context: it will for example record + * lock class registrations.] + */ + lockdep_off(); ntfs_debug("Entering."); #ifndef NTFS_RW sb->s_flags |= MS_RDONLY; @@ -2754,6 +2782,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) if (!silent) ntfs_error(sb, "Allocation of NTFS volume structure " "failed. Aborting mount..."); + lockdep_on(); return -ENOMEM; } /* Initialize ntfs_volume structure. 
*/ @@ -2940,6 +2969,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) mutex_unlock(&ntfs_lock); sb->s_export_op = &ntfs_export_ops; lock_kernel(); + lockdep_on(); return 0; } ntfs_error(sb, "Failed to allocate root directory."); @@ -3059,6 +3089,7 @@ err_out_now: sb->s_fs_info = NULL; kfree(vol); ntfs_debug("Failed, returning -EINVAL."); + lockdep_on(); return -EINVAL; } diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 28eb3c886034..5567328f1041 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -2203,7 +2203,7 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type, size_t towrite = len; struct buffer_head tmp_bh, *bh; - mutex_lock(&inode->i_mutex); + mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); while (towrite > 0) { tocopy = sb->s_blocksize - offset < towrite ? sb->s_blocksize - offset : towrite; diff --git a/fs/super.c b/fs/super.c index 9b780c42d845..6d4e8174b6db 100644 --- a/fs/super.c +++ b/fs/super.c @@ -53,7 +53,7 @@ DEFINE_SPINLOCK(sb_lock); * Allocates and initializes a new &struct super_block. alloc_super() * returns a pointer new superblock or %NULL if allocation had failed. */ -static struct super_block *alloc_super(void) +static struct super_block *alloc_super(struct file_system_type *type) { struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER); static struct super_operations default_op; @@ -72,6 +72,13 @@ static struct super_block *alloc_super(void) INIT_LIST_HEAD(&s->s_inodes); init_rwsem(&s->s_umount); mutex_init(&s->s_lock); + lockdep_set_class(&s->s_umount, &type->s_umount_key); + /* + * The locking rules for s_lock are up to the + * filesystem. 
For example ext3fs has different + * lock ordering than usbfs: + */ + lockdep_set_class(&s->s_lock, &type->s_lock_key); down_write(&s->s_umount); s->s_count = S_BIAS; atomic_set(&s->s_active, 1); @@ -295,7 +302,7 @@ retry: } if (!s) { spin_unlock(&sb_lock); - s = alloc_super(); + s = alloc_super(type); if (!s) return ERR_PTR(-ENOMEM); goto retry; diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 19a99726e58d..992ee0b87cc3 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1326,7 +1326,7 @@ static ssize_t ufs_quota_write(struct super_block *sb, int type, size_t towrite = len; struct buffer_head *bh; - mutex_lock(&inode->i_mutex); + mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); while (towrite > 0) { tocopy = sb->s_blocksize - offset < towrite ? sb->s_blocksize - offset : towrite; diff --git a/include/asm-alpha/rwsem.h b/include/asm-alpha/rwsem.h index fafdd4f7010a..1570c0b54336 100644 --- a/include/asm-alpha/rwsem.h +++ b/include/asm-alpha/rwsem.h @@ -36,20 +36,11 @@ struct rw_semaphore { #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) spinlock_t wait_lock; struct list_head wait_list; -#if RWSEM_DEBUG - int debug; -#endif }; -#if RWSEM_DEBUG -#define __RWSEM_DEBUG_INIT , 0 -#else -#define __RWSEM_DEBUG_INIT /* */ -#endif - #define __RWSEM_INITIALIZER(name) \ { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \ - LIST_HEAD_INIT((name).wait_list) __RWSEM_DEBUG_INIT } + LIST_HEAD_INIT((name).wait_list) } #define DECLARE_RWSEM(name) \ struct rw_semaphore name = __RWSEM_INITIALIZER(name) @@ -59,9 +50,6 @@ static inline void init_rwsem(struct rw_semaphore *sem) sem->count = RWSEM_UNLOCKED_VALUE; spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->wait_list); -#if RWSEM_DEBUG - sem->debug = 0; -#endif } static inline void __down_read(struct rw_semaphore *sem) diff --git a/include/asm-generic/mutex-null.h b/include/asm-generic/mutex-null.h index 5cf8b7ce0c45..254a126ede5c 100644 --- a/include/asm-generic/mutex-null.h +++ 
b/include/asm-generic/mutex-null.h @@ -10,15 +10,10 @@ #ifndef _ASM_GENERIC_MUTEX_NULL_H #define _ASM_GENERIC_MUTEX_NULL_H -/* extra parameter only needed for mutex debugging: */ -#ifndef __IP__ -# define __IP__ -#endif - -#define __mutex_fastpath_lock(count, fail_fn) fail_fn(count __RET_IP__) -#define __mutex_fastpath_lock_retval(count, fail_fn) fail_fn(count __RET_IP__) -#define __mutex_fastpath_unlock(count, fail_fn) fail_fn(count __RET_IP__) -#define __mutex_fastpath_trylock(count, fail_fn) fail_fn(count) -#define __mutex_slowpath_needs_to_unlock() 1 +#define __mutex_fastpath_lock(count, fail_fn) fail_fn(count) +#define __mutex_fastpath_lock_retval(count, fail_fn) fail_fn(count) +#define __mutex_fastpath_unlock(count, fail_fn) fail_fn(count) +#define __mutex_fastpath_trylock(count, fail_fn) fail_fn(count) +#define __mutex_slowpath_needs_to_unlock() 1 #endif diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index c74521157461..e160e04290fb 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -7,6 +7,8 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; +#define per_cpu_offset(x) (__per_cpu_offset[x]) + /* Separate out the type, so (int[3], foo) works. */ #define DEFINE_PER_CPU(type, name) \ __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name diff --git a/include/asm-i386/irqflags.h b/include/asm-i386/irqflags.h new file mode 100644 index 000000000000..e1bdb97c07fa --- /dev/null +++ b/include/asm-i386/irqflags.h @@ -0,0 +1,127 @@ +/* + * include/asm-i386/irqflags.h + * + * IRQ flags handling + * + * This file gets included from lowlevel asm headers too, to provide + * wrapped versions of the local_irq_*() APIs, based on the + * raw_local_irq_*() functions from the lowlevel headers. 
+ */ +#ifndef _ASM_IRQFLAGS_H +#define _ASM_IRQFLAGS_H + +#ifndef __ASSEMBLY__ + +static inline unsigned long __raw_local_save_flags(void) +{ + unsigned long flags; + + __asm__ __volatile__( + "pushfl ; popl %0" + : "=g" (flags) + : /* no input */ + ); + + return flags; +} + +#define raw_local_save_flags(flags) \ + do { (flags) = __raw_local_save_flags(); } while (0) + +static inline void raw_local_irq_restore(unsigned long flags) +{ + __asm__ __volatile__( + "pushl %0 ; popfl" + : /* no output */ + :"g" (flags) + :"memory", "cc" + ); +} + +static inline void raw_local_irq_disable(void) +{ + __asm__ __volatile__("cli" : : : "memory"); +} + +static inline void raw_local_irq_enable(void) +{ + __asm__ __volatile__("sti" : : : "memory"); +} + +/* + * Used in the idle loop; sti takes one instruction cycle + * to complete: + */ +static inline void raw_safe_halt(void) +{ + __asm__ __volatile__("sti; hlt" : : : "memory"); +} + +/* + * Used when interrupts are already enabled or to + * shutdown the processor: + */ +static inline void halt(void) +{ + __asm__ __volatile__("hlt": : :"memory"); +} + +static inline int raw_irqs_disabled_flags(unsigned long flags) +{ + return !(flags & (1 << 9)); +} + +static inline int raw_irqs_disabled(void) +{ + unsigned long flags = __raw_local_save_flags(); + + return raw_irqs_disabled_flags(flags); +} + +/* + * For spinlocks, etc: + */ +static inline unsigned long __raw_local_irq_save(void) +{ + unsigned long flags = __raw_local_save_flags(); + + raw_local_irq_disable(); + + return flags; +} + +#define raw_local_irq_save(flags) \ + do { (flags) = __raw_local_irq_save(); } while (0) + +#endif /* __ASSEMBLY__ */ + +/* + * Do the CPU's IRQ-state tracing from assembly code. 
We call a + * C function, so save all the C-clobbered registers: + */ +#ifdef CONFIG_TRACE_IRQFLAGS + +# define TRACE_IRQS_ON \ + pushl %eax; \ + pushl %ecx; \ + pushl %edx; \ + call trace_hardirqs_on; \ + popl %edx; \ + popl %ecx; \ + popl %eax; + +# define TRACE_IRQS_OFF \ + pushl %eax; \ + pushl %ecx; \ + pushl %edx; \ + call trace_hardirqs_off; \ + popl %edx; \ + popl %ecx; \ + popl %eax; + +#else +# define TRACE_IRQS_ON +# define TRACE_IRQS_OFF +#endif + +#endif diff --git a/include/asm-i386/rwsem.h b/include/asm-i386/rwsem.h index be4ab859238e..2f07601562e7 100644 --- a/include/asm-i386/rwsem.h +++ b/include/asm-i386/rwsem.h @@ -40,6 +40,7 @@ #include <linux/list.h> #include <linux/spinlock.h> +#include <linux/lockdep.h> struct rwsem_waiter; @@ -61,36 +62,34 @@ struct rw_semaphore { #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) spinlock_t wait_lock; struct list_head wait_list; -#if RWSEM_DEBUG - int debug; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; #endif }; -/* - * initialisation - */ -#if RWSEM_DEBUG -#define __RWSEM_DEBUG_INIT , 0 +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } #else -#define __RWSEM_DEBUG_INIT /* */ +# define __RWSEM_DEP_MAP_INIT(lockname) #endif + #define __RWSEM_INITIALIZER(name) \ { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) \ - __RWSEM_DEBUG_INIT } + __RWSEM_DEP_MAP_INIT(name) } #define DECLARE_RWSEM(name) \ struct rw_semaphore name = __RWSEM_INITIALIZER(name) -static inline void init_rwsem(struct rw_semaphore *sem) -{ - sem->count = RWSEM_UNLOCKED_VALUE; - spin_lock_init(&sem->wait_lock); - INIT_LIST_HEAD(&sem->wait_list); -#if RWSEM_DEBUG - sem->debug = 0; -#endif -} +extern void __init_rwsem(struct rw_semaphore *sem, const char *name, + struct lock_class_key *key); + +#define init_rwsem(sem) \ +do { \ + static struct lock_class_key __key; \ + \ + __init_rwsem((sem), #sem, &__key); \ +} while (0) 
/* * lock for reading @@ -143,7 +142,7 @@ LOCK_PREFIX " cmpxchgl %2,%0\n\t" /* * lock for writing */ -static inline void __down_write(struct rw_semaphore *sem) +static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) { int tmp; @@ -167,6 +166,11 @@ LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" /* subtract 0x0000ffff, returns the : "memory", "cc"); } +static inline void __down_write(struct rw_semaphore *sem) +{ + __down_write_nested(sem, 0); +} + /* * trylock for writing -- returns 1 if successful, 0 if contention */ diff --git a/include/asm-i386/spinlock.h b/include/asm-i386/spinlock.h index 04ba30234c48..87c40f830653 100644 --- a/include/asm-i386/spinlock.h +++ b/include/asm-i386/spinlock.h @@ -31,6 +31,11 @@ "jmp 1b\n" \ "3:\n\t" +/* + * NOTE: there's an irqs-on section here, which normally would have to be + * irq-traced, but on CONFIG_TRACE_IRQFLAGS we never use + * __raw_spin_lock_string_flags(). + */ #define __raw_spin_lock_string_flags \ "\n1:\t" \ "lock ; decb %0\n\t" \ @@ -63,6 +68,12 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock) "=m" (lock->slock) : : "memory"); } +/* + * It is easier for the lock validator if interrupts are not re-enabled + * in the middle of a lock-acquire. 
This is a performance feature anyway + * so we turn it off: + */ +#ifndef CONFIG_PROVE_LOCKING static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags) { alternative_smp( @@ -70,6 +81,7 @@ static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long fla __raw_spin_lock_string_up, "=m" (lock->slock) : "r" (flags) : "memory"); } +#endif static inline int __raw_spin_trylock(raw_spinlock_t *lock) { diff --git a/include/asm-i386/system.h b/include/asm-i386/system.h index cab0180567f9..db398d88b1d9 100644 --- a/include/asm-i386/system.h +++ b/include/asm-i386/system.h @@ -456,25 +456,7 @@ static inline unsigned long long __cmpxchg64(volatile void *ptr, unsigned long l #define set_wmb(var, value) do { var = value; wmb(); } while (0) -/* interrupt control.. */ -#define local_save_flags(x) do { typecheck(unsigned long,x); __asm__ __volatile__("pushfl ; popl %0":"=g" (x): /* no input */); } while (0) -#define local_irq_restore(x) do { typecheck(unsigned long,x); __asm__ __volatile__("pushl %0 ; popfl": /* no output */ :"g" (x):"memory", "cc"); } while (0) -#define local_irq_disable() __asm__ __volatile__("cli": : :"memory") -#define local_irq_enable() __asm__ __volatile__("sti": : :"memory") -/* used in the idle loop; sti takes one instruction cycle to complete */ -#define safe_halt() __asm__ __volatile__("sti; hlt": : :"memory") -/* used when interrupts are already enabled or to shutdown the processor */ -#define halt() __asm__ __volatile__("hlt": : :"memory") - -#define irqs_disabled() \ -({ \ - unsigned long flags; \ - local_save_flags(flags); \ - !(flags & (1<<9)); \ -}) - -/* For spinlocks etc */ -#define local_irq_save(x) __asm__ __volatile__("pushfl ; popl %0 ; cli":"=g" (x): /* no input */ :"memory") +#include <linux/irqflags.h> /* * disable hlt during certain critical i/o operations diff --git a/include/asm-ia64/irq.h b/include/asm-ia64/irq.h index 8acb00190d5a..79479e2c6966 100644 --- a/include/asm-ia64/irq.h +++ 
b/include/asm-ia64/irq.h @@ -14,8 +14,6 @@ #define NR_IRQS 256 #define NR_IRQ_VECTORS NR_IRQS -#define IRQF_PERCPU 0x02000000 - static __inline__ int irq_canonicalize (int irq) { diff --git a/include/asm-ia64/percpu.h b/include/asm-ia64/percpu.h index 24d898b650c5..fbe5cf3ab8dc 100644 --- a/include/asm-ia64/percpu.h +++ b/include/asm-ia64/percpu.h @@ -36,6 +36,7 @@ #ifdef CONFIG_SMP extern unsigned long __per_cpu_offset[NR_CPUS]; +#define per_cpu_offset(x) (__per_cpu_offset[x]) /* Equal to __per_cpu_offset[smp_processor_id()], but faster to access: */ DECLARE_PER_CPU(unsigned long, local_per_cpu_offset); diff --git a/include/asm-ia64/rwsem.h b/include/asm-ia64/rwsem.h index 1327c91ea39c..2d1640cc240a 100644 --- a/include/asm-ia64/rwsem.h +++ b/include/asm-ia64/rwsem.h @@ -33,9 +33,6 @@ struct rw_semaphore { signed long count; spinlock_t wait_lock; struct list_head wait_list; -#if RWSEM_DEBUG - int debug; -#endif }; #define RWSEM_UNLOCKED_VALUE __IA64_UL_CONST(0x0000000000000000) @@ -45,19 +42,9 @@ struct rw_semaphore { #define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) -/* - * initialization - */ -#if RWSEM_DEBUG -#define __RWSEM_DEBUG_INIT , 0 -#else -#define __RWSEM_DEBUG_INIT /* */ -#endif - #define __RWSEM_INITIALIZER(name) \ { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \ - LIST_HEAD_INIT((name).wait_list) \ - __RWSEM_DEBUG_INIT } + LIST_HEAD_INIT((name).wait_list) } #define DECLARE_RWSEM(name) \ struct rw_semaphore name = __RWSEM_INITIALIZER(name) @@ -73,9 +60,6 @@ init_rwsem (struct rw_semaphore *sem) sem->count = RWSEM_UNLOCKED_VALUE; spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->wait_list); -#if RWSEM_DEBUG - sem->debug = 0; -#endif } /* diff --git a/include/asm-ia64/thread_info.h b/include/asm-ia64/thread_info.h index 8bc9869e5765..8adcde0934ca 100644 --- a/include/asm-ia64/thread_info.h +++ b/include/asm-ia64/thread_info.h @@ -68,7 +68,7 @@ struct thread_info { #define
end_of_stack(p) (unsigned long *)((void *)(p) + IA64_RBS_OFFSET) #define __HAVE_ARCH_TASK_STRUCT_ALLOCATOR -#define alloc_task_struct() ((task_t *)__get_free_pages(GFP_KERNEL | __GFP_COMP, KERNEL_STACK_SIZE_ORDER)) +#define alloc_task_struct() ((struct task_struct *)__get_free_pages(GFP_KERNEL | __GFP_COMP, KERNEL_STACK_SIZE_ORDER)) #define free_task_struct(tsk) free_pages((unsigned long) (tsk), KERNEL_STACK_SIZE_ORDER) #endif /* !__ASSEMBLY */ diff --git a/include/asm-m32r/system.h b/include/asm-m32r/system.h index 66c4742f09e7..311cebf44eff 100644 --- a/include/asm-m32r/system.h +++ b/include/asm-m32r/system.h @@ -18,7 +18,7 @@ * switch_to(prev, next) should switch from task `prev' to `next' * `prev' will never be the same as `next'. * - * `next' and `prev' should be task_t, but it isn't always defined + * `next' and `prev' should be struct task_struct, but it isn't always defined */ #define switch_to(prev, next, last) do { \ diff --git a/include/asm-powerpc/i8259.h b/include/asm-powerpc/i8259.h index 0392159e16e4..c80e113052cd 100644 --- a/include/asm-powerpc/i8259.h +++ b/include/asm-powerpc/i8259.h @@ -4,11 +4,13 @@ #include <linux/irq.h> -extern struct hw_interrupt_type i8259_pic; - +#ifdef CONFIG_PPC_MERGE +extern void i8259_init(struct device_node *node, unsigned long intack_addr); +extern unsigned int i8259_irq(struct pt_regs *regs); +#else extern void i8259_init(unsigned long intack_addr, int offset); extern int i8259_irq(struct pt_regs *regs); -extern int i8259_irq_cascade(struct pt_regs *regs, void *unused); +#endif #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_I8259_H */ diff --git a/include/asm-powerpc/irq.h b/include/asm-powerpc/irq.h index eb5f33e1977a..e05754752028 100644 --- a/include/asm-powerpc/irq.h +++ b/include/asm-powerpc/irq.h @@ -9,26 +9,14 @@ * 2 of the License, or (at your option) any later version. 
*/ +#include <linux/config.h> #include <linux/threads.h> +#include <linux/list.h> +#include <linux/radix-tree.h> #include <asm/types.h> #include <asm/atomic.h> -/* this number is used when no interrupt has been assigned */ -#define NO_IRQ (-1) - -/* - * These constants are used for passing information about interrupt - * signal polarity and level/edge sensing to the low-level PIC chip - * drivers. - */ -#define IRQ_SENSE_MASK 0x1 -#define IRQ_SENSE_LEVEL 0x1 /* interrupt on active level */ -#define IRQ_SENSE_EDGE 0x0 /* interrupt triggered by edge */ - -#define IRQ_POLARITY_MASK 0x2 -#define IRQ_POLARITY_POSITIVE 0x2 /* high level or low->high edge */ -#define IRQ_POLARITY_NEGATIVE 0x0 /* low level or high->low edge */ #define get_irq_desc(irq) (&irq_desc[(irq)]) @@ -36,50 +24,325 @@ #define for_each_irq(i) \ for ((i) = 0; (i) < NR_IRQS; ++(i)) -#ifdef CONFIG_PPC64 +extern atomic_t ppc_n_lost_interrupts; -/* - * Maximum number of interrupt sources that we can handle. +#ifdef CONFIG_PPC_MERGE + +/* This number is used when no interrupt has been assigned */ +#define NO_IRQ (0) + +/* This is a special irq number to return from get_irq() to tell that + * no interrupt happened _and_ ignore it (don't count it as bad). Some + * platforms like iSeries rely on that. */ +#define NO_IRQ_IGNORE ((unsigned int)-1) + +/* Total number of virq in the platform (make it a CONFIG_* option ? */ #define NR_IRQS 512 -/* Interrupt numbers are virtual in case they are sparsely - * distributed by the hardware. +/* Number of irqs reserved for the legacy controller */ +#define NUM_ISA_INTERRUPTS 16 + +/* This type is the placeholder for a hardware interrupt number. It has to + * be big enough to enclose whatever representation is used by a given + * platform. + */ +typedef unsigned long irq_hw_number_t; + +/* Interrupt controller "host" data structure. This could be defined as a + * irq domain controller. 
That is, it handles the mapping between hardware + * and virtual interrupt numbers for a given interrupt domain. The host + * structure is generally created by the PIC code for a given PIC instance + * (though a host can cover more than one PIC if they have a flat number + * model). It's the host callbacks that are responsible for setting the + * irq_chip on a given irq_desc after it's been mapped. + * + * The host code and data structures are fairly agnostic to the fact that + * we use an open firmware device-tree. We do have references to struct + * device_node in two places: in irq_find_host() to find the host matching + * a given interrupt controller node, and of course as an argument to its + * counterpart host->ops->match() callback. However, those are treated as + * generic pointers by the core and the fact that it's actually a device-node + * pointer is purely a convention between callers and implementation. This + * code could thus be used on other architectures by replacing those two + * by some sort of arch-specific void * "token" used to identify interrupt + * controllers. */ -extern unsigned int virt_irq_to_real_map[NR_IRQS]; +struct irq_host; +struct radix_tree_root; -/* The maximum virtual IRQ number that we support. This - * can be set by the platform and will be reduced by the - * value of __irq_offset_value. It defaults to and is - * capped by (NR_IRQS - 1). +/* Functions below are provided by the host and called whenever a new mapping + * is created or an old mapping is disposed. The host can then proceed to + * whatever internal data structures management is required. It also needs + * to setup the irq_desc when returning from map(). */ -extern unsigned int virt_irq_max; +struct irq_host_ops { + /* Match an interrupt controller device node to a host, returns + * 1 on a match + */ + int (*match)(struct irq_host *h, struct device_node *node); + + /* Create or update a mapping between a virtual irq number and a hw + * irq number. 
This can be called several times for the same mapping + * but with different flags, though unmap shall always be called + * before the virq->hw mapping is changed. + */ + int (*map)(struct irq_host *h, unsigned int virq, + irq_hw_number_t hw, unsigned int flags); + + /* Dispose of such a mapping */ + void (*unmap)(struct irq_host *h, unsigned int virq); + + /* Translate device-tree interrupt specifier from raw format coming + * from the firmware to a irq_hw_number_t (interrupt line number) and + * trigger flags that can be passed to irq_create_mapping(). + * If no translation is provided, raw format is assumed to be one cell + * for interrupt line and default sense. + */ + int (*xlate)(struct irq_host *h, struct device_node *ctrler, + u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_flags); +}; + +struct irq_host { + struct list_head link; + + /* type of reverse mapping technique */ + unsigned int revmap_type; +#define IRQ_HOST_MAP_LEGACY 0 /* legacy 8259, gets irqs 1..15 */ +#define IRQ_HOST_MAP_NOMAP 1 /* no fast reverse mapping */ +#define IRQ_HOST_MAP_LINEAR 2 /* linear map of interrupts */ +#define IRQ_HOST_MAP_TREE 3 /* radix tree */ + union { + struct { + unsigned int size; + unsigned int *revmap; + } linear; + struct radix_tree_root tree; + } revmap_data; + struct irq_host_ops *ops; + void *host_data; + irq_hw_number_t inval_irq; +}; + +/* The main irq map itself is an array of NR_IRQ entries containing the + * associate host and irq number. An entry with a host of NULL is free. + * An entry can be allocated if it's free, the allocator always then sets + * hwirq first to the host's invalid irq number and then fills ops. + */ +struct irq_map_entry { + irq_hw_number_t hwirq; + struct irq_host *host; +}; + +extern struct irq_map_entry irq_map[NR_IRQS]; + -/* Create a mapping for a real_irq if it doesn't already exist. - * Return the virtual irq as a convenience. 
+/*** + * irq_alloc_host - Allocate a new irq_host data structure + * @node: device-tree node of the interrupt controller + * @revmap_type: type of reverse mapping to use + * @revmap_arg: for IRQ_HOST_MAP_LINEAR only: size of the map + * @ops: map/unmap host callbacks + * @inval_irq: provide a hw number in that host space that is always invalid + * + * Allocates and initializes an irq_host structure. Note that in the case of + * IRQ_HOST_MAP_LEGACY, the map() callback will be called before this returns + * for all legacy interrupts except 0 (which is always the invalid irq for + * a legacy controller). For an IRQ_HOST_MAP_LINEAR, the map is allocated by + * this call as well. For an IRQ_HOST_MAP_TREE, the radix tree will be allocated + * later during boot automatically (the reverse mapping will use the slow path + * until that happens). + */ +extern struct irq_host *irq_alloc_host(unsigned int revmap_type, + unsigned int revmap_arg, + struct irq_host_ops *ops, + irq_hw_number_t inval_irq); + + +/*** + * irq_find_host - Locates a host for a given device node + * @node: device-tree node of the interrupt controller + */ +extern struct irq_host *irq_find_host(struct device_node *node); + + +/*** + * irq_set_default_host - Set a "default" host + * @host: default host pointer + * + * For convenience, it's possible to set a "default" host that will be used + * whenever NULL is passed to irq_create_mapping(). It makes life easier for + * platforms that want to manipulate a few hard coded interrupt numbers that + * aren't properly represented in the device-tree.
+ */ +extern void irq_set_default_host(struct irq_host *host); + + +/*** + * irq_set_virq_count - Set the maximum number of virt irqs + * @count: number of linux virtual irqs, capped with NR_IRQS + * + * This is mainly for use by platforms like iSeries who want to program + * the virtual irq number in the controller to avoid the reverse mapping + */ +extern void irq_set_virq_count(unsigned int count); + + +/*** + * irq_create_mapping - Map a hardware interrupt into linux virq space + * @host: host owning this hardware interrupt or NULL for default host + * @hwirq: hardware irq number in that host space + * @flags: flags passed to the controller. contains the trigger type among + * others. Use IRQ_TYPE_* defined in include/linux/irq.h + * + * Only one mapping per hardware interrupt is permitted. Returns a linux + * virq number. The flags can be used to provide sense information to the + * controller (typically extracted from the device-tree). If no information + * is passed, the controller defaults will apply (for example, xics can only + * do edge so flags are irrelevant for some pseries specific irqs). + * + * The device-tree generally contains the trigger info in an encoding that is + * specific to a given type of controller. In that case, you can directly use + * host->ops->xlate() to translate that. + * + * It is recommended that new PICs that don't have existing OF bindings choose + * to use a representation of triggers identical to linux. + */ +extern unsigned int irq_create_mapping(struct irq_host *host, + irq_hw_number_t hwirq, + unsigned int flags); + + +/*** + * irq_dispose_mapping - Unmap an interrupt + * @virq: linux virq number of the interrupt to unmap + */ +extern void irq_dispose_mapping(unsigned int virq); + +/*** + * irq_find_mapping - Find a linux virq from an hw irq number. + * @host: host owning this hardware interrupt + * @hwirq: hardware irq number in that host space + * + * This is a slow path, for use by generic code.
It's expected that an + * irq controller implementation directly calls the appropriate low level + * mapping function. */ -int virt_irq_create_mapping(unsigned int real_irq); -void virt_irq_init(void); +extern unsigned int irq_find_mapping(struct irq_host *host, + irq_hw_number_t hwirq); -static inline unsigned int virt_irq_to_real(unsigned int virt_irq) + +/*** + * irq_radix_revmap - Find a linux virq from a hw irq number. + * @host: host owning this hardware interrupt + * @hwirq: hardware irq number in that host space + * + * This is a fast path, for use by irq controller code that uses radix tree + * revmaps + */ +extern unsigned int irq_radix_revmap(struct irq_host *host, + irq_hw_number_t hwirq); + +/*** + * irq_linear_revmap - Find a linux virq from a hw irq number. + * @host: host owning this hardware interrupt + * @hwirq: hardware irq number in that host space + * + * This is a fast path, for use by irq controller code that uses linear + * revmaps. It does fallback to the slow path if the revmap doesn't exist + * yet and will create the revmap entry with appropriate locking + */ + +extern unsigned int irq_linear_revmap(struct irq_host *host, + irq_hw_number_t hwirq); + + + +/*** + * irq_alloc_virt - Allocate virtual irq numbers + * @host: host owning these new virtual irqs + * @count: number of consecutive numbers to allocate + * @hint: pass a hint number, the allocator will try to use a 1:1 mapping + * + * This is a low level function that is used internally by irq_create_mapping() + * and that can be used by some irq controllers implementations for things + * like allocating ranges of numbers for MSIs. The revmaps are left untouched. + */ +extern unsigned int irq_alloc_virt(struct irq_host *host, + unsigned int count, + unsigned int hint); + +/*** + * irq_free_virt - Free virtual irq numbers + * @virq: virtual irq number of the first interrupt to free + * @count: number of interrupts to free + * + * This function is the opposite of irq_alloc_virt. 
It will not clear reverse + * maps; this should be done previously by unmap'ing the interrupt. In fact, + * all interrupts covered by the range being freed should have been unmapped + * prior to calling this. + */ +extern void irq_free_virt(unsigned int virq, unsigned int count); + + +/* -- OF helpers -- */ + +/* irq_create_of_mapping - Map a hardware interrupt into linux virq space + * @controller: Device node of the interrupt controller + * @intspec: Interrupt specifier from the device-tree + * @intsize: Size of the interrupt specifier from the device-tree + * + * This function is identical to irq_create_mapping except that it takes + * as input information straight from the device-tree (typically the results + * of the of_irq_map_*() functions) + */ +extern unsigned int irq_create_of_mapping(struct device_node *controller, + u32 *intspec, unsigned int intsize); + + +/* irq_of_parse_and_map - Parse and map an interrupt into linux virq space + * @dev: Device node of the device whose interrupt is to be mapped + * @index: Index of the interrupt to map + * + * This function is a wrapper that chains of_irq_map_one() and + * irq_create_of_mapping() to make things easier for callers + */ +extern unsigned int irq_of_parse_and_map(struct device_node *dev, int index); + +/* -- End OF helpers -- */ + +/*** + * irq_early_init - Init irq remapping subsystem + */ +extern void irq_early_init(void); + +static __inline__ int irq_canonicalize(int irq) { - return virt_irq_to_real_map[virt_irq]; + return irq; } -extern unsigned int real_irq_to_virt_slowpath(unsigned int real_irq); + +#else /* CONFIG_PPC_MERGE */ + +/* This number is used when no interrupt has been assigned */ +#define NO_IRQ (-1) +#define NO_IRQ_IGNORE (-2) + /* - * List of interrupt controllers. + * These constants are used for passing information about interrupt + * signal polarity and level/edge sensing to the low-level PIC chip + * drivers.
*/ -#define IC_INVALID 0 -#define IC_OPEN_PIC 1 -#define IC_PPC_XIC 2 -#define IC_CELL_PIC 3 -#define IC_ISERIES 4 +#define IRQ_SENSE_MASK 0x1 +#define IRQ_SENSE_LEVEL 0x1 /* interrupt on active level */ +#define IRQ_SENSE_EDGE 0x0 /* interrupt triggered by edge */ -extern u64 ppc64_interrupt_controller; +#define IRQ_POLARITY_MASK 0x2 +#define IRQ_POLARITY_POSITIVE 0x2 /* high level or low->high edge */ +#define IRQ_POLARITY_NEGATIVE 0x0 /* low level or high->low edge */ -#else /* 32-bit */ #if defined(CONFIG_40x) #include <asm/ibm4xx.h> @@ -512,16 +775,11 @@ extern u64 ppc64_interrupt_controller; #endif /* CONFIG_8260 */ -#endif +#endif /* Whatever way too big #ifdef */ #define NR_MASK_WORDS ((NR_IRQS + 31) / 32) /* pedantic: these are long because they are used with set_bit --RR */ extern unsigned long ppc_cached_irq_mask[NR_MASK_WORDS]; -extern atomic_t ppc_n_lost_interrupts; - -#define virt_irq_create_mapping(x) (x) - -#endif /* * Because many systems have two overlapping names spaces for @@ -560,6 +818,7 @@ static __inline__ int irq_canonicalize(int irq) irq = 9; return irq; } +#endif /* CONFIG_PPC_MERGE */ extern int distribute_irqs; @@ -579,9 +838,8 @@ extern struct thread_info *softirq_ctx[NR_CPUS]; extern void irq_ctx_init(void); extern void call_do_softirq(struct thread_info *tp); -extern int call___do_IRQ(int irq, struct pt_regs *regs, - struct thread_info *tp); - +extern int call_handle_irq(int irq, void *p1, void *p2, + struct thread_info *tp, void *func); #else #define irq_ctx_init() diff --git a/include/asm-powerpc/irqflags.h b/include/asm-powerpc/irqflags.h new file mode 100644 index 000000000000..7970cbaeaa54 --- /dev/null +++ b/include/asm-powerpc/irqflags.h @@ -0,0 +1,31 @@ +/* + * include/asm-powerpc/irqflags.h + * + * IRQ flags handling + * + * This file gets included from lowlevel asm headers too, to provide + * wrapped versions of the local_irq_*() APIs, based on the + * raw_local_irq_*() macros from the lowlevel headers. 
+ */ +#ifndef _ASM_IRQFLAGS_H +#define _ASM_IRQFLAGS_H + +/* + * Get definitions for raw_local_save_flags(x), etc. + */ +#include <asm-powerpc/hw_irq.h> + +/* + * Do the CPU's IRQ-state tracing from assembly code. We call a + * C function, so save all the C-clobbered registers: + */ +#ifdef CONFIG_TRACE_IRQFLAGS + +#error No support on PowerPC yet for CONFIG_TRACE_IRQFLAGS + +#else +# define TRACE_IRQS_ON +# define TRACE_IRQS_OFF +#endif + +#endif diff --git a/include/asm-powerpc/machdep.h b/include/asm-powerpc/machdep.h index eba133d149a7..c17c13742401 100644 --- a/include/asm-powerpc/machdep.h +++ b/include/asm-powerpc/machdep.h @@ -97,7 +97,7 @@ struct machdep_calls { void (*show_percpuinfo)(struct seq_file *m, int i); void (*init_IRQ)(void); - int (*get_irq)(struct pt_regs *); + unsigned int (*get_irq)(struct pt_regs *); #ifdef CONFIG_KEXEC void (*kexec_cpu_down)(int crash_shutdown, int secondary); #endif diff --git a/include/asm-powerpc/mpic.h b/include/asm-powerpc/mpic.h index f0d22ac34b96..eb241c99c457 100644 --- a/include/asm-powerpc/mpic.h +++ b/include/asm-powerpc/mpic.h @@ -114,9 +114,6 @@ #define MPIC_VEC_TIMER_1 248 #define MPIC_VEC_TIMER_0 247 -/* Type definition of the cascade handler */ -typedef int (*mpic_cascade_t)(struct pt_regs *regs, void *data); - #ifdef CONFIG_MPIC_BROKEN_U3 /* Fixup table entry */ struct mpic_irq_fixup @@ -132,10 +129,19 @@ struct mpic_irq_fixup /* The instance data of a given MPIC */ struct mpic { + /* The device node of the interrupt controller */ + struct device_node *of_node; + + /* The remapper for this MPIC */ + struct irq_host *irqhost; + /* The "linux" controller struct */ - hw_irq_controller hc_irq; + struct irq_chip hc_irq; +#ifdef CONFIG_MPIC_BROKEN_U3 + struct irq_chip hc_ht_irq; +#endif #ifdef CONFIG_SMP - hw_irq_controller hc_ipi; + struct irq_chip hc_ipi; #endif const char *name; /* Flags */ @@ -144,20 +150,12 @@ struct mpic unsigned int isu_size; unsigned int isu_shift; unsigned int isu_mask; - /* Offset of 
irq vector numbers */ - unsigned int irq_offset; unsigned int irq_count; - /* Offset of ipi vector numbers */ - unsigned int ipi_offset; /* Number of sources */ unsigned int num_sources; /* Number of CPUs */ unsigned int num_cpus; - /* cascade handler */ - mpic_cascade_t cascade; - void *cascade_data; - unsigned int cascade_vec; - /* senses array */ + /* default senses array */ unsigned char *senses; unsigned int senses_count; @@ -213,14 +211,11 @@ struct mpic * The values in the array start at the first source of the MPIC, * that is senses[0] correspond to linux irq "irq_offset". */ -extern struct mpic *mpic_alloc(unsigned long phys_addr, +extern struct mpic *mpic_alloc(struct device_node *node, + unsigned long phys_addr, unsigned int flags, unsigned int isu_size, - unsigned int irq_offset, unsigned int irq_count, - unsigned int ipi_offset, - unsigned char *senses, - unsigned int senses_num, const char *name); /* Assign ISUs, to call before mpic_init() @@ -232,22 +227,27 @@ extern struct mpic *mpic_alloc(unsigned long phys_addr, extern void mpic_assign_isu(struct mpic *mpic, unsigned int isu_num, unsigned long phys_addr); +/* Set default sense codes + * + * @mpic: controller + * @senses: array of sense codes + * @count: size of above array + * + * Optionally provide an array (indexed on hardware interrupt numbers + * for this MPIC) of default sense codes for the chip. Those are linux + * sense codes IRQ_TYPE_* + * + * The driver gets ownership of the pointer, don't dispose of it or + * anything like that. __init only. + */ +extern void mpic_set_default_senses(struct mpic *mpic, u8 *senses, int count); + + /* Initialize the controller. After this has been called, none of the above * should be called again for this mpic */ extern void mpic_init(struct mpic *mpic); -/* Setup a cascade. Currently, only one cascade is supported this - * way, though you can always do a normal request_irq() and add - * other cascades this way. 
You should call this _after_ having - * added all the ISUs - * - * @irq_no: "linux" irq number of the cascade (that is offset'ed vector) - * @handler: cascade handler function - */ -extern void mpic_setup_cascade(unsigned int irq_no, mpic_cascade_t hanlder, - void *data); - /* * All of the following functions must only be used after the * ISUs have been assigned and the controller fully initialized @@ -284,9 +284,9 @@ extern void mpic_send_ipi(unsigned int ipi_no, unsigned int cpu_mask); void smp_mpic_message_pass(int target, int msg); /* Fetch interrupt from a given mpic */ -extern int mpic_get_one_irq(struct mpic *mpic, struct pt_regs *regs); +extern unsigned int mpic_get_one_irq(struct mpic *mpic, struct pt_regs *regs); /* This one gets to the primary mpic */ -extern int mpic_get_irq(struct pt_regs *regs); +extern unsigned int mpic_get_irq(struct pt_regs *regs); /* Set the EPIC clock ratio */ void mpic_set_clk_ratio(struct mpic *mpic, u32 clock_ratio); @@ -294,8 +294,5 @@ void mpic_set_clk_ratio(struct mpic *mpic, u32 clock_ratio); /* Enable/Disable EPIC serial interrupt mode */ void mpic_set_serial_int(struct mpic *mpic, int enable); -/* global mpic for pSeries */ -extern struct mpic *pSeries_mpic; - #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_MPIC_H */ diff --git a/include/asm-powerpc/percpu.h b/include/asm-powerpc/percpu.h index faa1fc703053..2f2e3024fa61 100644 --- a/include/asm-powerpc/percpu.h +++ b/include/asm-powerpc/percpu.h @@ -14,6 +14,7 @@ #define __per_cpu_offset(cpu) (paca[cpu].data_offset) #define __my_cpu_offset() get_paca()->data_offset +#define per_cpu_offset(x) (__per_cpu_offset(x)) /* Separate out the type, so (int[3], foo) works. 
*/ #define DEFINE_PER_CPU(type, name) \ diff --git a/include/asm-powerpc/prom.h b/include/asm-powerpc/prom.h index 010d186d095b..b095a285c84b 100644 --- a/include/asm-powerpc/prom.h +++ b/include/asm-powerpc/prom.h @@ -64,11 +64,6 @@ struct boot_param_header typedef u32 phandle; typedef u32 ihandle; -struct interrupt_info { - int line; - int sense; /* +ve/-ve logic, edge or level, etc. */ -}; - struct property { char *name; int length; @@ -81,8 +76,6 @@ struct device_node { char *type; phandle node; phandle linux_phandle; - int n_intrs; - struct interrupt_info *intrs; char *full_name; struct property *properties; @@ -167,8 +160,8 @@ extern void unflatten_device_tree(void); extern void early_init_devtree(void *); extern int device_is_compatible(struct device_node *device, const char *); extern int machine_is_compatible(const char *compat); -extern unsigned char *get_property(struct device_node *node, const char *name, - int *lenp); +extern void *get_property(struct device_node *node, const char *name, + int *lenp); extern void print_properties(struct device_node *node); extern int prom_n_addr_cells(struct device_node* np); extern int prom_n_size_cells(struct device_node* np); @@ -204,6 +197,15 @@ extern int release_OF_resource(struct device_node* node, int index); */ +/* Helper to read a big number */ +static inline u64 of_read_number(u32 *cell, int size) +{ + u64 r = 0; + while (size--) + r = (r << 32) | *(cell++); + return r; +} + /* Translate an OF address block into a CPU physical address */ #define OF_BAD_ADDR ((u64)-1) @@ -240,5 +242,83 @@ extern void kdump_move_device_tree(void); /* CPU OF node matching */ struct device_node *of_get_cpu_node(int cpu, unsigned int *thread); + +/* + * OF interrupt mapping + */ + +/* This structure is returned when an interrupt is mapped. 
The controller + * field needs to be put() after use + */ + +#define OF_MAX_IRQ_SPEC 4 /* We handle specifiers of at most 4 cells */ + +struct of_irq { + struct device_node *controller; /* Interrupt controller node */ + u32 size; /* Specifier size */ + u32 specifier[OF_MAX_IRQ_SPEC]; /* Specifier copy */ +}; + +/*** + * of_irq_map_init - Initialize the irq remapper + * @flags: flags defining workarounds to enable + * + * Some machines have bugs in the device-tree which require certain workarounds + * to be applied. Call this before any interrupt mapping attempts to enable + * those workarounds. + */ +#define OF_IMAP_OLDWORLD_MAC 0x00000001 +#define OF_IMAP_NO_PHANDLE 0x00000002 + +extern void of_irq_map_init(unsigned int flags); + +/*** + * of_irq_map_raw - Low level interrupt tree parsing + * @parent: the device interrupt parent + * @intspec: interrupt specifier ("interrupts" property of the device) + * @addr: address specifier (start of "reg" property of the device) + * @out_irq: structure of_irq filled by this function + * + * Returns 0 on success and a negative number on error + * + * This function is a low-level interrupt tree walking function. It + * can be used to do a partial walk with synthesized reg and interrupts + * properties, for example when resolving PCI interrupts when no device + * node exists for the parent. + * + */ + +extern int of_irq_map_raw(struct device_node *parent, u32 *intspec, u32 *addr, + struct of_irq *out_irq); + + +/*** + * of_irq_map_one - Resolve an interrupt for a device + * @device: the device whose interrupt is to be resolved + * @index: index of the interrupt to resolve + * @out_irq: structure of_irq filled by this function + * + * This function resolves an interrupt, walking the tree, for a given + * device-tree node. It's the high-level counterpart to of_irq_map_raw(). + * It also implements the workarounds for OldWorld Macs. 
+ */ +extern int of_irq_map_one(struct device_node *device, int index, + struct of_irq *out_irq); + +/*** + * of_irq_map_pci - Resolve the interrupt for a PCI device + * @pdev: the device whose interrupt is to be resolved + * @out_irq: structure of_irq filled by this function + * + * This function resolves the PCI interrupt for a given PCI device. If a + * device-node exists for a given pci_dev, it will use normal OF tree + * walking. If not, it will implement standard swizzling and walk up the + * PCI tree until an device-node is found, at which point it will finish + * resolving using the OF tree walking. + */ +struct pci_dev; +extern int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq); + + #endif /* __KERNEL__ */ #endif /* _POWERPC_PROM_H */ diff --git a/include/asm-powerpc/rwsem.h b/include/asm-powerpc/rwsem.h index 2c2fe9647595..e929145e1e46 100644 --- a/include/asm-powerpc/rwsem.h +++ b/include/asm-powerpc/rwsem.h @@ -28,24 +28,11 @@ struct rw_semaphore { #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) spinlock_t wait_lock; struct list_head wait_list; -#if RWSEM_DEBUG - int debug; -#endif }; -/* - * initialisation - */ -#if RWSEM_DEBUG -#define __RWSEM_DEBUG_INIT , 0 -#else -#define __RWSEM_DEBUG_INIT /* */ -#endif - #define __RWSEM_INITIALIZER(name) \ { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \ - LIST_HEAD_INIT((name).wait_list) \ - __RWSEM_DEBUG_INIT } + LIST_HEAD_INIT((name).wait_list) } #define DECLARE_RWSEM(name) \ struct rw_semaphore name = __RWSEM_INITIALIZER(name) @@ -60,9 +47,6 @@ static inline void init_rwsem(struct rw_semaphore *sem) sem->count = RWSEM_UNLOCKED_VALUE; spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->wait_list); -#if RWSEM_DEBUG - sem->debug = 0; -#endif } /* diff --git a/include/asm-powerpc/spu.h b/include/asm-powerpc/spu.h index 9609d3ee8798..c02d105d8294 100644 --- a/include/asm-powerpc/spu.h +++ b/include/asm-powerpc/spu.h @@ -117,6 +117,7 @@ struct spu { struct list_head sched_list; 
int number; int nid; + unsigned int irqs[3]; u32 isrc; u32 node; u64 flags; diff --git a/include/asm-s390/irqflags.h b/include/asm-s390/irqflags.h new file mode 100644 index 000000000000..65f4db627e7a --- /dev/null +++ b/include/asm-s390/irqflags.h @@ -0,0 +1,50 @@ +/* + * include/asm-s390/irqflags.h + * + * Copyright (C) IBM Corp. 2006 + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> + */ + +#ifndef __ASM_IRQFLAGS_H +#define __ASM_IRQFLAGS_H + +#ifdef __KERNEL__ + +/* interrupt control.. */ +#define raw_local_irq_enable() ({ \ + unsigned long __dummy; \ + __asm__ __volatile__ ( \ + "stosm 0(%1),0x03" \ + : "=m" (__dummy) : "a" (&__dummy) : "memory" ); \ + }) + +#define raw_local_irq_disable() ({ \ + unsigned long __flags; \ + __asm__ __volatile__ ( \ + "stnsm 0(%1),0xfc" : "=m" (__flags) : "a" (&__flags) ); \ + __flags; \ + }) + +#define raw_local_save_flags(x) \ + __asm__ __volatile__("stosm 0(%1),0" : "=m" (x) : "a" (&x), "m" (x) ) + +#define raw_local_irq_restore(x) \ + __asm__ __volatile__("ssm 0(%0)" : : "a" (&x), "m" (x) : "memory") + +#define raw_irqs_disabled() \ +({ \ + unsigned long flags; \ + local_save_flags(flags); \ + !((flags >> __FLAG_SHIFT) & 3); \ +}) + +static inline int raw_irqs_disabled_flags(unsigned long flags) +{ + return !((flags >> __FLAG_SHIFT) & 3); +} + +/* For spinlocks etc */ +#define raw_local_irq_save(x) ((x) = raw_local_irq_disable()) + +#endif /* __KERNEL__ */ +#endif /* __ASM_IRQFLAGS_H */ diff --git a/include/asm-s390/percpu.h b/include/asm-s390/percpu.h index d9a8cca9b653..28b3517e787c 100644 --- a/include/asm-s390/percpu.h +++ b/include/asm-s390/percpu.h @@ -42,6 +42,7 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; #define __get_cpu_var(var) __reloc_hide(var,S390_lowcore.percpu_offset) #define __raw_get_cpu_var(var) __reloc_hide(var,S390_lowcore.percpu_offset) #define per_cpu(var,cpu) __reloc_hide(var,__per_cpu_offset[cpu]) +#define per_cpu_offset(x) (__per_cpu_offset[x]) /* A macro to avoid #include hell... 
*/ #define percpu_modcopy(pcpudst, src, size) \ diff --git a/include/asm-s390/rwsem.h b/include/asm-s390/rwsem.h index 0422a085dd56..13ec16965150 100644 --- a/include/asm-s390/rwsem.h +++ b/include/asm-s390/rwsem.h @@ -61,6 +61,9 @@ struct rw_semaphore { signed long count; spinlock_t wait_lock; struct list_head wait_list; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif }; #ifndef __s390x__ @@ -80,8 +83,16 @@ struct rw_semaphore { /* * initialisation */ + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } +#else +# define __RWSEM_DEP_MAP_INIT(lockname) +#endif + #define __RWSEM_INITIALIZER(name) \ -{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) } +{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) \ + __RWSEM_DEP_MAP_INIT(name) } #define DECLARE_RWSEM(name) \ struct rw_semaphore name = __RWSEM_INITIALIZER(name) @@ -93,6 +104,17 @@ static inline void init_rwsem(struct rw_semaphore *sem) INIT_LIST_HEAD(&sem->wait_list); } +extern void __init_rwsem(struct rw_semaphore *sem, const char *name, + struct lock_class_key *key); + +#define init_rwsem(sem) \ +do { \ + static struct lock_class_key __key; \ + \ + __init_rwsem((sem), #sem, &__key); \ +} while (0) + + /* * lock for reading */ @@ -155,7 +177,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) /* * lock for writing */ -static inline void __down_write(struct rw_semaphore *sem) +static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) { signed long old, new, tmp; @@ -181,6 +203,11 @@ static inline void __down_write(struct rw_semaphore *sem) rwsem_down_write_failed(sem); } +static inline void __down_write(struct rw_semaphore *sem) +{ + __down_write_nested(sem, 0); +} + /* * trylock for writing -- returns 1 if successful, 0 if contention */ diff --git a/include/asm-s390/semaphore.h b/include/asm-s390/semaphore.h index 702cf436698c..32cdc69f39f4 
100644 --- a/include/asm-s390/semaphore.h +++ b/include/asm-s390/semaphore.h @@ -37,7 +37,8 @@ struct semaphore { static inline void sema_init (struct semaphore *sem, int val) { - *sem = (struct semaphore) __SEMAPHORE_INITIALIZER((*sem),val); + atomic_set(&sem->count, val); + init_waitqueue_head(&sem->wait); } static inline void init_MUTEX (struct semaphore *sem) diff --git a/include/asm-s390/system.h b/include/asm-s390/system.h index 71a0732cd518..9ab186ffde23 100644 --- a/include/asm-s390/system.h +++ b/include/asm-s390/system.h @@ -301,34 +301,6 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size) #define set_mb(var, value) do { var = value; mb(); } while (0) #define set_wmb(var, value) do { var = value; wmb(); } while (0) -/* interrupt control.. */ -#define local_irq_enable() ({ \ - unsigned long __dummy; \ - __asm__ __volatile__ ( \ - "stosm 0(%1),0x03" \ - : "=m" (__dummy) : "a" (&__dummy) : "memory" ); \ - }) - -#define local_irq_disable() ({ \ - unsigned long __flags; \ - __asm__ __volatile__ ( \ - "stnsm 0(%1),0xfc" : "=m" (__flags) : "a" (&__flags) ); \ - __flags; \ - }) - -#define local_save_flags(x) \ - __asm__ __volatile__("stosm 0(%1),0" : "=m" (x) : "a" (&x), "m" (x) ) - -#define local_irq_restore(x) \ - __asm__ __volatile__("ssm 0(%0)" : : "a" (&x), "m" (x) : "memory") - -#define irqs_disabled() \ -({ \ - unsigned long flags; \ - local_save_flags(flags); \ - !((flags >> __FLAG_SHIFT) & 3); \ -}) - #ifdef __s390x__ #define __ctl_load(array, low, high) ({ \ @@ -442,8 +414,7 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size) }) #endif /* __s390x__ */ -/* For spinlocks etc */ -#define local_irq_save(x) ((x) = local_irq_disable()) +#include <linux/irqflags.h> /* * Use to set psw mask except for the first byte which @@ -482,4 +453,3 @@ extern void (*_machine_power_off)(void); #endif /* __KERNEL__ */ #endif - diff --git a/include/asm-sh/rwsem.h b/include/asm-sh/rwsem.h index 
0262d3d1e5e0..9d2aea5e8488 100644 --- a/include/asm-sh/rwsem.h +++ b/include/asm-sh/rwsem.h @@ -25,24 +25,11 @@ struct rw_semaphore { #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) spinlock_t wait_lock; struct list_head wait_list; -#if RWSEM_DEBUG - int debug; -#endif }; -/* - * initialisation - */ -#if RWSEM_DEBUG -#define __RWSEM_DEBUG_INIT , 0 -#else -#define __RWSEM_DEBUG_INIT /* */ -#endif - #define __RWSEM_INITIALIZER(name) \ { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \ - LIST_HEAD_INIT((name).wait_list) \ - __RWSEM_DEBUG_INIT } + LIST_HEAD_INIT((name).wait_list) } #define DECLARE_RWSEM(name) \ struct rw_semaphore name = __RWSEM_INITIALIZER(name) @@ -57,9 +44,6 @@ static inline void init_rwsem(struct rw_semaphore *sem) sem->count = RWSEM_UNLOCKED_VALUE; spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->wait_list); -#if RWSEM_DEBUG - sem->debug = 0; -#endif } /* diff --git a/include/asm-sh/system.h b/include/asm-sh/system.h index b752e5cbb830..ce2e60664a86 100644 --- a/include/asm-sh/system.h +++ b/include/asm-sh/system.h @@ -12,7 +12,7 @@ */ #define switch_to(prev, next, last) do { \ - task_t *__last; \ + struct task_struct *__last; \ register unsigned long *__ts1 __asm__ ("r1") = &prev->thread.sp; \ register unsigned long *__ts2 __asm__ ("r2") = &prev->thread.pc; \ register unsigned long *__ts4 __asm__ ("r4") = (unsigned long *)prev; \ diff --git a/include/asm-sparc64/percpu.h b/include/asm-sparc64/percpu.h index a6ece06b83db..ced8cbde046d 100644 --- a/include/asm-sparc64/percpu.h +++ b/include/asm-sparc64/percpu.h @@ -11,6 +11,7 @@ extern unsigned long __per_cpu_base; extern unsigned long __per_cpu_shift; #define __per_cpu_offset(__cpu) \ (__per_cpu_base + ((unsigned long)(__cpu) << __per_cpu_shift)) +#define per_cpu_offset(x) (__per_cpu_offset(x)) /* Separate out the type, so (int[3], foo) works. 
*/ #define DEFINE_PER_CPU(type, name) \ diff --git a/include/asm-x86_64/irqflags.h b/include/asm-x86_64/irqflags.h new file mode 100644 index 000000000000..cce6937e87c0 --- /dev/null +++ b/include/asm-x86_64/irqflags.h @@ -0,0 +1,141 @@ +/* + * include/asm-x86_64/irqflags.h + * + * IRQ flags handling + * + * This file gets included from lowlevel asm headers too, to provide + * wrapped versions of the local_irq_*() APIs, based on the + * raw_local_irq_*() functions from the lowlevel headers. + */ +#ifndef _ASM_IRQFLAGS_H +#define _ASM_IRQFLAGS_H + +#ifndef __ASSEMBLY__ +/* + * Interrupt control: + */ + +static inline unsigned long __raw_local_save_flags(void) +{ + unsigned long flags; + + __asm__ __volatile__( + "# __raw_save_flags\n\t" + "pushfq ; popq %q0" + : "=g" (flags) + : /* no input */ + : "memory" + ); + + return flags; +} + +#define raw_local_save_flags(flags) \ + do { (flags) = __raw_local_save_flags(); } while (0) + +static inline void raw_local_irq_restore(unsigned long flags) +{ + __asm__ __volatile__( + "pushq %0 ; popfq" + : /* no output */ + :"g" (flags) + :"memory", "cc" + ); +} + +#ifdef CONFIG_X86_VSMP + +/* + * Interrupt control for the VSMP architecture: + */ + +static inline void raw_local_irq_disable(void) +{ + unsigned long flags = __raw_local_save_flags(); + + raw_local_irq_restore((flags & ~(1 << 9)) | (1 << 18)); +} + +static inline void raw_local_irq_enable(void) +{ + unsigned long flags = __raw_local_save_flags(); + + raw_local_irq_restore((flags | (1 << 9)) & ~(1 << 18)); +} + +static inline int raw_irqs_disabled_flags(unsigned long flags) +{ + return !(flags & (1<<9)) || (flags & (1 << 18)); +} + +#else /* CONFIG_X86_VSMP */ + +static inline void raw_local_irq_disable(void) +{ + __asm__ __volatile__("cli" : : : "memory"); +} + +static inline void raw_local_irq_enable(void) +{ + __asm__ __volatile__("sti" : : : "memory"); +} + +static inline int raw_irqs_disabled_flags(unsigned long flags) +{ + return !(flags & (1 << 9)); +} + +#endif 
+ +/* + * For spinlocks, etc.: + */ + +static inline unsigned long __raw_local_irq_save(void) +{ + unsigned long flags = __raw_local_save_flags(); + + raw_local_irq_disable(); + + return flags; +} + +#define raw_local_irq_save(flags) \ + do { (flags) = __raw_local_irq_save(); } while (0) + +static inline int raw_irqs_disabled(void) +{ + unsigned long flags = __raw_local_save_flags(); + + return raw_irqs_disabled_flags(flags); +} + +/* + * Used in the idle loop; sti takes one instruction cycle + * to complete: + */ +static inline void raw_safe_halt(void) +{ + __asm__ __volatile__("sti; hlt" : : : "memory"); +} + +/* + * Used when interrupts are already enabled or to + * shutdown the processor: + */ +static inline void halt(void) +{ + __asm__ __volatile__("hlt": : :"memory"); +} + +#else /* __ASSEMBLY__: */ +# ifdef CONFIG_TRACE_IRQFLAGS +# define TRACE_IRQS_ON call trace_hardirqs_on_thunk +# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk +# else +# define TRACE_IRQS_ON +# define TRACE_IRQS_OFF +# endif +#endif + +#endif diff --git a/include/asm-x86_64/kdebug.h b/include/asm-x86_64/kdebug.h index cd52c7f33bca..2b0c088e2957 100644 --- a/include/asm-x86_64/kdebug.h +++ b/include/asm-x86_64/kdebug.h @@ -49,7 +49,7 @@ static inline int notify_die(enum die_val val, const char *str, return atomic_notifier_call_chain(&die_chain, val, &args); } -extern int printk_address(unsigned long address); +extern void printk_address(unsigned long address); extern void die(const char *,struct pt_regs *,long); extern void __die(const char *,struct pt_regs *,long); extern void show_registers(struct pt_regs *regs); diff --git a/include/asm-x86_64/percpu.h b/include/asm-x86_64/percpu.h index 549eb929b2c0..08dd9f9dda81 100644 --- a/include/asm-x86_64/percpu.h +++ b/include/asm-x86_64/percpu.h @@ -14,6 +14,8 @@ #define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset) #define __my_cpu_offset() read_pda(data_offset) +#define per_cpu_offset(x) (__per_cpu_offset(x)) + /* Separate out the 
type, so (int[3], foo) works. */ #define DEFINE_PER_CPU(type, name) \ __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name diff --git a/include/asm-x86_64/system.h b/include/asm-x86_64/system.h index 68e559f3631c..f67f2873a922 100644 --- a/include/asm-x86_64/system.h +++ b/include/asm-x86_64/system.h @@ -244,43 +244,7 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, #define warn_if_not_ulong(x) do { unsigned long foo; (void) (&(x) == &foo); } while (0) -/* interrupt control.. */ -#define local_save_flags(x) do { warn_if_not_ulong(x); __asm__ __volatile__("# save_flags \n\t pushfq ; popq %q0":"=g" (x): /* no input */ :"memory"); } while (0) -#define local_irq_restore(x) __asm__ __volatile__("# restore_flags \n\t pushq %0 ; popfq": /* no output */ :"g" (x):"memory", "cc") - -#ifdef CONFIG_X86_VSMP -/* Interrupt control for VSMP architecture */ -#define local_irq_disable() do { unsigned long flags; local_save_flags(flags); local_irq_restore((flags & ~(1 << 9)) | (1 << 18)); } while (0) -#define local_irq_enable() do { unsigned long flags; local_save_flags(flags); local_irq_restore((flags | (1 << 9)) & ~(1 << 18)); } while (0) - -#define irqs_disabled() \ -({ \ - unsigned long flags; \ - local_save_flags(flags); \ - (flags & (1<<18)) || !(flags & (1<<9)); \ -}) - -/* For spinlocks etc */ -#define local_irq_save(x) do { local_save_flags(x); local_irq_restore((x & ~(1 << 9)) | (1 << 18)); } while (0) -#else /* CONFIG_X86_VSMP */ -#define local_irq_disable() __asm__ __volatile__("cli": : :"memory") -#define local_irq_enable() __asm__ __volatile__("sti": : :"memory") - -#define irqs_disabled() \ -({ \ - unsigned long flags; \ - local_save_flags(flags); \ - !(flags & (1<<9)); \ -}) - -/* For spinlocks etc */ -#define local_irq_save(x) do { warn_if_not_ulong(x); __asm__ __volatile__("# local_irq_save \n\t pushfq ; popq %0 ; cli":"=g" (x): /* no input */ :"memory"); } while (0) -#endif - -/* used in the idle loop; sti 
takes one instruction cycle to complete */ -#define safe_halt() __asm__ __volatile__("sti; hlt": : :"memory") -/* used when interrupts are already enabled or to shutdown the processor */ -#define halt() __asm__ __volatile__("hlt": : :"memory") +#include <linux/irqflags.h> void cpu_idle_wait(void); diff --git a/include/asm-xtensa/rwsem.h b/include/asm-xtensa/rwsem.h index abcd86dc5ab9..0aad3a587551 100644 --- a/include/asm-xtensa/rwsem.h +++ b/include/asm-xtensa/rwsem.h @@ -31,24 +31,11 @@ struct rw_semaphore { #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) spinlock_t wait_lock; struct list_head wait_list; -#if RWSEM_DEBUG - int debug; -#endif }; -/* - * initialisation - */ -#if RWSEM_DEBUG -#define __RWSEM_DEBUG_INIT , 0 -#else -#define __RWSEM_DEBUG_INIT /* */ -#endif - #define __RWSEM_INITIALIZER(name) \ { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \ - LIST_HEAD_INIT((name).wait_list) \ - __RWSEM_DEBUG_INIT } + LIST_HEAD_INIT((name).wait_list) } #define DECLARE_RWSEM(name) \ struct rw_semaphore name = __RWSEM_INITIALIZER(name) @@ -63,9 +50,6 @@ static inline void init_rwsem(struct rw_semaphore *sem) sem->count = RWSEM_UNLOCKED_VALUE; spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->wait_list); -#if RWSEM_DEBUG - sem->debug = 0; -#endif } /* diff --git a/include/linux/completion.h b/include/linux/completion.h index 90663ad217f9..251c41e3ddd5 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -21,6 +21,18 @@ struct completion { #define DECLARE_COMPLETION(work) \ struct completion work = COMPLETION_INITIALIZER(work) +/* + * Lockdep needs to run a non-constant initializer for on-stack + * completions - so we use the _ONSTACK() variant for those that + * are on the kernel stack: + */ +#ifdef CONFIG_LOCKDEP +# define DECLARE_COMPLETION_ONSTACK(work) \ + struct completion work = ({ init_completion(&work); work; }) +#else +# define DECLARE_COMPLETION_ONSTACK(work) DECLARE_COMPLETION(work) +#endif + static inline void 
init_completion(struct completion *x) { x->done = 0; diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 0dd1610a94a9..471781ffeab1 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -114,6 +114,18 @@ struct dentry { unsigned char d_iname[DNAME_INLINE_LEN_MIN]; /* small names */ }; +/* + * dentry->d_lock spinlock nesting subclasses: + * + * 0: normal + * 1: nested + */ +enum dentry_d_lock_class +{ + DENTRY_D_LOCK_NORMAL, /* implicitly used by plain spin_lock() APIs. */ + DENTRY_D_LOCK_NESTED +}; + struct dentry_operations { int (*d_revalidate)(struct dentry *, struct nameidata *); int (*d_hash) (struct dentry *, struct qstr *); diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h new file mode 100644 index 000000000000..6a7047851e48 --- /dev/null +++ b/include/linux/debug_locks.h @@ -0,0 +1,69 @@ +#ifndef __LINUX_DEBUG_LOCKING_H +#define __LINUX_DEBUG_LOCKING_H + +extern int debug_locks; +extern int debug_locks_silent; + +/* + * Generic 'turn off all lock debugging' function: + */ +extern int debug_locks_off(void); + +/* + * In the debug case we carry the caller's instruction pointer into + * other functions, but we dont want the function argument overhead + * in the nondebug case - hence these macros: + */ +#define _RET_IP_ (unsigned long)__builtin_return_address(0) +#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; }) + +#define DEBUG_LOCKS_WARN_ON(c) \ +({ \ + int __ret = 0; \ + \ + if (unlikely(c)) { \ + if (debug_locks_off()) \ + WARN_ON(1); \ + __ret = 1; \ + } \ + __ret; \ +}) + +#ifdef CONFIG_SMP +# define SMP_DEBUG_LOCKS_WARN_ON(c) DEBUG_LOCKS_WARN_ON(c) +#else +# define SMP_DEBUG_LOCKS_WARN_ON(c) do { } while (0) +#endif + +#ifdef CONFIG_DEBUG_LOCKING_API_SELFTESTS + extern void locking_selftest(void); +#else +# define locking_selftest() do { } while (0) +#endif + +#ifdef CONFIG_LOCKDEP +extern void debug_show_all_locks(void); +extern void debug_show_held_locks(struct task_struct 
*task); +extern void debug_check_no_locks_freed(const void *from, unsigned long len); +extern void debug_check_no_locks_held(struct task_struct *task); +#else +static inline void debug_show_all_locks(void) +{ +} + +static inline void debug_show_held_locks(struct task_struct *task) +{ +} + +static inline void +debug_check_no_locks_freed(const void *from, unsigned long len) +{ +} + +static inline void +debug_check_no_locks_held(struct task_struct *task) +{ +} +#endif + +#endif diff --git a/include/linux/fs.h b/include/linux/fs.h index e04a5cfe874f..134b32068246 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -436,6 +436,21 @@ struct block_device { }; /* + * bdev->bd_mutex nesting subclasses for the lock validator: + * + * 0: normal + * 1: 'whole' + * 2: 'partition' + */ +enum bdev_bd_mutex_lock_class +{ + BD_MUTEX_NORMAL, + BD_MUTEX_WHOLE, + BD_MUTEX_PARTITION +}; + + +/* * Radix-tree tags, for tagging dirty and writeback pages within the pagecache * radix trees */ @@ -543,6 +558,25 @@ struct inode { }; /* + * inode->i_mutex nesting subclasses for the lock validator: + * + * 0: the object of the current VFS operation + * 1: parent + * 2: child/target + * 3: quota file + * + * The locking order between these classes is + * parent -> child -> normal -> quota + */ +enum inode_i_mutex_lock_class +{ + I_MUTEX_NORMAL, + I_MUTEX_PARENT, + I_MUTEX_CHILD, + I_MUTEX_QUOTA +}; + +/* * NOTE: in a 32bit arch with a preemptable kernel and * an UP compile the i_size_read/write must be atomic * with respect to the local cpu (unlike with preempt disabled), @@ -1276,6 +1310,8 @@ struct file_system_type { struct module *owner; struct file_system_type * next; struct list_head fs_supers; + struct lock_class_key s_lock_key; + struct lock_class_key s_umount_key; }; extern int get_sb_bdev(struct file_system_type *fs_type, @@ -1404,6 +1440,7 @@ extern void bd_set_size(struct block_device *, loff_t size); extern void bd_forget(struct inode *inode); extern void bdput(struct 
block_device *); extern struct block_device *open_by_devnum(dev_t, unsigned); +extern struct block_device *open_partition_by_devnum(dev_t, unsigned); extern const struct file_operations def_blk_fops; extern const struct address_space_operations def_blk_aops; extern const struct file_operations def_chr_fops; @@ -1414,6 +1451,7 @@ extern int blkdev_ioctl(struct inode *, struct file *, unsigned, unsigned long); extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); extern int blkdev_get(struct block_device *, mode_t, unsigned); extern int blkdev_put(struct block_device *); +extern int blkdev_put_partition(struct block_device *); extern int bd_claim(struct block_device *, void *); extern void bd_release(struct block_device *); #ifdef CONFIG_SYSFS diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 114ae583cca9..50d8b5744cf6 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -3,6 +3,7 @@ #include <linux/preempt.h> #include <linux/smp_lock.h> +#include <linux/lockdep.h> #include <asm/hardirq.h> #include <asm/system.h> @@ -86,9 +87,6 @@ extern void synchronize_irq(unsigned int irq); # define synchronize_irq(irq) barrier() #endif -#define nmi_enter() irq_enter() -#define nmi_exit() sub_preempt_count(HARDIRQ_OFFSET) - struct task_struct; #ifndef CONFIG_VIRT_CPU_ACCOUNTING @@ -97,12 +95,35 @@ static inline void account_system_vtime(struct task_struct *tsk) } #endif +/* + * It is safe to do non-atomic ops on ->hardirq_context, + * because NMI handlers may not preempt and the ops are + * always balanced, so the interrupted value of ->hardirq_context + * will always be restored. 
+ */ #define irq_enter() \ do { \ account_system_vtime(current); \ add_preempt_count(HARDIRQ_OFFSET); \ + trace_hardirq_enter(); \ + } while (0) + +/* + * Exit irq context without processing softirqs: + */ +#define __irq_exit() \ + do { \ + trace_hardirq_exit(); \ + account_system_vtime(current); \ + sub_preempt_count(HARDIRQ_OFFSET); \ } while (0) +/* + * Exit irq context and process softirqs if needed: + */ extern void irq_exit(void); +#define nmi_enter() do { lockdep_off(); irq_enter(); } while (0) +#define nmi_exit() do { __irq_exit(); lockdep_on(); } while (0) + #endif /* LINUX_HARDIRQ_H */ diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 07d7305f131e..e4bccbcc2750 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -91,6 +91,7 @@ struct hrtimer_base { ktime_t (*get_softirq_time)(void); struct hrtimer *curr_timer; ktime_t softirq_time; + struct lock_class_key lock_key; }; /* diff --git a/include/linux/ide.h b/include/linux/ide.h index 285316c836b5..dc7abef10965 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1359,7 +1359,7 @@ extern struct semaphore ide_cfg_sem; * ide_drive_t->hwif: constant, no locking */ -#define local_irq_set(flags) do { local_save_flags((flags)); local_irq_enable(); } while (0) +#define local_irq_set(flags) do { local_save_flags((flags)); local_irq_enable_in_hardirq(); } while (0) extern struct bus_type ide_bus_type; diff --git a/include/linux/idr.h b/include/linux/idr.h index f559a719dbe8..826803449db7 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -66,7 +66,7 @@ struct idr { .id_free = NULL, \ .layers = 0, \ .id_free_cnt = 0, \ - .lock = SPIN_LOCK_UNLOCKED, \ + .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ } #define DEFINE_IDR(name) struct idr name = IDR_INIT(name) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 3a256957fb56..60aac2cea0cf 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -3,6 +3,8 @@ #include <linux/file.h> 
#include <linux/rcupdate.h> +#include <linux/irqflags.h> +#include <linux/lockdep.h> #define INIT_FDTABLE \ { \ @@ -21,7 +23,7 @@ .count = ATOMIC_INIT(1), \ .fdt = &init_files.fdtab, \ .fdtab = INIT_FDTABLE, \ - .file_lock = SPIN_LOCK_UNLOCKED, \ + .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), \ .next_fd = 0, \ .close_on_exec_init = { { 0, } }, \ .open_fds_init = { { 0, } }, \ @@ -36,7 +38,7 @@ .user_id = 0, \ .next = NULL, \ .wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.wait), \ - .ctx_lock = SPIN_LOCK_UNLOCKED, \ + .ctx_lock = __SPIN_LOCK_UNLOCKED(name.ctx_lock), \ .reqs_active = 0U, \ .max_reqs = ~0U, \ } @@ -48,7 +50,7 @@ .mm_users = ATOMIC_INIT(2), \ .mm_count = ATOMIC_INIT(1), \ .mmap_sem = __RWSEM_INITIALIZER(name.mmap_sem), \ - .page_table_lock = SPIN_LOCK_UNLOCKED, \ + .page_table_lock = __SPIN_LOCK_UNLOCKED(name.page_table_lock), \ .mmlist = LIST_HEAD_INIT(name.mmlist), \ .cpu_vm_mask = CPU_MASK_ALL, \ } @@ -69,7 +71,7 @@ #define INIT_SIGHAND(sighand) { \ .count = ATOMIC_INIT(1), \ .action = { { { .sa_handler = NULL, } }, }, \ - .siglock = SPIN_LOCK_UNLOCKED, \ + .siglock = __SPIN_LOCK_UNLOCKED(sighand.siglock), \ } extern struct group_info init_groups; @@ -119,12 +121,13 @@ extern struct group_info init_groups; .list = LIST_HEAD_INIT(tsk.pending.list), \ .signal = {{0}}}, \ .blocked = {{0}}, \ - .alloc_lock = SPIN_LOCK_UNLOCKED, \ + .alloc_lock = __SPIN_LOCK_UNLOCKED(tsk.alloc_lock), \ .journal_info = NULL, \ .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ .fs_excl = ATOMIC_INIT(0), \ .pi_lock = SPIN_LOCK_UNLOCKED, \ - INIT_RT_MUTEXES(tsk) \ + INIT_TRACE_IRQFLAGS \ + INIT_LOCKDEP \ } diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index da3e0dbe61d4..d5afee95fd43 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -10,6 +10,7 @@ #include <linux/irqreturn.h> #include <linux/hardirq.h> #include <linux/sched.h> +#include <linux/irqflags.h> #include <asm/atomic.h> #include <asm/ptrace.h> #include 
<asm/system.h> @@ -80,12 +81,64 @@ extern int request_irq(unsigned int, unsigned long, const char *, void *); extern void free_irq(unsigned int, void *); +/* + * On lockdep we don't want to enable hardirqs in hardirq + * context. Use local_irq_enable_in_hardirq() to annotate + * kernel code that has to do this nevertheless (pretty much + * the only valid case is for old/broken hardware that is + * insanely slow). + * + * NOTE: in theory this might break fragile code that relies + * on hardirq delivery - in practice we don't seem to have such + * places left. So the only effect should be slightly increased + * irqs-off latencies. + */ +#ifdef CONFIG_LOCKDEP +# define local_irq_enable_in_hardirq() do { } while (0) +#else +# define local_irq_enable_in_hardirq() local_irq_enable() +#endif #ifdef CONFIG_GENERIC_HARDIRQS extern void disable_irq_nosync(unsigned int irq); extern void disable_irq(unsigned int irq); extern void enable_irq(unsigned int irq); +/* + * Special lockdep variants of irq disabling/enabling. + * These should be used for locking constructs that + * know that a particular irq context is disabled, + * and is the only irq-context user of a lock, so + * that it's safe to take the lock in the irq-disabled + * section without disabling hardirqs. + * + * On !CONFIG_LOCKDEP they are equivalent to the normal + * irq disable/enable methods.
+ */ +static inline void disable_irq_nosync_lockdep(unsigned int irq) +{ + disable_irq_nosync(irq); +#ifdef CONFIG_LOCKDEP + local_irq_disable(); +#endif +} + +static inline void disable_irq_lockdep(unsigned int irq) +{ + disable_irq(irq); +#ifdef CONFIG_LOCKDEP + local_irq_disable(); +#endif +} + +static inline void enable_irq_lockdep(unsigned int irq) +{ +#ifdef CONFIG_LOCKDEP + local_irq_enable(); +#endif + enable_irq(irq); +} + /* IRQ wakeup (PM) control: */ extern int set_irq_wake(unsigned int irq, unsigned int on); @@ -99,7 +152,19 @@ static inline int disable_irq_wake(unsigned int irq) return set_irq_wake(irq, 0); } -#endif +#else /* !CONFIG_GENERIC_HARDIRQS */ +/* + * NOTE: non-genirq architectures, if they want to support the lock + * validator need to define the methods below in their asm/irq.h + * files, under an #ifdef CONFIG_LOCKDEP section. + */ +# ifndef CONFIG_LOCKDEP +# define disable_irq_nosync_lockdep(irq) disable_irq_nosync(irq) +# define disable_irq_lockdep(irq) disable_irq(irq) +# define enable_irq_lockdep(irq) enable_irq(irq) +# endif + +#endif /* CONFIG_GENERIC_HARDIRQS */ #ifndef __ARCH_SET_SOFTIRQ_PENDING #define set_softirq_pending(x) (local_softirq_pending() = (x)) @@ -135,13 +200,11 @@ static inline void __deprecated save_and_cli(unsigned long *x) #define save_and_cli(x) save_and_cli(&x) #endif /* CONFIG_SMP */ -/* SoftIRQ primitives. */ -#define local_bh_disable() \ - do { add_preempt_count(SOFTIRQ_OFFSET); barrier(); } while (0) -#define __local_bh_enable() \ - do { barrier(); sub_preempt_count(SOFTIRQ_OFFSET); } while (0) - +extern void local_bh_disable(void); +extern void __local_bh_enable(void); +extern void _local_bh_enable(void); extern void local_bh_enable(void); +extern void local_bh_enable_ip(unsigned long ip); /* PLEASE, avoid to allocate new softirqs, if you need not _really_ high frequency threaded job scheduling. 
For almost all the purposes diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 87a9fc039b47..5612dfeeae50 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -55,6 +55,7 @@ struct resource_list { #define IORESOURCE_IRQ_LOWEDGE (1<<1) #define IORESOURCE_IRQ_HIGHLEVEL (1<<2) #define IORESOURCE_IRQ_LOWLEVEL (1<<3) +#define IORESOURCE_IRQ_SHAREABLE (1<<4) /* ISA PnP DMA specific bits (IORESOURCE_BITS) */ #define IORESOURCE_DMA_TYPE_MASK (3<<0) diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h new file mode 100644 index 000000000000..412e025bc5c7 --- /dev/null +++ b/include/linux/irqflags.h @@ -0,0 +1,96 @@ +/* + * include/linux/irqflags.h + * + * IRQ flags tracing: follow the state of the hardirq and softirq flags and + * provide callbacks for transitions between ON and OFF states. + * + * This file gets included from lowlevel asm headers too, to provide + * wrapped versions of the local_irq_*() APIs, based on the + * raw_local_irq_*() macros from the lowlevel headers. 
+ */ +#ifndef _LINUX_TRACE_IRQFLAGS_H +#define _LINUX_TRACE_IRQFLAGS_H + +#ifdef CONFIG_TRACE_IRQFLAGS + extern void trace_hardirqs_on(void); + extern void trace_hardirqs_off(void); + extern void trace_softirqs_on(unsigned long ip); + extern void trace_softirqs_off(unsigned long ip); +# define trace_hardirq_context(p) ((p)->hardirq_context) +# define trace_softirq_context(p) ((p)->softirq_context) +# define trace_hardirqs_enabled(p) ((p)->hardirqs_enabled) +# define trace_softirqs_enabled(p) ((p)->softirqs_enabled) +# define trace_hardirq_enter() do { current->hardirq_context++; } while (0) +# define trace_hardirq_exit() do { current->hardirq_context--; } while (0) +# define trace_softirq_enter() do { current->softirq_context++; } while (0) +# define trace_softirq_exit() do { current->softirq_context--; } while (0) +# define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1, +#else +# define trace_hardirqs_on() do { } while (0) +# define trace_hardirqs_off() do { } while (0) +# define trace_softirqs_on(ip) do { } while (0) +# define trace_softirqs_off(ip) do { } while (0) +# define trace_hardirq_context(p) 0 +# define trace_softirq_context(p) 0 +# define trace_hardirqs_enabled(p) 0 +# define trace_softirqs_enabled(p) 0 +# define trace_hardirq_enter() do { } while (0) +# define trace_hardirq_exit() do { } while (0) +# define trace_softirq_enter() do { } while (0) +# define trace_softirq_exit() do { } while (0) +# define INIT_TRACE_IRQFLAGS +#endif + +#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT + +#include <asm/irqflags.h> + +#define local_irq_enable() \ + do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0) +#define local_irq_disable() \ + do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0) +#define local_irq_save(flags) \ + do { raw_local_irq_save(flags); trace_hardirqs_off(); } while (0) + +#define local_irq_restore(flags) \ + do { \ + if (raw_irqs_disabled_flags(flags)) { \ + raw_local_irq_restore(flags); \ + trace_hardirqs_off(); \ + } else { \ + 
trace_hardirqs_on(); \ + raw_local_irq_restore(flags); \ + } \ + } while (0) +#else /* !CONFIG_TRACE_IRQFLAGS_SUPPORT */ +/* + * The local_irq_*() APIs are equal to the raw_local_irq*() + * if !TRACE_IRQFLAGS. + */ +# define raw_local_irq_disable() local_irq_disable() +# define raw_local_irq_enable() local_irq_enable() +# define raw_local_irq_save(flags) local_irq_save(flags) +# define raw_local_irq_restore(flags) local_irq_restore(flags) +#endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */ + +#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT +#define safe_halt() \ + do { \ + trace_hardirqs_on(); \ + raw_safe_halt(); \ + } while (0) + +#define local_save_flags(flags) raw_local_save_flags(flags) + +#define irqs_disabled() \ +({ \ + unsigned long flags; \ + \ + raw_local_save_flags(flags); \ + raw_irqs_disabled_flags(flags); \ +}) + +#define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags) +#endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */ + +#endif diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 54e2549f96ba..849043ce4ed6 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -57,10 +57,25 @@ do { \ #define print_fn_descriptor_symbol(fmt, addr) print_symbol(fmt, addr) #endif -#define print_symbol(fmt, addr) \ -do { \ - __check_printsym_format(fmt, ""); \ - __print_symbol(fmt, addr); \ +static inline void print_symbol(const char *fmt, unsigned long addr) +{ + __check_printsym_format(fmt, ""); + __print_symbol(fmt, (unsigned long) + __builtin_extract_return_addr((void *)addr)); +} + +#ifndef CONFIG_64BIT +#define print_ip_sym(ip) \ +do { \ + printk("[<%08lx>]", ip); \ + print_symbol(" %s\n", ip); \ } while(0) +#else +#define print_ip_sym(ip) \ +do { \ + printk("[<%016lx>]", ip); \ + print_symbol(" %s\n", ip); \ +} while(0) +#endif #endif /*_LINUX_KALLSYMS_H*/ diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h new file mode 100644 index 000000000000..316e0fb8d7b1 --- /dev/null +++ b/include/linux/lockdep.h @@ -0,0 +1,353 @@ +/* + * Runtime locking
correctness validator + * + * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + * + * see Documentation/lockdep-design.txt for more details. + */ +#ifndef __LINUX_LOCKDEP_H +#define __LINUX_LOCKDEP_H + +#include <linux/linkage.h> +#include <linux/list.h> +#include <linux/debug_locks.h> +#include <linux/stacktrace.h> + +#ifdef CONFIG_LOCKDEP + +/* + * Lock-class usage-state bits: + */ +enum lock_usage_bit +{ + LOCK_USED = 0, + LOCK_USED_IN_HARDIRQ, + LOCK_USED_IN_SOFTIRQ, + LOCK_ENABLED_SOFTIRQS, + LOCK_ENABLED_HARDIRQS, + LOCK_USED_IN_HARDIRQ_READ, + LOCK_USED_IN_SOFTIRQ_READ, + LOCK_ENABLED_SOFTIRQS_READ, + LOCK_ENABLED_HARDIRQS_READ, + LOCK_USAGE_STATES +}; + +/* + * Usage-state bitmasks: + */ +#define LOCKF_USED (1 << LOCK_USED) +#define LOCKF_USED_IN_HARDIRQ (1 << LOCK_USED_IN_HARDIRQ) +#define LOCKF_USED_IN_SOFTIRQ (1 << LOCK_USED_IN_SOFTIRQ) +#define LOCKF_ENABLED_HARDIRQS (1 << LOCK_ENABLED_HARDIRQS) +#define LOCKF_ENABLED_SOFTIRQS (1 << LOCK_ENABLED_SOFTIRQS) + +#define LOCKF_ENABLED_IRQS (LOCKF_ENABLED_HARDIRQS | LOCKF_ENABLED_SOFTIRQS) +#define LOCKF_USED_IN_IRQ (LOCKF_USED_IN_HARDIRQ | LOCKF_USED_IN_SOFTIRQ) + +#define LOCKF_USED_IN_HARDIRQ_READ (1 << LOCK_USED_IN_HARDIRQ_READ) +#define LOCKF_USED_IN_SOFTIRQ_READ (1 << LOCK_USED_IN_SOFTIRQ_READ) +#define LOCKF_ENABLED_HARDIRQS_READ (1 << LOCK_ENABLED_HARDIRQS_READ) +#define LOCKF_ENABLED_SOFTIRQS_READ (1 << LOCK_ENABLED_SOFTIRQS_READ) + +#define LOCKF_ENABLED_IRQS_READ \ + (LOCKF_ENABLED_HARDIRQS_READ | LOCKF_ENABLED_SOFTIRQS_READ) +#define LOCKF_USED_IN_IRQ_READ \ + (LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ) + +#define MAX_LOCKDEP_SUBCLASSES 8UL + +/* + * Lock-classes are keyed via unique addresses, by embedding the + * lockclass-key into the kernel (or module) .data section. (For + * static locks we use the lock address itself as the key.) 
+ */ +struct lockdep_subclass_key { + char __one_byte; +} __attribute__ ((__packed__)); + +struct lock_class_key { + struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES]; +}; + +/* + * The lock-class itself: + */ +struct lock_class { + /* + * class-hash: + */ + struct list_head hash_entry; + + /* + * global list of all lock-classes: + */ + struct list_head lock_entry; + + struct lockdep_subclass_key *key; + unsigned int subclass; + + /* + * IRQ/softirq usage tracking bits: + */ + unsigned long usage_mask; + struct stack_trace usage_traces[LOCK_USAGE_STATES]; + + /* + * These fields represent a directed graph of lock dependencies, + * to every node we attach a list of "forward" and a list of + * "backward" graph nodes. + */ + struct list_head locks_after, locks_before; + + /* + * Generation counter, when doing certain classes of graph walking, + * to ensure that we check one node only once: + */ + unsigned int version; + + /* + * Statistics counter: + */ + unsigned long ops; + + const char *name; + int name_version; +}; + +/* + * Map the lock object (the lock instance) to the lock-class object. + * This is embedded into specific lock instances: + */ +struct lockdep_map { + struct lock_class_key *key; + struct lock_class *class[MAX_LOCKDEP_SUBCLASSES]; + const char *name; +}; + +/* + * Every lock has a list of other locks that were taken after it. + * We only grow the list, never remove from it: + */ +struct lock_list { + struct list_head entry; + struct lock_class *class; + struct stack_trace trace; +}; + +/* + * We record lock dependency chains, so that we can cache them: + */ +struct lock_chain { + struct list_head entry; + u64 chain_key; +}; + +struct held_lock { + /* + * One-way hash of the dependency chain up to this point. We + * hash the hashes step by step as the dependency chain grows. 
+ * + * We use it for dependency-caching and we skip detection + * passes and dependency-updates if there is a cache-hit, so + * it is absolutely critical for 100% coverage of the validator + * to have a unique key value for every unique dependency path + * that can occur in the system, to make a unique hash value + * as likely as possible - hence the 64-bit width. + * + * The task struct holds the current hash value (initialized + * with zero), here we store the previous hash value: + */ + u64 prev_chain_key; + struct lock_class *class; + unsigned long acquire_ip; + struct lockdep_map *instance; + + /* + * The lock-stack is unified in that the lock chains of interrupt + * contexts nest on top of process context chains, but we 'separate' + * the hashes by starting with 0 if we cross into an interrupt + * context, and we also do not add cross-context lock + * dependencies - the lock usage graph walking covers that area + * anyway, and we'd just unnecessarily increase the number of + * dependencies otherwise. [Note: hardirq and softirq contexts + * are separated from each other too.]
+ * + * The following field is used to detect when we cross into an + * interrupt context: + */ + int irq_context; + int trylock; + int read; + int check; + int hardirqs_off; +}; + +/* + * Initialization, self-test and debugging-output methods: + */ +extern void lockdep_init(void); +extern void lockdep_info(void); +extern void lockdep_reset(void); +extern void lockdep_reset_lock(struct lockdep_map *lock); +extern void lockdep_free_key_range(void *start, unsigned long size); + +extern void lockdep_off(void); +extern void lockdep_on(void); +extern int lockdep_internal(void); + +/* + * These methods are used by specific locking variants (spinlocks, + * rwlocks, mutexes and rwsems) to pass init/acquire/release events + * to lockdep: + */ + +extern void lockdep_init_map(struct lockdep_map *lock, const char *name, + struct lock_class_key *key); + +/* + * Reinitialize a lock key - for cases where there is special locking or + * special initialization of locks so that the validator gets the scope + * of dependencies wrong: they are either too broad (they need a class-split) + * or they are too narrow (they suffer from a false class-split): + */ +#define lockdep_set_class(lock, key) \ + lockdep_init_map(&(lock)->dep_map, #key, key) +#define lockdep_set_class_and_name(lock, key, name) \ + lockdep_init_map(&(lock)->dep_map, name, key) + +/* + * Acquire a lock. + * + * Values for "read": + * + * 0: exclusive (write) acquire + * 1: read-acquire (no recursion allowed) + * 2: read-acquire with same-instance recursion allowed + * + * Values for check: + * + * 0: disabled + * 1: simple checks (freeing, held-at-exit-time, etc.) 
+ * 2: full validation + */ +extern void lock_acquire(struct lockdep_map *lock, unsigned int subclass, + int trylock, int read, int check, unsigned long ip); + +extern void lock_release(struct lockdep_map *lock, int nested, + unsigned long ip); + +# define INIT_LOCKDEP .lockdep_recursion = 0, + +#else /* !LOCKDEP */ + +static inline void lockdep_off(void) +{ +} + +static inline void lockdep_on(void) +{ +} + +static inline int lockdep_internal(void) +{ + return 0; +} + +# define lock_acquire(l, s, t, r, c, i) do { } while (0) +# define lock_release(l, n, i) do { } while (0) +# define lockdep_init() do { } while (0) +# define lockdep_info() do { } while (0) +# define lockdep_init_map(lock, name, key) do { (void)(key); } while (0) +# define lockdep_set_class(lock, key) do { (void)(key); } while (0) +# define lockdep_set_class_and_name(lock, key, name) \ + do { (void)(key); } while (0) +# define INIT_LOCKDEP +# define lockdep_reset() do { debug_locks = 1; } while (0) +# define lockdep_free_key_range(start, size) do { } while (0) +/* + * The class key takes no space if lockdep is disabled: + */ +struct lock_class_key { }; +#endif /* !LOCKDEP */ + +#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_GENERIC_HARDIRQS) +extern void early_init_irq_lock_class(void); +#else +# define early_init_irq_lock_class() do { } while (0) +#endif + +#ifdef CONFIG_TRACE_IRQFLAGS +extern void early_boot_irqs_off(void); +extern void early_boot_irqs_on(void); +#else +# define early_boot_irqs_off() do { } while (0) +# define early_boot_irqs_on() do { } while (0) +#endif + +/* + * For trivial one-depth nesting of a lock-class, the following + * global define can be used. (Subsystems with multiple levels + * of nesting should define their own lock-nesting subclasses.) 
+ */ +#define SINGLE_DEPTH_NESTING 1 + +/* + * Map the dependency ops to NOP or to real lockdep ops, depending + * on the per lock-class debug mode: + */ + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# ifdef CONFIG_PROVE_LOCKING +# define spin_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i) +# else +# define spin_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i) +# endif +# define spin_release(l, n, i) lock_release(l, n, i) +#else +# define spin_acquire(l, s, t, i) do { } while (0) +# define spin_release(l, n, i) do { } while (0) +#endif + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# ifdef CONFIG_PROVE_LOCKING +# define rwlock_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i) +# define rwlock_acquire_read(l, s, t, i) lock_acquire(l, s, t, 2, 2, i) +# else +# define rwlock_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i) +# define rwlock_acquire_read(l, s, t, i) lock_acquire(l, s, t, 2, 1, i) +# endif +# define rwlock_release(l, n, i) lock_release(l, n, i) +#else +# define rwlock_acquire(l, s, t, i) do { } while (0) +# define rwlock_acquire_read(l, s, t, i) do { } while (0) +# define rwlock_release(l, n, i) do { } while (0) +#endif + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# ifdef CONFIG_PROVE_LOCKING +# define mutex_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i) +# else +# define mutex_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i) +# endif +# define mutex_release(l, n, i) lock_release(l, n, i) +#else +# define mutex_acquire(l, s, t, i) do { } while (0) +# define mutex_release(l, n, i) do { } while (0) +#endif + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# ifdef CONFIG_PROVE_LOCKING +# define rwsem_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i) +# define rwsem_acquire_read(l, s, t, i) lock_acquire(l, s, t, 1, 2, i) +# else +# define rwsem_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i) +# define rwsem_acquire_read(l, s, t, i) lock_acquire(l, s, t, 1, 1, i) +# endif +# define rwsem_release(l, n, i) lock_release(l, n, i) +#else +# define rwsem_acquire(l, s, t, i) do { } while (0) +# 
define rwsem_acquire_read(l, s, t, i) do { } while (0) +# define rwsem_release(l, n, i) do { } while (0) +#endif + +#endif /* __LINUX_LOCKDEP_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 75179529e399..990957e0929f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -14,6 +14,7 @@ #include <linux/prio_tree.h> #include <linux/fs.h> #include <linux/mutex.h> +#include <linux/debug_locks.h> struct mempolicy; struct anon_vma; @@ -1034,13 +1035,6 @@ static inline void vm_stat_account(struct mm_struct *mm, } #endif /* CONFIG_PROC_FS */ -static inline void -debug_check_no_locks_freed(const void *from, unsigned long len) -{ - mutex_debug_check_no_locks_freed(from, len); - rt_mutex_debug_check_no_locks_freed(from, len); -} - #ifndef CONFIG_DEBUG_PAGEALLOC static inline void kernel_map_pages(struct page *page, int numpages, int enable) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 27e748eb72b0..656b588a9f96 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -150,6 +150,10 @@ struct zone { unsigned long lowmem_reserve[MAX_NR_ZONES]; #ifdef CONFIG_NUMA + /* + * zone reclaim becomes active if more unmapped pages exist. + */ + unsigned long min_unmapped_ratio; struct per_cpu_pageset *pageset[NR_CPUS]; #else struct per_cpu_pageset pageset[NR_CPUS]; @@ -414,6 +418,8 @@ int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); +int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int, + struct file *, void __user *, size_t *, loff_t *); #include <linux/topology.h> /* Returns the number of the current Node. 
*/ diff --git a/include/linux/module.h b/include/linux/module.h index 9e9dc7c24d95..d06c74fb8c26 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -358,6 +358,7 @@ static inline int module_is_live(struct module *mod) /* Is this address in a module? (second is with no locks, for oops) */ struct module *module_text_address(unsigned long addr); struct module *__module_text_address(unsigned long addr); +int is_module_address(unsigned long addr); /* Returns module and fills in value, defined and namebuf, or NULL if symnum out of range. */ @@ -496,6 +497,11 @@ static inline struct module *__module_text_address(unsigned long addr) return NULL; } +static inline int is_module_address(unsigned long addr) +{ + return 0; +} + /* Get/put a kernel symbol (calls should be symmetric) */ #define symbol_get(x) ({ extern typeof(x) x __attribute__((weak)); &(x); }) #define symbol_put(x) do { } while(0) diff --git a/include/linux/mutex-debug.h b/include/linux/mutex-debug.h index 8b5769f00467..2537285e1064 100644 --- a/include/linux/mutex-debug.h +++ b/include/linux/mutex-debug.h @@ -2,22 +2,22 @@ #define __LINUX_MUTEX_DEBUG_H #include <linux/linkage.h> +#include <linux/lockdep.h> /* * Mutexes - debugging helpers: */ -#define __DEBUG_MUTEX_INITIALIZER(lockname) \ - , .held_list = LIST_HEAD_INIT(lockname.held_list), \ - .name = #lockname , .magic = &lockname +#define __DEBUG_MUTEX_INITIALIZER(lockname) \ + , .magic = &lockname -#define mutex_init(sem) __mutex_init(sem, __FUNCTION__) +#define mutex_init(mutex) \ +do { \ + static struct lock_class_key __key; \ + \ + __mutex_init((mutex), #mutex, &__key); \ +} while (0) extern void FASTCALL(mutex_destroy(struct mutex *lock)); -extern void mutex_debug_show_all_locks(void); -extern void mutex_debug_show_held_locks(struct task_struct *filter); -extern void mutex_debug_check_no_locks_held(struct task_struct *task); -extern void mutex_debug_check_no_locks_freed(const void *from, unsigned long len); - #endif diff --git 
a/include/linux/mutex.h b/include/linux/mutex.h index f1ac507fa20d..27c48daa3183 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -13,6 +13,7 @@ #include <linux/list.h> #include <linux/spinlock_types.h> #include <linux/linkage.h> +#include <linux/lockdep.h> #include <asm/atomic.h> @@ -50,11 +51,12 @@ struct mutex { struct list_head wait_list; #ifdef CONFIG_DEBUG_MUTEXES struct thread_info *owner; - struct list_head held_list; - unsigned long acquire_ip; const char *name; void *magic; #endif +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif }; /* @@ -74,24 +76,34 @@ struct mutex_waiter { # include <linux/mutex-debug.h> #else # define __DEBUG_MUTEX_INITIALIZER(lockname) -# define mutex_init(mutex) __mutex_init(mutex, NULL) +# define mutex_init(mutex) \ +do { \ + static struct lock_class_key __key; \ + \ + __mutex_init((mutex), #mutex, &__key); \ +} while (0) # define mutex_destroy(mutex) do { } while (0) -# define mutex_debug_show_all_locks() do { } while (0) -# define mutex_debug_show_held_locks(p) do { } while (0) -# define mutex_debug_check_no_locks_held(task) do { } while (0) -# define mutex_debug_check_no_locks_freed(from, len) do { } while (0) +#endif + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ + , .dep_map = { .name = #lockname } +#else +# define __DEP_MAP_MUTEX_INITIALIZER(lockname) #endif #define __MUTEX_INITIALIZER(lockname) \ { .count = ATOMIC_INIT(1) \ , .wait_lock = SPIN_LOCK_UNLOCKED \ , .wait_list = LIST_HEAD_INIT(lockname.wait_list) \ - __DEBUG_MUTEX_INITIALIZER(lockname) } + __DEBUG_MUTEX_INITIALIZER(lockname) \ + __DEP_MAP_MUTEX_INITIALIZER(lockname) } #define DEFINE_MUTEX(mutexname) \ struct mutex mutexname = __MUTEX_INITIALIZER(mutexname) -extern void fastcall __mutex_init(struct mutex *lock, const char *name); +extern void __mutex_init(struct mutex *lock, const char *name, + struct lock_class_key *key); /*** * mutex_is_locked - is the mutex locked @@ -110,6 +122,13 @@ 
static inline int fastcall mutex_is_locked(struct mutex *lock) */ extern void fastcall mutex_lock(struct mutex *lock); extern int fastcall mutex_lock_interruptible(struct mutex *lock); + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass); +#else +# define mutex_lock_nested(lock, subclass) mutex_lock(lock) +#endif + /* * NOTE: mutex_trylock() follows the spin_trylock() convention, * not the down_trylock() convention! diff --git a/include/linux/notifier.h b/include/linux/notifier.h index 51dbab9710c7..7ff386a6ae87 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -65,7 +65,7 @@ struct raw_notifier_head { } while (0) #define ATOMIC_NOTIFIER_INIT(name) { \ - .lock = SPIN_LOCK_UNLOCKED, \ + .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ .head = NULL } #define BLOCKING_NOTIFIER_INIT(name) { \ .rwsem = __RWSEM_INITIALIZER((name).rwsem), \ diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h index fa4a3b82ba70..5d41dee82f80 100644 --- a/include/linux/rtmutex.h +++ b/include/linux/rtmutex.h @@ -29,8 +29,6 @@ struct rt_mutex { struct task_struct *owner; #ifdef CONFIG_DEBUG_RT_MUTEXES int save_state; - struct list_head held_list_entry; - unsigned long acquire_ip; const char *name, *file; int line; void *magic; @@ -98,14 +96,6 @@ extern int rt_mutex_trylock(struct rt_mutex *lock); extern void rt_mutex_unlock(struct rt_mutex *lock); -#ifdef CONFIG_DEBUG_RT_MUTEXES -# define INIT_RT_MUTEX_DEBUG(tsk) \ - .held_list_head = LIST_HEAD_INIT(tsk.held_list_head), \ - .held_list_lock = SPIN_LOCK_UNLOCKED -#else -# define INIT_RT_MUTEX_DEBUG(tsk) -#endif - #ifdef CONFIG_RT_MUTEXES # define INIT_RT_MUTEXES(tsk) \ .pi_waiters = PLIST_HEAD_INIT(tsk.pi_waiters, tsk.pi_lock), \ diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h index f30f805080ae..ae1fcadd598e 100644 --- a/include/linux/rwsem-spinlock.h +++ b/include/linux/rwsem-spinlock.h @@ -32,30 +32,37 @@ struct rw_semaphore { 
__s32 activity; spinlock_t wait_lock; struct list_head wait_list; -#if RWSEM_DEBUG - int debug; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; #endif }; -/* - * initialisation - */ -#if RWSEM_DEBUG -#define __RWSEM_DEBUG_INIT , 0 +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } #else -#define __RWSEM_DEBUG_INIT /* */ +# define __RWSEM_DEP_MAP_INIT(lockname) #endif #define __RWSEM_INITIALIZER(name) \ -{ 0, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) __RWSEM_DEBUG_INIT } +{ 0, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) } #define DECLARE_RWSEM(name) \ struct rw_semaphore name = __RWSEM_INITIALIZER(name) -extern void FASTCALL(init_rwsem(struct rw_semaphore *sem)); +extern void __init_rwsem(struct rw_semaphore *sem, const char *name, + struct lock_class_key *key); + +#define init_rwsem(sem) \ +do { \ + static struct lock_class_key __key; \ + \ + __init_rwsem((sem), #sem, &__key); \ +} while (0) + extern void FASTCALL(__down_read(struct rw_semaphore *sem)); extern int FASTCALL(__down_read_trylock(struct rw_semaphore *sem)); extern void FASTCALL(__down_write(struct rw_semaphore *sem)); +extern void FASTCALL(__down_write_nested(struct rw_semaphore *sem, int subclass)); extern int FASTCALL(__down_write_trylock(struct rw_semaphore *sem)); extern void FASTCALL(__up_read(struct rw_semaphore *sem)); extern void FASTCALL(__up_write(struct rw_semaphore *sem)); diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index f99fe90732ab..658afb37c3f5 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -9,8 +9,6 @@ #include <linux/linkage.h> -#define RWSEM_DEBUG 0 - #ifdef __KERNEL__ #include <linux/types.h> @@ -26,89 +24,58 @@ struct rw_semaphore; #include <asm/rwsem.h> /* use an arch-specific implementation */ #endif -#ifndef rwsemtrace -#if RWSEM_DEBUG -extern void FASTCALL(rwsemtrace(struct rw_semaphore *sem, const char *str)); -#else -#define 
rwsemtrace(SEM,FMT) -#endif -#endif - /* * lock for reading */ -static inline void down_read(struct rw_semaphore *sem) -{ - might_sleep(); - rwsemtrace(sem,"Entering down_read"); - __down_read(sem); - rwsemtrace(sem,"Leaving down_read"); -} +extern void down_read(struct rw_semaphore *sem); /* * trylock for reading -- returns 1 if successful, 0 if contention */ -static inline int down_read_trylock(struct rw_semaphore *sem) -{ - int ret; - rwsemtrace(sem,"Entering down_read_trylock"); - ret = __down_read_trylock(sem); - rwsemtrace(sem,"Leaving down_read_trylock"); - return ret; -} +extern int down_read_trylock(struct rw_semaphore *sem); /* * lock for writing */ -static inline void down_write(struct rw_semaphore *sem) -{ - might_sleep(); - rwsemtrace(sem,"Entering down_write"); - __down_write(sem); - rwsemtrace(sem,"Leaving down_write"); -} +extern void down_write(struct rw_semaphore *sem); /* * trylock for writing -- returns 1 if successful, 0 if contention */ -static inline int down_write_trylock(struct rw_semaphore *sem) -{ - int ret; - rwsemtrace(sem,"Entering down_write_trylock"); - ret = __down_write_trylock(sem); - rwsemtrace(sem,"Leaving down_write_trylock"); - return ret; -} +extern int down_write_trylock(struct rw_semaphore *sem); /* * release a read lock */ -static inline void up_read(struct rw_semaphore *sem) -{ - rwsemtrace(sem,"Entering up_read"); - __up_read(sem); - rwsemtrace(sem,"Leaving up_read"); -} +extern void up_read(struct rw_semaphore *sem); /* * release a write lock */ -static inline void up_write(struct rw_semaphore *sem) -{ - rwsemtrace(sem,"Entering up_write"); - __up_write(sem); - rwsemtrace(sem,"Leaving up_write"); -} +extern void up_write(struct rw_semaphore *sem); /* * downgrade write lock to read lock */ -static inline void downgrade_write(struct rw_semaphore *sem) -{ - rwsemtrace(sem,"Entering downgrade_write"); - __downgrade_write(sem); - rwsemtrace(sem,"Leaving downgrade_write"); -} +extern void downgrade_write(struct rw_semaphore 
*sem); + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +/* + * nested locking: + */ +extern void down_read_nested(struct rw_semaphore *sem, int subclass); +extern void down_write_nested(struct rw_semaphore *sem, int subclass); +/* + * Take/release a lock when not the owner will release it: + */ +extern void down_read_non_owner(struct rw_semaphore *sem); +extern void up_read_non_owner(struct rw_semaphore *sem); +#else +# define down_read_nested(sem, subclass) down_read(sem) +# define down_write_nested(sem, subclass) down_write(sem) +# define down_read_non_owner(sem) down_read(sem) +# define up_read_non_owner(sem) up_read(sem) +#endif #endif /* __KERNEL__ */ #endif /* _LINUX_RWSEM_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index aaf723308ed4..1c876e27ff93 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -184,11 +184,11 @@ extern unsigned long weighted_cpuload(const int cpu); extern rwlock_t tasklist_lock; extern spinlock_t mmlist_lock; -typedef struct task_struct task_t; +struct task_struct; extern void sched_init(void); extern void sched_init_smp(void); -extern void init_idle(task_t *idle, int cpu); +extern void init_idle(struct task_struct *idle, int cpu); extern cpumask_t nohz_cpu_mask; @@ -383,7 +383,7 @@ struct signal_struct { wait_queue_head_t wait_chldexit; /* for wait4() */ /* current thread group signal load-balancing target: */ - task_t *curr_target; + struct task_struct *curr_target; /* shared signal handling: */ struct sigpending shared_pending; @@ -534,7 +534,6 @@ extern struct user_struct *find_user(uid_t); extern struct user_struct root_user; #define INIT_USER (&root_user) -typedef struct prio_array prio_array_t; struct backing_dev_info; struct reclaim_state; @@ -699,7 +698,7 @@ extern int groups_search(struct group_info *group_info, gid_t grp); ((gi)->blocks[(i)/NGROUPS_PER_BLOCK][(i)%NGROUPS_PER_BLOCK]) #ifdef ARCH_HAS_PREFETCH_SWITCH_STACK -extern void prefetch_stack(struct task_struct*); +extern void prefetch_stack(struct 
task_struct *t); #else static inline void prefetch_stack(struct task_struct *t) { } #endif @@ -715,6 +714,8 @@ enum sleep_type { SLEEP_INTERRUPTED, }; +struct prio_array; + struct task_struct { volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ struct thread_info *thread_info; @@ -732,7 +733,7 @@ struct task_struct { int load_weight; /* for niceness load balancing purposes */ int prio, static_prio, normal_prio; struct list_head run_list; - prio_array_t *array; + struct prio_array *array; unsigned short ioprio; unsigned int btrace_seq; @@ -865,16 +866,34 @@ struct task_struct { struct plist_head pi_waiters; /* Deadlock detection and priority inheritance handling */ struct rt_mutex_waiter *pi_blocked_on; -# ifdef CONFIG_DEBUG_RT_MUTEXES - spinlock_t held_list_lock; - struct list_head held_list_head; -# endif #endif #ifdef CONFIG_DEBUG_MUTEXES /* mutex deadlock detection */ struct mutex_waiter *blocked_on; #endif +#ifdef CONFIG_TRACE_IRQFLAGS + unsigned int irq_events; + int hardirqs_enabled; + unsigned long hardirq_enable_ip; + unsigned int hardirq_enable_event; + unsigned long hardirq_disable_ip; + unsigned int hardirq_disable_event; + int softirqs_enabled; + unsigned long softirq_disable_ip; + unsigned int softirq_disable_event; + unsigned long softirq_enable_ip; + unsigned int softirq_enable_event; + int hardirq_context; + int softirq_context; +#endif +#ifdef CONFIG_LOCKDEP +# define MAX_LOCK_DEPTH 30UL + u64 curr_chain_key; + int lockdep_depth; + struct held_lock held_locks[MAX_LOCK_DEPTH]; + unsigned int lockdep_recursion; +#endif /* journalling filesystem info */ void *journal_info; @@ -1013,9 +1032,9 @@ static inline void put_task_struct(struct task_struct *t) #define used_math() tsk_used_math(current) #ifdef CONFIG_SMP -extern int set_cpus_allowed(task_t *p, cpumask_t new_mask); +extern int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask); #else -static inline int set_cpus_allowed(task_t *p, cpumask_t new_mask) +static inline int 
set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) { if (!cpu_isset(0, new_mask)) return -EINVAL; @@ -1024,7 +1043,8 @@ static inline int set_cpus_allowed(task_t *p, cpumask_t new_mask) #endif extern unsigned long long sched_clock(void); -extern unsigned long long current_sched_time(const task_t *current_task); +extern unsigned long long +current_sched_time(const struct task_struct *current_task); /* sched_exec is called by processes performing an exec */ #ifdef CONFIG_SMP @@ -1042,27 +1062,27 @@ static inline void idle_task_exit(void) {} extern void sched_idle_next(void); #ifdef CONFIG_RT_MUTEXES -extern int rt_mutex_getprio(task_t *p); -extern void rt_mutex_setprio(task_t *p, int prio); -extern void rt_mutex_adjust_pi(task_t *p); +extern int rt_mutex_getprio(struct task_struct *p); +extern void rt_mutex_setprio(struct task_struct *p, int prio); +extern void rt_mutex_adjust_pi(struct task_struct *p); #else -static inline int rt_mutex_getprio(task_t *p) +static inline int rt_mutex_getprio(struct task_struct *p) { return p->normal_prio; } # define rt_mutex_adjust_pi(p) do { } while (0) #endif -extern void set_user_nice(task_t *p, long nice); -extern int task_prio(const task_t *p); -extern int task_nice(const task_t *p); -extern int can_nice(const task_t *p, const int nice); -extern int task_curr(const task_t *p); +extern void set_user_nice(struct task_struct *p, long nice); +extern int task_prio(const struct task_struct *p); +extern int task_nice(const struct task_struct *p); +extern int can_nice(const struct task_struct *p, const int nice); +extern int task_curr(const struct task_struct *p); extern int idle_cpu(int cpu); extern int sched_setscheduler(struct task_struct *, int, struct sched_param *); -extern task_t *idle_task(int cpu); -extern task_t *curr_task(int cpu); -extern void set_curr_task(int cpu, task_t *p); +extern struct task_struct *idle_task(int cpu); +extern struct task_struct *curr_task(int cpu); +extern void set_curr_task(int cpu, struct 
task_struct *p); void yield(void); @@ -1119,8 +1139,8 @@ extern void FASTCALL(wake_up_new_task(struct task_struct * tsk, #else static inline void kick_process(struct task_struct *tsk) { } #endif -extern void FASTCALL(sched_fork(task_t * p, int clone_flags)); -extern void FASTCALL(sched_exit(task_t * p)); +extern void FASTCALL(sched_fork(struct task_struct * p, int clone_flags)); +extern void FASTCALL(sched_exit(struct task_struct * p)); extern int in_group_p(gid_t); extern int in_egroup_p(gid_t); @@ -1225,17 +1245,17 @@ extern NORET_TYPE void do_group_exit(int); extern void daemonize(const char *, ...); extern int allow_signal(int); extern int disallow_signal(int); -extern task_t *child_reaper; +extern struct task_struct *child_reaper; extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *); extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *); -task_t *fork_idle(int); +struct task_struct *fork_idle(int); extern void set_task_comm(struct task_struct *tsk, char *from); extern void get_task_comm(char *to, struct task_struct *tsk); #ifdef CONFIG_SMP -extern void wait_task_inactive(task_t * p); +extern void wait_task_inactive(struct task_struct * p); #else #define wait_task_inactive(p) do { } while (0) #endif @@ -1261,13 +1281,13 @@ extern void wait_task_inactive(task_t * p); /* de_thread depends on thread_group_leader not being a pid based check */ #define thread_group_leader(p) (p == p->group_leader) -static inline task_t *next_thread(const task_t *p) +static inline struct task_struct *next_thread(const struct task_struct *p) { return list_entry(rcu_dereference(p->thread_group.next), - task_t, thread_group); + struct task_struct, thread_group); } -static inline int thread_group_empty(task_t *p) +static inline int thread_group_empty(struct task_struct *p) { return list_empty(&p->thread_group); } diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 
7bc5c7c12b54..46000936f8f1 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -38,9 +38,17 @@ typedef struct { * These macros triggered gcc-3.x compile-time problems. We think these are * OK now. Be cautious. */ -#define SEQLOCK_UNLOCKED { 0, SPIN_LOCK_UNLOCKED } -#define seqlock_init(x) do { *(x) = (seqlock_t) SEQLOCK_UNLOCKED; } while (0) +#define __SEQLOCK_UNLOCKED(lockname) \ + { 0, __SPIN_LOCK_UNLOCKED(lockname) } +#define SEQLOCK_UNLOCKED \ + __SEQLOCK_UNLOCKED(old_style_seqlock_init) + +#define seqlock_init(x) \ + do { *(x) = (seqlock_t) __SEQLOCK_UNLOCKED(x); } while (0) + +#define DEFINE_SEQLOCK(x) \ + seqlock_t x = __SEQLOCK_UNLOCKED(x) /* Lock out other writers and update the count. * Acts like a normal spin_lock/unlock. diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 57d7d4965f9a..3597b4f14389 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -604,9 +604,12 @@ static inline __u32 skb_queue_len(const struct sk_buff_head *list_) return list_->qlen; } +extern struct lock_class_key skb_queue_lock_key; + static inline void skb_queue_head_init(struct sk_buff_head *list) { spin_lock_init(&list->lock); + lockdep_set_class(&list->lock, &skb_queue_lock_key); list->prev = list->next = (struct sk_buff *)list; list->qlen = 0; } diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index ae23beef9cc9..31473db92d3b 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -82,14 +82,40 @@ extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock); /* * Pull the __raw*() functions/declarations (UP-nondebug doesnt need them): */ -#if defined(CONFIG_SMP) +#ifdef CONFIG_SMP # include <asm/spinlock.h> #else # include <linux/spinlock_up.h> #endif -#define spin_lock_init(lock) do { *(lock) = SPIN_LOCK_UNLOCKED; } while (0) -#define rwlock_init(lock) do { *(lock) = RW_LOCK_UNLOCKED; } while (0) +#ifdef CONFIG_DEBUG_SPINLOCK + extern void __spin_lock_init(spinlock_t *lock, const char 
*name, + struct lock_class_key *key); +# define spin_lock_init(lock) \ +do { \ + static struct lock_class_key __key; \ + \ + __spin_lock_init((lock), #lock, &__key); \ +} while (0) + +#else +# define spin_lock_init(lock) \ + do { *(lock) = SPIN_LOCK_UNLOCKED; } while (0) +#endif + +#ifdef CONFIG_DEBUG_SPINLOCK + extern void __rwlock_init(rwlock_t *lock, const char *name, + struct lock_class_key *key); +# define rwlock_init(lock) \ +do { \ + static struct lock_class_key __key; \ + \ + __rwlock_init((lock), #lock, &__key); \ +} while (0) +#else +# define rwlock_init(lock) \ + do { *(lock) = RW_LOCK_UNLOCKED; } while (0) +#endif #define spin_is_locked(lock) __raw_spin_is_locked(&(lock)->raw_lock) @@ -113,7 +139,6 @@ extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock); #define _raw_spin_lock_flags(lock, flags) _raw_spin_lock(lock) extern int _raw_spin_trylock(spinlock_t *lock); extern void _raw_spin_unlock(spinlock_t *lock); - extern void _raw_read_lock(rwlock_t *lock); extern int _raw_read_trylock(rwlock_t *lock); extern void _raw_read_unlock(rwlock_t *lock); @@ -121,17 +146,17 @@ extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock); extern int _raw_write_trylock(rwlock_t *lock); extern void _raw_write_unlock(rwlock_t *lock); #else -# define _raw_spin_unlock(lock) __raw_spin_unlock(&(lock)->raw_lock) -# define _raw_spin_trylock(lock) __raw_spin_trylock(&(lock)->raw_lock) # define _raw_spin_lock(lock) __raw_spin_lock(&(lock)->raw_lock) # define _raw_spin_lock_flags(lock, flags) \ __raw_spin_lock_flags(&(lock)->raw_lock, *(flags)) +# define _raw_spin_trylock(lock) __raw_spin_trylock(&(lock)->raw_lock) +# define _raw_spin_unlock(lock) __raw_spin_unlock(&(lock)->raw_lock) # define _raw_read_lock(rwlock) __raw_read_lock(&(rwlock)->raw_lock) -# define _raw_write_lock(rwlock) __raw_write_lock(&(rwlock)->raw_lock) -# define _raw_read_unlock(rwlock) __raw_read_unlock(&(rwlock)->raw_lock) -# define _raw_write_unlock(rwlock) 
__raw_write_unlock(&(rwlock)->raw_lock) # define _raw_read_trylock(rwlock) __raw_read_trylock(&(rwlock)->raw_lock) +# define _raw_read_unlock(rwlock) __raw_read_unlock(&(rwlock)->raw_lock) +# define _raw_write_lock(rwlock) __raw_write_lock(&(rwlock)->raw_lock) # define _raw_write_trylock(rwlock) __raw_write_trylock(&(rwlock)->raw_lock) +# define _raw_write_unlock(rwlock) __raw_write_unlock(&(rwlock)->raw_lock) #endif #define read_can_lock(rwlock) __raw_read_can_lock(&(rwlock)->raw_lock) @@ -147,6 +172,13 @@ extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock); #define write_trylock(lock) __cond_lock(_write_trylock(lock)) #define spin_lock(lock) _spin_lock(lock) + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define spin_lock_nested(lock, subclass) _spin_lock_nested(lock, subclass) +#else +# define spin_lock_nested(lock, subclass) _spin_lock(lock) +#endif + #define write_lock(lock) _write_lock(lock) #define read_lock(lock) _read_lock(lock) @@ -172,21 +204,18 @@ extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock); /* * We inline the unlock functions in the nondebug case: */ -#if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP) +#if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_PREEMPT) || \ + !defined(CONFIG_SMP) # define spin_unlock(lock) _spin_unlock(lock) # define read_unlock(lock) _read_unlock(lock) # define write_unlock(lock) _write_unlock(lock) -#else -# define spin_unlock(lock) __raw_spin_unlock(&(lock)->raw_lock) -# define read_unlock(lock) __raw_read_unlock(&(lock)->raw_lock) -# define write_unlock(lock) __raw_write_unlock(&(lock)->raw_lock) -#endif - -#if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP) # define spin_unlock_irq(lock) _spin_unlock_irq(lock) # define read_unlock_irq(lock) _read_unlock_irq(lock) # define write_unlock_irq(lock) _write_unlock_irq(lock) #else +# define spin_unlock(lock) __raw_spin_unlock(&(lock)->raw_lock) +# define read_unlock(lock) 
__raw_read_unlock(&(lock)->raw_lock) +# define write_unlock(lock) __raw_write_unlock(&(lock)->raw_lock) # define spin_unlock_irq(lock) \ do { __raw_spin_unlock(&(lock)->raw_lock); local_irq_enable(); } while (0) # define read_unlock_irq(lock) \ diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h index 78e6989ffb54..b2c4f8299464 100644 --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -20,6 +20,8 @@ int in_lock_functions(unsigned long addr); #define assert_spin_locked(x) BUG_ON(!spin_is_locked(x)) void __lockfunc _spin_lock(spinlock_t *lock) __acquires(spinlock_t); +void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass) + __acquires(spinlock_t); void __lockfunc _read_lock(rwlock_t *lock) __acquires(rwlock_t); void __lockfunc _write_lock(rwlock_t *lock) __acquires(rwlock_t); void __lockfunc _spin_lock_bh(spinlock_t *lock) __acquires(spinlock_t); diff --git a/include/linux/spinlock_api_up.h b/include/linux/spinlock_api_up.h index cd81cee566f4..67faa044c5f5 100644 --- a/include/linux/spinlock_api_up.h +++ b/include/linux/spinlock_api_up.h @@ -49,6 +49,7 @@ do { local_irq_restore(flags); __UNLOCK(lock); } while (0) #define _spin_lock(lock) __LOCK(lock) +#define _spin_lock_nested(lock, subclass) __LOCK(lock) #define _read_lock(lock) __LOCK(lock) #define _write_lock(lock) __LOCK(lock) #define _spin_lock_bh(lock) __LOCK_BH(lock) diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h index 9cb51e070390..dc5fb69e4de9 100644 --- a/include/linux/spinlock_types.h +++ b/include/linux/spinlock_types.h @@ -9,6 +9,8 @@ * Released under the General Public License (GPL). 
*/ +#include <linux/lockdep.h> + #if defined(CONFIG_SMP) # include <asm/spinlock_types.h> #else @@ -24,6 +26,9 @@ typedef struct { unsigned int magic, owner_cpu; void *owner; #endif +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif } spinlock_t; #define SPINLOCK_MAGIC 0xdead4ead @@ -37,31 +42,53 @@ typedef struct { unsigned int magic, owner_cpu; void *owner; #endif +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif } rwlock_t; #define RWLOCK_MAGIC 0xdeaf1eed #define SPINLOCK_OWNER_INIT ((void *)-1L) +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } +#else +# define SPIN_DEP_MAP_INIT(lockname) +#endif + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } +#else +# define RW_DEP_MAP_INIT(lockname) +#endif + #ifdef CONFIG_DEBUG_SPINLOCK -# define SPIN_LOCK_UNLOCKED \ +# define __SPIN_LOCK_UNLOCKED(lockname) \ (spinlock_t) { .raw_lock = __RAW_SPIN_LOCK_UNLOCKED, \ .magic = SPINLOCK_MAGIC, \ .owner = SPINLOCK_OWNER_INIT, \ - .owner_cpu = -1 } -#define RW_LOCK_UNLOCKED \ + .owner_cpu = -1, \ + SPIN_DEP_MAP_INIT(lockname) } +#define __RW_LOCK_UNLOCKED(lockname) \ (rwlock_t) { .raw_lock = __RAW_RW_LOCK_UNLOCKED, \ .magic = RWLOCK_MAGIC, \ .owner = SPINLOCK_OWNER_INIT, \ - .owner_cpu = -1 } + .owner_cpu = -1, \ + RW_DEP_MAP_INIT(lockname) } #else -# define SPIN_LOCK_UNLOCKED \ - (spinlock_t) { .raw_lock = __RAW_SPIN_LOCK_UNLOCKED } -#define RW_LOCK_UNLOCKED \ - (rwlock_t) { .raw_lock = __RAW_RW_LOCK_UNLOCKED } +# define __SPIN_LOCK_UNLOCKED(lockname) \ + (spinlock_t) { .raw_lock = __RAW_SPIN_LOCK_UNLOCKED, \ + SPIN_DEP_MAP_INIT(lockname) } +#define __RW_LOCK_UNLOCKED(lockname) \ + (rwlock_t) { .raw_lock = __RAW_RW_LOCK_UNLOCKED, \ + RW_DEP_MAP_INIT(lockname) } #endif -#define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED -#define DEFINE_RWLOCK(x) rwlock_t x = RW_LOCK_UNLOCKED +#define SPIN_LOCK_UNLOCKED 
__SPIN_LOCK_UNLOCKED(old_style_spin_init) +#define RW_LOCK_UNLOCKED __RW_LOCK_UNLOCKED(old_style_rw_init) + +#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) +#define DEFINE_RWLOCK(x) rwlock_t x = __RW_LOCK_UNLOCKED(x) #endif /* __LINUX_SPINLOCK_TYPES_H */ diff --git a/include/linux/spinlock_types_up.h b/include/linux/spinlock_types_up.h index 04135b0e198e..27644af20b7c 100644 --- a/include/linux/spinlock_types_up.h +++ b/include/linux/spinlock_types_up.h @@ -12,10 +12,14 @@ * Released under the General Public License (GPL). */ -#ifdef CONFIG_DEBUG_SPINLOCK +#if defined(CONFIG_DEBUG_SPINLOCK) || \ + defined(CONFIG_DEBUG_LOCK_ALLOC) typedef struct { volatile unsigned int slock; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif } raw_spinlock_t; #define __RAW_SPIN_LOCK_UNLOCKED { 1 } @@ -30,6 +34,9 @@ typedef struct { } raw_spinlock_t; typedef struct { /* no debug version on UP */ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif } raw_rwlock_t; #define __RAW_RW_LOCK_UNLOCKED { } diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h index 31accf2f0b13..ea54c4c9a4ec 100644 --- a/include/linux/spinlock_up.h +++ b/include/linux/spinlock_up.h @@ -18,7 +18,6 @@ */ #ifdef CONFIG_DEBUG_SPINLOCK - #define __raw_spin_is_locked(x) ((x)->slock == 0) static inline void __raw_spin_lock(raw_spinlock_t *lock) diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h new file mode 100644 index 000000000000..9cc81e572224 --- /dev/null +++ b/include/linux/stacktrace.h @@ -0,0 +1,20 @@ +#ifndef __LINUX_STACKTRACE_H +#define __LINUX_STACKTRACE_H + +#ifdef CONFIG_STACKTRACE +struct stack_trace { + unsigned int nr_entries, max_entries; + unsigned long *entries; +}; + +extern void save_stack_trace(struct stack_trace *trace, + struct task_struct *task, int all_contexts, + unsigned int skip); + +extern void print_stack_trace(struct stack_trace *trace, int spaces); +#else +# define save_stack_trace(trace, 
task, all, skip) do { } while (0) +# define print_stack_trace(trace) do { } while (0) +#endif + +#endif diff --git a/include/linux/swap.h b/include/linux/swap.h index cf6ca6e377bd..5e59184c9096 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -189,6 +189,7 @@ extern long vm_total_pages; #ifdef CONFIG_NUMA extern int zone_reclaim_mode; +extern int sysctl_min_unmapped_ratio; extern int zone_reclaim(struct zone *, gfp_t, unsigned int); #else #define zone_reclaim_mode 0 diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 46e4d8f2771f..e4b1a4d4dcf3 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -188,7 +188,7 @@ enum VM_DROP_PAGECACHE=29, /* int: nuke lots of pagecache */ VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */ VM_ZONE_RECLAIM_MODE=31, /* reclaim local zone memory before going off node */ - VM_ZONE_RECLAIM_INTERVAL=32, /* time period to wait after reclaim failure */ + VM_MIN_UNMAPPED=32, /* Set min percent of unmapped pages */ VM_PANIC_ON_OOM=33, /* panic at out-of-memory */ VM_VDSO_ENABLED=34, /* map VDSO into new processes? */ }; diff --git a/include/linux/wait.h b/include/linux/wait.h index 544e855c7c02..794be7af58ae 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -68,7 +68,7 @@ struct task_struct; wait_queue_t name = __WAITQUEUE_INITIALIZER(name, tsk) #define __WAIT_QUEUE_HEAD_INITIALIZER(name) { \ - .lock = SPIN_LOCK_UNLOCKED, \ + .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ .task_list = { &(name).task_list, &(name).task_list } } #define DECLARE_WAIT_QUEUE_HEAD(name) \ @@ -77,9 +77,15 @@ struct task_struct; #define __WAIT_BIT_KEY_INITIALIZER(word, bit) \ { .flags = word, .bit_nr = bit, } +/* + * lockdep: we want one lock-class for all waitqueue locks. 
+ */ +extern struct lock_class_key waitqueue_lock_key; + static inline void init_waitqueue_head(wait_queue_head_t *q) { spin_lock_init(&q->lock); + lockdep_set_class(&q->lock, &waitqueue_lock_key); INIT_LIST_HEAD(&q->task_list); } diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 5ba72d95280c..2fec827c8801 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -67,6 +67,9 @@ struct unix_skb_parms { #define unix_state_rlock(s) spin_lock(&unix_sk(s)->lock) #define unix_state_runlock(s) spin_unlock(&unix_sk(s)->lock) #define unix_state_wlock(s) spin_lock(&unix_sk(s)->lock) +#define unix_state_wlock_nested(s) \ + spin_lock_nested(&unix_sk(s)->lock, \ + SINGLE_DEPTH_NESTING) #define unix_state_wunlock(s) spin_unlock(&unix_sk(s)->lock) #ifdef __KERNEL__ diff --git a/include/net/sock.h b/include/net/sock.h index 7b3d6b856946..324b3ea233d6 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -44,6 +44,7 @@ #include <linux/timer.h> #include <linux/cache.h> #include <linux/module.h> +#include <linux/lockdep.h> #include <linux/netdevice.h> #include <linux/skbuff.h> /* struct sk_buff */ #include <linux/security.h> @@ -78,14 +79,17 @@ typedef struct { spinlock_t slock; struct sock_iocb *owner; wait_queue_head_t wq; + /* + * We express the mutex-alike socket_lock semantics + * to the lock validator by explicitly managing + * the slock as a lock variant (in addition to + * the slock itself): + */ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif } socket_lock_t; -#define sock_lock_init(__sk) \ -do { spin_lock_init(&((__sk)->sk_lock.slock)); \ - (__sk)->sk_lock.owner = NULL; \ - init_waitqueue_head(&((__sk)->sk_lock.wq)); \ -} while(0) - struct sock; struct proto; @@ -747,6 +751,9 @@ extern void FASTCALL(release_sock(struct sock *sk)); /* BH context may only use the following locking interface. 
*/ #define bh_lock_sock(__sk) spin_lock(&((__sk)->sk_lock.slock)) +#define bh_lock_sock_nested(__sk) \ + spin_lock_nested(&((__sk)->sk_lock.slock), \ + SINGLE_DEPTH_NESTING) #define bh_unlock_sock(__sk) spin_unlock(&((__sk)->sk_lock.slock)) extern struct sock *sk_alloc(int family, diff --git a/init/main.c b/init/main.c index b2f3b566790e..628b8e9e841a 100644 --- a/init/main.c +++ b/init/main.c @@ -47,6 +47,8 @@ #include <linux/key.h> #include <linux/unwind.h> #include <linux/buffer_head.h> +#include <linux/debug_locks.h> +#include <linux/lockdep.h> #include <asm/io.h> #include <asm/bugs.h> @@ -456,6 +458,16 @@ asmlinkage void __init start_kernel(void) smp_setup_processor_id(); + /* + * Need to run as early as possible, to initialize the + * lockdep hash: + */ + lockdep_init(); + + local_irq_disable(); + early_boot_irqs_off(); + early_init_irq_lock_class(); + /* * Interrupts are still disabled. Do necessary setups, then * enable them @@ -496,8 +508,13 @@ asmlinkage void __init start_kernel(void) init_timers(); hrtimers_init(); softirq_init(); - time_init(); timekeeping_init(); + time_init(); + profile_init(); + if (!irqs_disabled()) + printk("start_kernel(): bug: interrupts were enabled early\n"); + early_boot_irqs_on(); + local_irq_enable(); /* * HACK ALERT! This is early. 
We're enabling the console before @@ -507,8 +524,16 @@ asmlinkage void __init start_kernel(void) console_init(); if (panic_later) panic(panic_later, panic_param); - profile_init(); - local_irq_enable(); + + lockdep_info(); + + /* + * Need to run this when irqs are enabled, because it wants + * to self-test [hard/soft]-irqs on/off lock inversion bugs + * too: + */ + locking_selftest(); + #ifdef CONFIG_BLK_DEV_INITRD if (initrd_start && !initrd_below_start_ok && initrd_start < min_low_pfn << PAGE_SHIFT) { diff --git a/kernel/Makefile b/kernel/Makefile index 82fb182f6f61..47dbcd570cd8 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -8,10 +8,15 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ signal.o sys.o kmod.o workqueue.o pid.o \ rcupdate.o extable.o params.o posix-timers.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ - hrtimer.o + hrtimer.o rwsem.o +obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += time/ obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o +obj-$(CONFIG_LOCKDEP) += lockdep.o +ifeq ($(CONFIG_PROC_FS),y) +obj-$(CONFIG_LOCKDEP) += lockdep_proc.o +endif obj-$(CONFIG_FUTEX) += futex.o ifeq ($(CONFIG_COMPAT),y) obj-$(CONFIG_FUTEX) += futex_compat.o @@ -22,6 +27,7 @@ obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o obj-$(CONFIG_SMP) += cpu.o spinlock.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o +obj-$(CONFIG_PROVE_LOCKING) += spinlock.o obj-$(CONFIG_UID16) += uid16.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_KALLSYMS) += kallsyms.o diff --git a/kernel/capability.c b/kernel/capability.c index 1a4d8a40d3f9..c7685ad00a97 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -46,7 +46,7 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr) int ret = 0; pid_t pid; __u32 version; - task_t *target; + struct task_struct *target; struct __user_cap_data_struct data; if (get_user(version, &header->version)) @@ -96,7 +96,7 @@ static inline int 
cap_set_pg(int pgrp, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted) { - task_t *g, *target; + struct task_struct *g, *target; int ret = -EPERM; int found = 0; @@ -128,7 +128,7 @@ static inline int cap_set_all(kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted) { - task_t *g, *target; + struct task_struct *g, *target; int ret = -EPERM; int found = 0; @@ -172,7 +172,7 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) { kernel_cap_t inheritable, permitted, effective; __u32 version; - task_t *target; + struct task_struct *target; int ret; pid_t pid; diff --git a/kernel/exit.c b/kernel/exit.c index 7f7ef2258553..6664c084783d 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -134,8 +134,8 @@ static void delayed_put_task_struct(struct rcu_head *rhp) void release_task(struct task_struct * p) { + struct task_struct *leader; int zap_leader; - task_t *leader; repeat: atomic_dec(&p->user->processes); write_lock_irq(&tasklist_lock); @@ -209,7 +209,7 @@ out: * * "I ask you, have you ever known what it is to be an orphan?" */ -static int will_become_orphaned_pgrp(int pgrp, task_t *ignored_task) +static int will_become_orphaned_pgrp(int pgrp, struct task_struct *ignored_task) { struct task_struct *p; int ret = 1; @@ -582,7 +582,8 @@ static void exit_mm(struct task_struct * tsk) mmput(mm); } -static inline void choose_new_parent(task_t *p, task_t *reaper) +static inline void +choose_new_parent(struct task_struct *p, struct task_struct *reaper) { /* * Make sure we're not reparenting to ourselves and that @@ -592,7 +593,8 @@ static inline void choose_new_parent(task_t *p, task_t *reaper) p->real_parent = reaper; } -static void reparent_thread(task_t *p, task_t *father, int traced) +static void +reparent_thread(struct task_struct *p, struct task_struct *father, int traced) { /* We don't want people slaying init. 
*/ if (p->exit_signal != -1) @@ -656,8 +658,8 @@ static void reparent_thread(task_t *p, task_t *father, int traced) * group, and if no such member exists, give it to * the global child reaper process (ie "init") */ -static void forget_original_parent(struct task_struct * father, - struct list_head *to_release) +static void +forget_original_parent(struct task_struct *father, struct list_head *to_release) { struct task_struct *p, *reaper = father; struct list_head *_p, *_n; @@ -680,7 +682,7 @@ static void forget_original_parent(struct task_struct * father, */ list_for_each_safe(_p, _n, &father->children) { int ptrace; - p = list_entry(_p,struct task_struct,sibling); + p = list_entry(_p, struct task_struct, sibling); ptrace = p->ptrace; @@ -709,7 +711,7 @@ static void forget_original_parent(struct task_struct * father, list_add(&p->ptrace_list, to_release); } list_for_each_safe(_p, _n, &father->ptrace_children) { - p = list_entry(_p,struct task_struct,ptrace_list); + p = list_entry(_p, struct task_struct, ptrace_list); choose_new_parent(p, reaper); reparent_thread(p, father, 1); } @@ -829,7 +831,7 @@ static void exit_notify(struct task_struct *tsk) list_for_each_safe(_p, _n, &ptrace_dead) { list_del_init(_p); - t = list_entry(_p,struct task_struct,ptrace_list); + t = list_entry(_p, struct task_struct, ptrace_list); release_task(t); } @@ -933,10 +935,9 @@ fastcall NORET_TYPE void do_exit(long code) if (unlikely(current->pi_state_cache)) kfree(current->pi_state_cache); /* - * If DEBUG_MUTEXES is on, make sure we are holding no locks: + * Make sure we are holding no locks: */ - mutex_debug_check_no_locks_held(tsk); - rt_mutex_debug_check_no_locks_held(tsk); + debug_check_no_locks_held(tsk); if (tsk->io_context) exit_io_context(); @@ -1011,7 +1012,7 @@ asmlinkage void sys_exit_group(int error_code) do_group_exit((error_code & 0xff) << 8); } -static int eligible_child(pid_t pid, int options, task_t *p) +static int eligible_child(pid_t pid, int options, struct task_struct 
*p) { if (pid > 0) { if (p->pid != pid) @@ -1052,12 +1053,13 @@ static int eligible_child(pid_t pid, int options, task_t *p) return 1; } -static int wait_noreap_copyout(task_t *p, pid_t pid, uid_t uid, +static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid, int why, int status, struct siginfo __user *infop, struct rusage __user *rusagep) { int retval = rusagep ? getrusage(p, RUSAGE_BOTH, rusagep) : 0; + put_task_struct(p); if (!retval) retval = put_user(SIGCHLD, &infop->si_signo); @@ -1082,7 +1084,7 @@ static int wait_noreap_copyout(task_t *p, pid_t pid, uid_t uid, * the lock and this task is uninteresting. If we return nonzero, we have * released the lock and the system call should return. */ -static int wait_task_zombie(task_t *p, int noreap, +static int wait_task_zombie(struct task_struct *p, int noreap, struct siginfo __user *infop, int __user *stat_addr, struct rusage __user *ru) { @@ -1244,8 +1246,8 @@ static int wait_task_zombie(task_t *p, int noreap, * the lock and this task is uninteresting. If we return nonzero, we have * released the lock and the system call should return. */ -static int wait_task_stopped(task_t *p, int delayed_group_leader, int noreap, - struct siginfo __user *infop, +static int wait_task_stopped(struct task_struct *p, int delayed_group_leader, + int noreap, struct siginfo __user *infop, int __user *stat_addr, struct rusage __user *ru) { int retval, exit_code; @@ -1359,7 +1361,7 @@ bail_ref: * the lock and this task is uninteresting. If we return nonzero, we have * released the lock and the system call should return. 
*/ -static int wait_task_continued(task_t *p, int noreap, +static int wait_task_continued(struct task_struct *p, int noreap, struct siginfo __user *infop, int __user *stat_addr, struct rusage __user *ru) { @@ -1445,7 +1447,7 @@ repeat: int ret; list_for_each(_p,&tsk->children) { - p = list_entry(_p,struct task_struct,sibling); + p = list_entry(_p, struct task_struct, sibling); ret = eligible_child(pid, options, p); if (!ret) diff --git a/kernel/fork.c b/kernel/fork.c index 9064bf9e131b..56e4e07e45f7 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -193,7 +193,10 @@ static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) down_write(&oldmm->mmap_sem); flush_cache_mm(oldmm); - down_write(&mm->mmap_sem); + /* + * Not linked in yet - no deadlock potential: + */ + down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING); mm->locked_vm = 0; mm->mmap = NULL; @@ -919,10 +922,6 @@ static inline void rt_mutex_init_task(struct task_struct *p) spin_lock_init(&p->pi_lock); plist_head_init(&p->pi_waiters, &p->pi_lock); p->pi_blocked_on = NULL; -# ifdef CONFIG_DEBUG_RT_MUTEXES - spin_lock_init(&p->held_list_lock); - INIT_LIST_HEAD(&p->held_list_head); -# endif #endif } @@ -934,13 +933,13 @@ static inline void rt_mutex_init_task(struct task_struct *p) * parts of the process environment (as per the clone * flags). The actual kick-off is left to the caller. 
*/ -static task_t *copy_process(unsigned long clone_flags, - unsigned long stack_start, - struct pt_regs *regs, - unsigned long stack_size, - int __user *parent_tidptr, - int __user *child_tidptr, - int pid) +static struct task_struct *copy_process(unsigned long clone_flags, + unsigned long stack_start, + struct pt_regs *regs, + unsigned long stack_size, + int __user *parent_tidptr, + int __user *child_tidptr, + int pid) { int retval; struct task_struct *p = NULL; @@ -972,6 +971,10 @@ static task_t *copy_process(unsigned long clone_flags, if (!p) goto fork_out; +#ifdef CONFIG_TRACE_IRQFLAGS + DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); + DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); +#endif retval = -EAGAIN; if (atomic_read(&p->user->processes) >= p->signal->rlim[RLIMIT_NPROC].rlim_cur) { @@ -1046,6 +1049,26 @@ static task_t *copy_process(unsigned long clone_flags, } mpol_fix_fork_child_flag(p); #endif +#ifdef CONFIG_TRACE_IRQFLAGS + p->irq_events = 0; + p->hardirqs_enabled = 0; + p->hardirq_enable_ip = 0; + p->hardirq_enable_event = 0; + p->hardirq_disable_ip = _THIS_IP_; + p->hardirq_disable_event = 0; + p->softirqs_enabled = 1; + p->softirq_enable_ip = _THIS_IP_; + p->softirq_enable_event = 0; + p->softirq_disable_ip = 0; + p->softirq_disable_event = 0; + p->hardirq_context = 0; + p->softirq_context = 0; +#endif +#ifdef CONFIG_LOCKDEP + p->lockdep_depth = 0; /* no locks held yet */ + p->curr_chain_key = 0; + p->lockdep_recursion = 0; +#endif rt_mutex_init_task(p); @@ -1271,9 +1294,9 @@ struct pt_regs * __devinit __attribute__((weak)) idle_regs(struct pt_regs *regs) return regs; } -task_t * __devinit fork_idle(int cpu) +struct task_struct * __devinit fork_idle(int cpu) { - task_t *task; + struct task_struct *task; struct pt_regs regs; task = copy_process(CLONE_VM, 0, idle_regs(®s), 0, NULL, NULL, 0); diff --git a/kernel/futex.c b/kernel/futex.c index 15caf93e4a43..1dc98e4dd287 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -607,6 +607,22 @@ static int 
unlock_futex_pi(u32 __user *uaddr, u32 uval) } /* + * Express the locking dependencies for lockdep: + */ +static inline void +double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2) +{ + if (hb1 <= hb2) { + spin_lock(&hb1->lock); + if (hb1 < hb2) + spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING); + } else { /* hb1 > hb2 */ + spin_lock(&hb2->lock); + spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING); + } +} + +/* * Wake up all waiters hashed on the physical page that is mapped * to this virtual address: */ @@ -674,11 +690,7 @@ retryfull: hb2 = hash_futex(&key2); retry: - if (hb1 < hb2) - spin_lock(&hb1->lock); - spin_lock(&hb2->lock); - if (hb1 > hb2) - spin_lock(&hb1->lock); + double_lock_hb(hb1, hb2); op_ret = futex_atomic_op_inuser(op, uaddr2); if (unlikely(op_ret < 0)) { @@ -787,11 +799,7 @@ static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2, hb1 = hash_futex(&key1); hb2 = hash_futex(&key2); - if (hb1 < hb2) - spin_lock(&hb1->lock); - spin_lock(&hb2->lock); - if (hb1 > hb2) - spin_lock(&hb1->lock); + double_lock_hb(hb1, hb2); if (likely(cmpval != NULL)) { u32 curval; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 8d3dc29ef41a..d17766d40dab 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -669,7 +669,7 @@ static int hrtimer_wakeup(struct hrtimer *timer) return HRTIMER_NORESTART; } -void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, task_t *task) +void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task) { sl->timer.function = hrtimer_wakeup; sl->task = task; @@ -782,8 +782,10 @@ static void __devinit init_hrtimers_cpu(int cpu) struct hrtimer_base *base = per_cpu(hrtimer_bases, cpu); int i; - for (i = 0; i < MAX_HRTIMER_BASES; i++, base++) + for (i = 0; i < MAX_HRTIMER_BASES; i++, base++) { spin_lock_init(&base->lock); + lockdep_set_class(&base->lock, &base->lock_key); + } } #ifdef CONFIG_HOTPLUG_CPU diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 54105bdfe20d..9336f2e89e40 
100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -261,10 +261,13 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs) * keep it masked and get out of here */ action = desc->action; - if (unlikely(!action || (desc->status & IRQ_DISABLED))) + if (unlikely(!action || (desc->status & IRQ_DISABLED))) { + desc->status |= IRQ_PENDING; goto out; + } desc->status |= IRQ_INPROGRESS; + desc->status &= ~IRQ_PENDING; spin_unlock(&desc->lock); action_ret = handle_IRQ_event(irq, regs, action); diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index aeb6e391276c..fc4e906aedbd 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -132,7 +132,7 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct pt_regs *regs, handle_dynamic_tick(action); if (!(action->flags & IRQF_DISABLED)) - local_irq_enable(); + local_irq_enable_in_hardirq(); do { ret = action->handler(irq, action->dev_id, regs); @@ -249,3 +249,19 @@ out: return 1; } +#ifdef CONFIG_TRACE_IRQFLAGS + +/* + * lockdep: we want to handle all irq_desc locks as a single lock-class: + */ +static struct lock_class_key irq_desc_lock_class; + +void early_init_irq_lock_class(void) +{ + int i; + + for (i = 0; i < NR_IRQS; i++) + lockdep_set_class(&irq_desc[i].lock, &irq_desc_lock_class); +} + +#endif diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index c911c6ec4dd6..4e461438e48b 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -410,6 +410,12 @@ int request_irq(unsigned int irq, struct irqaction *action; int retval; +#ifdef CONFIG_LOCKDEP + /* + * Lockdep wants atomic interrupt handlers: + */ + irqflags |= SA_INTERRUPT; +#endif /* * Sanity-check: shared interrupts must pass in a real dev-ID, * otherwise we'll have trouble later trying to figure out diff --git a/kernel/kmod.c b/kernel/kmod.c index 1b7157af051c..1d32defa38ab 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -233,7 +233,7 @@ static void __call_usermodehelper(void *data) int 
call_usermodehelper_keys(char *path, char **argv, char **envp, struct key *session_keyring, int wait) { - DECLARE_COMPLETION(done); + DECLARE_COMPLETION_ONSTACK(done); struct subprocess_info sub_info = { .complete = &done, .path = path, diff --git a/kernel/lockdep.c b/kernel/lockdep.c new file mode 100644 index 000000000000..f32ca78c198d --- /dev/null +++ b/kernel/lockdep.c @@ -0,0 +1,2702 @@ +/* + * kernel/lockdep.c + * + * Runtime locking correctness validator + * + * Started by Ingo Molnar: + * + * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + * + * this code maps all the lock dependencies as they occur in a live kernel + * and will warn about the following classes of locking bugs: + * + * - lock inversion scenarios + * - circular lock dependencies + * - hardirq/softirq safe/unsafe locking bugs + * + * Bugs are reported even if the current locking scenario does not cause + * any deadlock at this point. + * + * I.e. if anytime in the past two locks were taken in a different order, + * even if it happened for another task, even if those were different + * locks (but of the same class as this lock), this code will detect it. + * + * Thanks to Arjan van de Ven for coming up with the initial idea of + * mapping lock dependencies runtime. + */ +#include <linux/mutex.h> +#include <linux/sched.h> +#include <linux/delay.h> +#include <linux/module.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/spinlock.h> +#include <linux/kallsyms.h> +#include <linux/interrupt.h> +#include <linux/stacktrace.h> +#include <linux/debug_locks.h> +#include <linux/irqflags.h> + +#include <asm/sections.h> + +#include "lockdep_internals.h" + +/* + * hash_lock: protects the lockdep hashes and class/list/hash allocators. + * + * This is one of the rare exceptions where it's justified + * to use a raw spinlock - we really dont want the spinlock + * code to recurse back into the lockdep code. 
+ */ +static raw_spinlock_t hash_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + +static int lockdep_initialized; + +unsigned long nr_list_entries; +static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES]; + +/* + * Allocate a lockdep entry. (assumes hash_lock held, returns + * with NULL on failure) + */ +static struct lock_list *alloc_list_entry(void) +{ + if (nr_list_entries >= MAX_LOCKDEP_ENTRIES) { + __raw_spin_unlock(&hash_lock); + debug_locks_off(); + printk("BUG: MAX_LOCKDEP_ENTRIES too low!\n"); + printk("turning off the locking correctness validator.\n"); + return NULL; + } + return list_entries + nr_list_entries++; +} + +/* + * All data structures here are protected by the global debug_lock. + * + * Mutex key structs only get allocated, once during bootup, and never + * get freed - this significantly simplifies the debugging code. + */ +unsigned long nr_lock_classes; +static struct lock_class lock_classes[MAX_LOCKDEP_KEYS]; + +/* + * We keep a global list of all lock classes. The list only grows, + * never shrinks. The list is only accessed with the lockdep + * spinlock lock held. 
+ */ +LIST_HEAD(all_lock_classes); + +/* + * The lockdep classes are in a hash-table as well, for fast lookup: + */ +#define CLASSHASH_BITS (MAX_LOCKDEP_KEYS_BITS - 1) +#define CLASSHASH_SIZE (1UL << CLASSHASH_BITS) +#define CLASSHASH_MASK (CLASSHASH_SIZE - 1) +#define __classhashfn(key) ((((unsigned long)key >> CLASSHASH_BITS) + (unsigned long)key) & CLASSHASH_MASK) +#define classhashentry(key) (classhash_table + __classhashfn((key))) + +static struct list_head classhash_table[CLASSHASH_SIZE]; + +unsigned long nr_lock_chains; +static struct lock_chain lock_chains[MAX_LOCKDEP_CHAINS]; + +/* + * We put the lock dependency chains into a hash-table as well, to cache + * their existence: + */ +#define CHAINHASH_BITS (MAX_LOCKDEP_CHAINS_BITS-1) +#define CHAINHASH_SIZE (1UL << CHAINHASH_BITS) +#define CHAINHASH_MASK (CHAINHASH_SIZE - 1) +#define __chainhashfn(chain) \ + (((chain >> CHAINHASH_BITS) + chain) & CHAINHASH_MASK) +#define chainhashentry(chain) (chainhash_table + __chainhashfn((chain))) + +static struct list_head chainhash_table[CHAINHASH_SIZE]; + +/* + * The hash key of the lock dependency chains is a hash itself too: + * it's a hash of all locks taken up to that lock, including that lock. + * It's a 64-bit hash, because it's important for the keys to be + * unique. 
+ */ +#define iterate_chain_key(key1, key2) \ + (((key1) << MAX_LOCKDEP_KEYS_BITS/2) ^ \ + ((key1) >> (64-MAX_LOCKDEP_KEYS_BITS/2)) ^ \ + (key2)) + +void lockdep_off(void) +{ + current->lockdep_recursion++; +} + +EXPORT_SYMBOL(lockdep_off); + +void lockdep_on(void) +{ + current->lockdep_recursion--; +} + +EXPORT_SYMBOL(lockdep_on); + +int lockdep_internal(void) +{ + return current->lockdep_recursion != 0; +} + +EXPORT_SYMBOL(lockdep_internal); + +/* + * Debugging switches: + */ + +#define VERBOSE 0 +#ifdef VERBOSE +# define VERY_VERBOSE 0 +#endif + +#if VERBOSE +# define HARDIRQ_VERBOSE 1 +# define SOFTIRQ_VERBOSE 1 +#else +# define HARDIRQ_VERBOSE 0 +# define SOFTIRQ_VERBOSE 0 +#endif + +#if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE +/* + * Quick filtering for interesting events: + */ +static int class_filter(struct lock_class *class) +{ + if (class->name_version == 1 && + !strcmp(class->name, "&rl->lock")) + return 1; + if (class->name_version == 1 && + !strcmp(class->name, "&ni->mrec_lock")) + return 1; + if (class->name_version == 1 && + !strcmp(class->name, "mft_ni_runlist_lock")) + return 1; + if (class->name_version == 1 && + !strcmp(class->name, "mft_ni_mrec_lock")) + return 1; + if (class->name_version == 1 && + !strcmp(class->name, "&vol->lcnbmp_lock")) + return 1; + return 0; +} +#endif + +static int verbose(struct lock_class *class) +{ +#if VERBOSE + return class_filter(class); +#endif + return 0; +} + +#ifdef CONFIG_TRACE_IRQFLAGS + +static int hardirq_verbose(struct lock_class *class) +{ +#if HARDIRQ_VERBOSE + return class_filter(class); +#endif + return 0; +} + +static int softirq_verbose(struct lock_class *class) +{ +#if SOFTIRQ_VERBOSE + return class_filter(class); +#endif + return 0; +} + +#endif + +/* + * Stack-trace: tightly packed array of stack backtrace + * addresses. Protected by the hash_lock. 
+ */ +unsigned long nr_stack_trace_entries; +static unsigned long stack_trace[MAX_STACK_TRACE_ENTRIES]; + +static int save_trace(struct stack_trace *trace) +{ + trace->nr_entries = 0; + trace->max_entries = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries; + trace->entries = stack_trace + nr_stack_trace_entries; + + save_stack_trace(trace, NULL, 0, 3); + + trace->max_entries = trace->nr_entries; + + nr_stack_trace_entries += trace->nr_entries; + if (DEBUG_LOCKS_WARN_ON(nr_stack_trace_entries > MAX_STACK_TRACE_ENTRIES)) + return 0; + + if (nr_stack_trace_entries == MAX_STACK_TRACE_ENTRIES) { + __raw_spin_unlock(&hash_lock); + if (debug_locks_off()) { + printk("BUG: MAX_STACK_TRACE_ENTRIES too low!\n"); + printk("turning off the locking correctness validator.\n"); + dump_stack(); + } + return 0; + } + + return 1; +} + +unsigned int nr_hardirq_chains; +unsigned int nr_softirq_chains; +unsigned int nr_process_chains; +unsigned int max_lockdep_depth; +unsigned int max_recursion_depth; + +#ifdef CONFIG_DEBUG_LOCKDEP +/* + * We cannot printk in early bootup code. Not even early_printk() + * might work. So we mark any initialization errors and printk + * about it later on, in lockdep_info(). 
+ */ +static int lockdep_init_error; + +/* + * Various lockdep statistics: + */ +atomic_t chain_lookup_hits; +atomic_t chain_lookup_misses; +atomic_t hardirqs_on_events; +atomic_t hardirqs_off_events; +atomic_t redundant_hardirqs_on; +atomic_t redundant_hardirqs_off; +atomic_t softirqs_on_events; +atomic_t softirqs_off_events; +atomic_t redundant_softirqs_on; +atomic_t redundant_softirqs_off; +atomic_t nr_unused_locks; +atomic_t nr_cyclic_checks; +atomic_t nr_cyclic_check_recursions; +atomic_t nr_find_usage_forwards_checks; +atomic_t nr_find_usage_forwards_recursions; +atomic_t nr_find_usage_backwards_checks; +atomic_t nr_find_usage_backwards_recursions; +# define debug_atomic_inc(ptr) atomic_inc(ptr) +# define debug_atomic_dec(ptr) atomic_dec(ptr) +# define debug_atomic_read(ptr) atomic_read(ptr) +#else +# define debug_atomic_inc(ptr) do { } while (0) +# define debug_atomic_dec(ptr) do { } while (0) +# define debug_atomic_read(ptr) 0 +#endif + +/* + * Locking printouts: + */ + +static const char *usage_str[] = +{ + [LOCK_USED] = "initial-use ", + [LOCK_USED_IN_HARDIRQ] = "in-hardirq-W", + [LOCK_USED_IN_SOFTIRQ] = "in-softirq-W", + [LOCK_ENABLED_SOFTIRQS] = "softirq-on-W", + [LOCK_ENABLED_HARDIRQS] = "hardirq-on-W", + [LOCK_USED_IN_HARDIRQ_READ] = "in-hardirq-R", + [LOCK_USED_IN_SOFTIRQ_READ] = "in-softirq-R", + [LOCK_ENABLED_SOFTIRQS_READ] = "softirq-on-R", + [LOCK_ENABLED_HARDIRQS_READ] = "hardirq-on-R", +}; + +const char * __get_key_name(struct lockdep_subclass_key *key, char *str) +{ + unsigned long offs, size; + char *modname; + + return kallsyms_lookup((unsigned long)key, &size, &offs, &modname, str); +} + +void +get_usage_chars(struct lock_class *class, char *c1, char *c2, char *c3, char *c4) +{ + *c1 = '.', *c2 = '.', *c3 = '.', *c4 = '.'; + + if (class->usage_mask & LOCKF_USED_IN_HARDIRQ) + *c1 = '+'; + else + if (class->usage_mask & LOCKF_ENABLED_HARDIRQS) + *c1 = '-'; + + if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ) + *c2 = '+'; + else + if 
(class->usage_mask & LOCKF_ENABLED_SOFTIRQS) + *c2 = '-'; + + if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ) + *c3 = '-'; + if (class->usage_mask & LOCKF_USED_IN_HARDIRQ_READ) { + *c3 = '+'; + if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ) + *c3 = '?'; + } + + if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ) + *c4 = '-'; + if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ_READ) { + *c4 = '+'; + if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ) + *c4 = '?'; + } +} + +static void print_lock_name(struct lock_class *class) +{ + char str[128], c1, c2, c3, c4; + const char *name; + + get_usage_chars(class, &c1, &c2, &c3, &c4); + + name = class->name; + if (!name) { + name = __get_key_name(class->key, str); + printk(" (%s", name); + } else { + printk(" (%s", name); + if (class->name_version > 1) + printk("#%d", class->name_version); + if (class->subclass) + printk("/%d", class->subclass); + } + printk("){%c%c%c%c}", c1, c2, c3, c4); +} + +static void print_lockdep_cache(struct lockdep_map *lock) +{ + const char *name; + char str[128]; + + name = lock->name; + if (!name) + name = __get_key_name(lock->key->subkeys, str); + + printk("%s", name); +} + +static void print_lock(struct held_lock *hlock) +{ + print_lock_name(hlock->class); + printk(", at: "); + print_ip_sym(hlock->acquire_ip); +} + +static void lockdep_print_held_locks(struct task_struct *curr) +{ + int i, depth = curr->lockdep_depth; + + if (!depth) { + printk("no locks held by %s/%d.\n", curr->comm, curr->pid); + return; + } + printk("%d lock%s held by %s/%d:\n", + depth, depth > 1 ? 
"s" : "", curr->comm, curr->pid); + + for (i = 0; i < depth; i++) { + printk(" #%d: ", i); + print_lock(curr->held_locks + i); + } +} +/* + * Helper to print a nice hierarchy of lock dependencies: + */ +static void print_spaces(int nr) +{ + int i; + + for (i = 0; i < nr; i++) + printk(" "); +} + +static void print_lock_class_header(struct lock_class *class, int depth) +{ + int bit; + + print_spaces(depth); + printk("->"); + print_lock_name(class); + printk(" ops: %lu", class->ops); + printk(" {\n"); + + for (bit = 0; bit < LOCK_USAGE_STATES; bit++) { + if (class->usage_mask & (1 << bit)) { + int len = depth; + + print_spaces(depth); + len += printk(" %s", usage_str[bit]); + len += printk(" at:\n"); + print_stack_trace(class->usage_traces + bit, len); + } + } + print_spaces(depth); + printk(" }\n"); + + print_spaces(depth); + printk(" ... key at: "); + print_ip_sym((unsigned long)class->key); +} + +/* + * printk all lock dependencies starting at <entry>: + */ +static void print_lock_dependencies(struct lock_class *class, int depth) +{ + struct lock_list *entry; + + if (DEBUG_LOCKS_WARN_ON(depth >= 20)) + return; + + print_lock_class_header(class, depth); + + list_for_each_entry(entry, &class->locks_after, entry) { + DEBUG_LOCKS_WARN_ON(!entry->class); + print_lock_dependencies(entry->class, depth + 1); + + print_spaces(depth); + printk(" ... 
acquired at:\n"); + print_stack_trace(&entry->trace, 2); + printk("\n"); + } +} + +/* + * Add a new dependency to the head of the list: + */ +static int add_lock_to_list(struct lock_class *class, struct lock_class *this, + struct list_head *head, unsigned long ip) +{ + struct lock_list *entry; + /* + * Lock not present yet - get a new dependency struct and + * add it to the list: + */ + entry = alloc_list_entry(); + if (!entry) + return 0; + + entry->class = this; + save_trace(&entry->trace); + + /* + * Since we never remove from the dependency list, the list can + * be walked lockless by other CPUs, it's only allocation + * that must be protected by the spinlock. But this also means + * we must make new entries visible only once writes to the + * entry become visible - hence the RCU op: + */ + list_add_tail_rcu(&entry->entry, head); + + return 1; +} + +/* + * Recursive, forwards-direction lock-dependency checking, used for + * both noncyclic checking and for hardirq-unsafe/softirq-unsafe + * checking. + * + * (to keep the stackframe of the recursive functions small we + * use these global variables, and we also mark various helper + * functions as noinline.) 
+ */ +static struct held_lock *check_source, *check_target; + +/* + * Print a dependency chain entry (this is only done when a deadlock + * has been detected): + */ +static noinline int +print_circular_bug_entry(struct lock_list *target, unsigned int depth) +{ + if (debug_locks_silent) + return 0; + printk("\n-> #%u", depth); + print_lock_name(target->class); + printk(":\n"); + print_stack_trace(&target->trace, 6); + + return 0; +} + +/* + * When a circular dependency is detected, print the + * header first: + */ +static noinline int +print_circular_bug_header(struct lock_list *entry, unsigned int depth) +{ + struct task_struct *curr = current; + + __raw_spin_unlock(&hash_lock); + debug_locks_off(); + if (debug_locks_silent) + return 0; + + printk("\n=======================================================\n"); + printk( "[ INFO: possible circular locking dependency detected ]\n"); + printk( "-------------------------------------------------------\n"); + printk("%s/%d is trying to acquire lock:\n", + curr->comm, curr->pid); + print_lock(check_source); + printk("\nbut task is already holding lock:\n"); + print_lock(check_target); + printk("\nwhich lock already depends on the new lock.\n\n"); + printk("\nthe existing dependency chain (in reverse order) is:\n"); + + print_circular_bug_entry(entry, depth); + + return 0; +} + +static noinline int print_circular_bug_tail(void) +{ + struct task_struct *curr = current; + struct lock_list this; + + if (debug_locks_silent) + return 0; + + this.class = check_source->class; + save_trace(&this.trace); + print_circular_bug_entry(&this, 0); + + printk("\nother info that might help us debug this:\n\n"); + lockdep_print_held_locks(curr); + + printk("\nstack backtrace:\n"); + dump_stack(); + + return 0; +} + +static int noinline print_infinite_recursion_bug(void) +{ + __raw_spin_unlock(&hash_lock); + DEBUG_LOCKS_WARN_ON(1); + + return 0; +} + +/* + * Prove that the dependency graph starting at <entry> can not + * lead to <target>. 
Print an error and return 0 if it does. + */ +static noinline int +check_noncircular(struct lock_class *source, unsigned int depth) +{ + struct lock_list *entry; + + debug_atomic_inc(&nr_cyclic_check_recursions); + if (depth > max_recursion_depth) + max_recursion_depth = depth; + if (depth >= 20) + return print_infinite_recursion_bug(); + /* + * Check this lock's dependency list: + */ + list_for_each_entry(entry, &source->locks_after, entry) { + if (entry->class == check_target->class) + return print_circular_bug_header(entry, depth+1); + debug_atomic_inc(&nr_cyclic_checks); + if (!check_noncircular(entry->class, depth+1)) + return print_circular_bug_entry(entry, depth+1); + } + return 1; +} + +static int very_verbose(struct lock_class *class) +{ +#if VERY_VERBOSE + return class_filter(class); +#endif + return 0; +} +#ifdef CONFIG_TRACE_IRQFLAGS + +/* + * Forwards and backwards subgraph searching, for the purposes of + * proving that two subgraphs can be connected by a new dependency + * without creating any illegal irq-safe -> irq-unsafe lock dependency. + */ +static enum lock_usage_bit find_usage_bit; +static struct lock_class *forwards_match, *backwards_match; + +/* + * Find a node in the forwards-direction dependency sub-graph starting + * at <source> that matches <find_usage_bit>. + * + * Return 2 if such a node exists in the subgraph, and put that node + * into <forwards_match>. + * + * Return 1 otherwise and keep <forwards_match> unchanged. + * Return 0 on error. 
+ */ +static noinline int +find_usage_forwards(struct lock_class *source, unsigned int depth) +{ + struct lock_list *entry; + int ret; + + if (depth > max_recursion_depth) + max_recursion_depth = depth; + if (depth >= 20) + return print_infinite_recursion_bug(); + + debug_atomic_inc(&nr_find_usage_forwards_checks); + if (source->usage_mask & (1 << find_usage_bit)) { + forwards_match = source; + return 2; + } + + /* + * Check this lock's dependency list: + */ + list_for_each_entry(entry, &source->locks_after, entry) { + debug_atomic_inc(&nr_find_usage_forwards_recursions); + ret = find_usage_forwards(entry->class, depth+1); + if (ret == 2 || ret == 0) + return ret; + } + return 1; +} + +/* + * Find a node in the backwards-direction dependency sub-graph starting + * at <source> that matches <find_usage_bit>. + * + * Return 2 if such a node exists in the subgraph, and put that node + * into <backwards_match>. + * + * Return 1 otherwise and keep <backwards_match> unchanged. + * Return 0 on error. 
+ */ +static noinline int +find_usage_backwards(struct lock_class *source, unsigned int depth) +{ + struct lock_list *entry; + int ret; + + if (depth > max_recursion_depth) + max_recursion_depth = depth; + if (depth >= 20) + return print_infinite_recursion_bug(); + + debug_atomic_inc(&nr_find_usage_backwards_checks); + if (source->usage_mask & (1 << find_usage_bit)) { + backwards_match = source; + return 2; + } + + /* + * Check this lock's dependency list: + */ + list_for_each_entry(entry, &source->locks_before, entry) { + debug_atomic_inc(&nr_find_usage_backwards_recursions); + ret = find_usage_backwards(entry->class, depth+1); + if (ret == 2 || ret == 0) + return ret; + } + return 1; +} + +static int +print_bad_irq_dependency(struct task_struct *curr, + struct held_lock *prev, + struct held_lock *next, + enum lock_usage_bit bit1, + enum lock_usage_bit bit2, + const char *irqclass) +{ + __raw_spin_unlock(&hash_lock); + debug_locks_off(); + if (debug_locks_silent) + return 0; + + printk("\n======================================================\n"); + printk( "[ INFO: %s-safe -> %s-unsafe lock order detected ]\n", + irqclass, irqclass); + printk( "------------------------------------------------------\n"); + printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n", + curr->comm, curr->pid, + curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT, + curr->softirq_context, softirq_count() >> SOFTIRQ_SHIFT, + curr->hardirqs_enabled, + curr->softirqs_enabled); + print_lock(next); + + printk("\nand this task is already holding:\n"); + print_lock(prev); + printk("which would create a new lock dependency:\n"); + print_lock_name(prev->class); + printk(" ->"); + print_lock_name(next->class); + printk("\n"); + + printk("\nbut this new dependency connects a %s-irq-safe lock:\n", + irqclass); + print_lock_name(backwards_match); + printk("\n... 
which became %s-irq-safe at:\n", irqclass); + + print_stack_trace(backwards_match->usage_traces + bit1, 1); + + printk("\nto a %s-irq-unsafe lock:\n", irqclass); + print_lock_name(forwards_match); + printk("\n... which became %s-irq-unsafe at:\n", irqclass); + printk("..."); + + print_stack_trace(forwards_match->usage_traces + bit2, 1); + + printk("\nother info that might help us debug this:\n\n"); + lockdep_print_held_locks(curr); + + printk("\nthe %s-irq-safe lock's dependencies:\n", irqclass); + print_lock_dependencies(backwards_match, 0); + + printk("\nthe %s-irq-unsafe lock's dependencies:\n", irqclass); + print_lock_dependencies(forwards_match, 0); + + printk("\nstack backtrace:\n"); + dump_stack(); + + return 0; +} + +static int +check_usage(struct task_struct *curr, struct held_lock *prev, + struct held_lock *next, enum lock_usage_bit bit_backwards, + enum lock_usage_bit bit_forwards, const char *irqclass) +{ + int ret; + + find_usage_bit = bit_backwards; + /* fills in <backwards_match> */ + ret = find_usage_backwards(prev->class, 0); + if (!ret || ret == 1) + return ret; + + find_usage_bit = bit_forwards; + ret = find_usage_forwards(next->class, 0); + if (!ret || ret == 1) + return ret; + /* ret == 2 */ + return print_bad_irq_dependency(curr, prev, next, + bit_backwards, bit_forwards, irqclass); +} + +#endif + +static int +print_deadlock_bug(struct task_struct *curr, struct held_lock *prev, + struct held_lock *next) +{ + debug_locks_off(); + __raw_spin_unlock(&hash_lock); + if (debug_locks_silent) + return 0; + + printk("\n=============================================\n"); + printk( "[ INFO: possible recursive locking detected ]\n"); + printk( "---------------------------------------------\n"); + printk("%s/%d is trying to acquire lock:\n", + curr->comm, curr->pid); + print_lock(next); + printk("\nbut task is already holding lock:\n"); + print_lock(prev); + + printk("\nother info that might help us debug this:\n"); + lockdep_print_held_locks(curr); + + 
printk("\nstack backtrace:\n"); + dump_stack(); + + return 0; +} + +/* + * Check whether we are holding such a class already. + * + * (Note that this has to be done separately, because the graph cannot + * detect such classes of deadlocks.) + * + * Returns: 0 on deadlock detected, 1 on OK, 2 on recursive read + */ +static int +check_deadlock(struct task_struct *curr, struct held_lock *next, + struct lockdep_map *next_instance, int read) +{ + struct held_lock *prev; + int i; + + for (i = 0; i < curr->lockdep_depth; i++) { + prev = curr->held_locks + i; + if (prev->class != next->class) + continue; + /* + * Allow read-after-read recursion of the same + * lock class (i.e. read_lock(lock)+read_lock(lock)): + */ + if ((read == 2) && prev->read) + return 2; + return print_deadlock_bug(curr, prev, next); + } + return 1; +} + +/* + * There was a chain-cache miss, and we are about to add a new dependency + * to a previous lock. We recursively validate the following rules: + * + * - would the adding of the <prev> -> <next> dependency create a + * circular dependency in the graph? [== circular deadlock] + * + * - does the new prev->next dependency connect any hardirq-safe lock + * (in the full backwards-subgraph starting at <prev>) with any + * hardirq-unsafe lock (in the full forwards-subgraph starting at + * <next>)? [== illegal lock inversion with hardirq contexts] + * + * - does the new prev->next dependency connect any softirq-safe lock + * (in the full backwards-subgraph starting at <prev>) with any + * softirq-unsafe lock (in the full forwards-subgraph starting at + * <next>)? [== illegal lock inversion with softirq contexts] + * + * any of these scenarios could lead to a deadlock. + * + * Then if all the validations pass, we add the forwards and backwards + * dependency. 
+ */
+/*
+ * Returns: 0 if one of the checks did not pass or a list entry could
+ * not be added, 2 if the <prev> -> <next> dependency was already
+ * recorded, 1 otherwise. (The circular-dependency failure path returns
+ * whatever print_circular_bug_tail() returns.)
+ */
+static int
+check_prev_add(struct task_struct *curr, struct held_lock *prev,
+	       struct held_lock *next)
+{
+	struct lock_list *entry;
+	int ret;
+
+	/*
+	 * Prove that the new <prev> -> <next> dependency would not
+	 * create a circular dependency in the graph. (We do this by
+	 * forward-recursing into the graph starting at <next>, and
+	 * checking whether we can reach <prev>.)
+	 *
+	 * We are using global variables to control the recursion, to
+	 * keep the stackframe size of the recursive functions low:
+	 */
+	check_source = next;
+	check_target = prev;
+	if (!(check_noncircular(next->class, 0)))
+		return print_circular_bug_tail();
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+	/*
+	 * Prove that the new dependency does not connect a hardirq-safe
+	 * lock with a hardirq-unsafe lock - to achieve this we search
+	 * the backwards-subgraph starting at <prev>, and the
+	 * forwards-subgraph starting at <next>:
+	 */
+	if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ,
+					LOCK_ENABLED_HARDIRQS, "hard"))
+		return 0;
+
+	/*
+	 * Prove that the new dependency does not connect a hardirq-safe-read
+	 * lock with a hardirq-unsafe lock - to achieve this we search
+	 * the backwards-subgraph starting at <prev>, and the
+	 * forwards-subgraph starting at <next>:
+	 */
+	if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ_READ,
+					LOCK_ENABLED_HARDIRQS, "hard-read"))
+		return 0;
+
+	/*
+	 * Prove that the new dependency does not connect a softirq-safe
+	 * lock with a softirq-unsafe lock - to achieve this we search
+	 * the backwards-subgraph starting at <prev>, and the
+	 * forwards-subgraph starting at <next>:
+	 */
+	if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ,
+					LOCK_ENABLED_SOFTIRQS, "soft"))
+		return 0;
+	/*
+	 * Prove that the new dependency does not connect a softirq-safe-read
+	 * lock with a softirq-unsafe lock - to achieve this we search
+	 * the backwards-subgraph starting at <prev>, and the
+	 * forwards-subgraph starting at <next>:
+	 *
+	 * (reported as "soft-read", mirroring the "hard-read" label used
+	 * for the hardirq-safe-read check above)
+	 */
+	if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ_READ,
+					LOCK_ENABLED_SOFTIRQS, "soft-read"))
+		return 0;
+#endif
+	/*
+	 * For recursive read-locks we do all the dependency checks,
+	 * but we dont store read-triggered dependencies (only
+	 * write-triggered dependencies). This ensures that only the
+	 * write-side dependencies matter, and that if for example a
+	 * write-lock never takes any other locks, then the reads are
+	 * equivalent to a NOP.
+	 */
+	if (next->read == 2 || prev->read == 2)
+		return 1;
+	/*
+	 * Is the <prev> -> <next> dependency already present?
+	 *
+	 * (this may occur even though this is a new chain: consider
+	 *  e.g. the L1 -> L2 -> L3 -> L4 and the L5 -> L1 -> L2 -> L3
+	 *  chains - the second one will be new, but L1 already has
+	 *  L2 added to its dependency list, due to the first chain.)
+	 */
+	list_for_each_entry(entry, &prev->class->locks_after, entry) {
+		if (entry->class == next->class)
+			return 2;
+	}
+
+	/*
+	 * Ok, all validations passed, add the new lock
+	 * to the previous lock's dependency list:
+	 */
+	ret = add_lock_to_list(prev->class, next->class,
+			       &prev->class->locks_after, next->acquire_ip);
+	if (!ret)
+		return 0;
+	/*
+	 * Return value of 2 signals 'dependency already added',
+	 * in that case we dont have to add the backlink either.
+	 */
+	if (ret == 2)
+		return 2;
+	ret = add_lock_to_list(next->class, prev->class,
+			       &next->class->locks_before, next->acquire_ip);
+	/*
+	 * Treat a failed backlink insertion exactly like a failed
+	 * forward-link insertion above, instead of carrying on to the
+	 * debugging printout and reporting success:
+	 */
+	if (!ret)
+		return 0;
+
+	/*
+	 * Debugging printouts (hash_lock is dropped around the printk
+	 * calls and re-taken afterwards):
+	 */
+	if (verbose(prev->class) || verbose(next->class)) {
+		__raw_spin_unlock(&hash_lock);
+		printk("\n new dependency: ");
+		print_lock_name(prev->class);
+		printk(" => ");
+		print_lock_name(next->class);
+		printk("\n");
+		dump_stack();
+		__raw_spin_lock(&hash_lock);
+	}
+	return 1;
+}
+
+/*
+ * Add the dependency to all directly-previous locks that are 'relevant'.
+ * The ones that are relevant are (in increasing distance from curr):
+ * all consecutive trylock entries and the final non-trylock entry - or
+ * the end of this context's lock-chain - whichever comes first.
+ *
+ * Returns 1 on success. Returns 0 if a sanity check fails (hash_lock is
+ * released here first) or if check_prev_add() reports a failure.
+ */
+static int
+check_prevs_add(struct task_struct *curr, struct held_lock *next)
+{
+	int depth = curr->lockdep_depth;
+	struct held_lock *hlock;
+
+	/*
+	 * Debugging checks.
+	 *
+	 * Depth must not be zero for a non-head lock:
+	 */
+	if (!depth)
+		goto out_bug;
+	/*
+	 * The entry at <depth> and the one below it must be in the
+	 * same irq context - otherwise there is no previous lock in
+	 * this context to link to and we should not have been called:
+	 */
+	if (curr->held_locks[depth].irq_context !=
+			curr->held_locks[depth-1].irq_context)
+		goto out_bug;
+
+	for (;;) {
+		hlock = curr->held_locks + depth-1;
+		/*
+		 * Only non-recursive-read entries get new dependencies
+		 * added:
+		 */
+		if (hlock->read != 2) {
+			/*
+			 * Propagate a failure right away rather than
+			 * walking on and returning success: the report
+			 * helpers reached from check_prev_add() (e.g.
+			 * print_bad_irq_dependency()) release hash_lock
+			 * before returning 0, just like out_bug below.
+			 */
+			if (!check_prev_add(curr, hlock, next))
+				return 0;
+			/*
+			 * Stop after the first non-trylock entry,
+			 * as non-trylock entries have added their
+			 * own direct dependencies already, so this
+			 * lock is connected to them indirectly:
+			 */
+			if (!hlock->trylock)
+				break;
+		}
+		depth--;
+		/*
+		 * End of lock-stack?
+		 */
+		if (!depth)
+			break;
+		/*
+		 * Stop the search if we cross into another context:
+		 */
+		if (curr->held_locks[depth].irq_context !=
+				curr->held_locks[depth-1].irq_context)
+			break;
+	}
+	return 1;
+out_bug:
+	__raw_spin_unlock(&hash_lock);
+	DEBUG_LOCKS_WARN_ON(1);
+
+	return 0;
+}
+
+
+/*
+ * Is this the address of a static object:
+ */
+static int static_obj(void *obj)
+{
+	unsigned long start = (unsigned long) &_stext,
+		      end = (unsigned long) &_end,
+		      addr = (unsigned long) obj;
+#ifdef CONFIG_SMP
+	int i;
+#endif
+
+	/*
+	 * static variable?
+	 */
+	if ((addr >= start) && (addr < end))
+		return 1;
+
+#ifdef CONFIG_SMP
+	/*
+	 * percpu var?
+ */ + for_each_possible_cpu(i) { + start = (unsigned long) &__per_cpu_start + per_cpu_offset(i); + end = (unsigned long) &__per_cpu_end + per_cpu_offset(i); + + if ((addr >= start) && (addr < end)) + return 1; + } +#endif + + /* + * module var? + */ + return is_module_address(addr); +} + +/* + * To make lock name printouts unique, we calculate a unique + * class->name_version generation counter: + */ +static int count_matching_names(struct lock_class *new_class) +{ + struct lock_class *class; + int count = 0; + + if (!new_class->name) + return 0; + + list_for_each_entry(class, &all_lock_classes, lock_entry) { + if (new_class->key - new_class->subclass == class->key) + return class->name_version; + if (class->name && !strcmp(class->name, new_class->name)) + count = max(count, class->name_version); + } + + return count + 1; +} + +extern void __error_too_big_MAX_LOCKDEP_SUBCLASSES(void); + +/* + * Register a lock's class in the hash-table, if the class is not present + * yet. Otherwise we look it up. We cache the result in the lock object + * itself, so actual lookup of the hash should be once per lock object. + */ +static inline struct lock_class * +register_lock_class(struct lockdep_map *lock, unsigned int subclass) +{ + struct lockdep_subclass_key *key; + struct list_head *hash_head; + struct lock_class *class; + +#ifdef CONFIG_DEBUG_LOCKDEP + /* + * If the architecture calls into lockdep before initializing + * the hashes then we'll warn about it later. (we cannot printk + * right now) + */ + if (unlikely(!lockdep_initialized)) { + lockdep_init(); + lockdep_init_error = 1; + } +#endif + + /* + * Static locks do not have their class-keys yet - for them the key + * is the lock object itself: + */ + if (unlikely(!lock->key)) + lock->key = (void *)lock; + + /* + * NOTE: the class-key must be unique. For dynamic locks, a static + * lock_class_key variable is passed in through the mutex_init() + * (or spin_lock_init()) call - which acts as the key. 
For static + * locks we use the lock object itself as the key. + */ + if (sizeof(struct lock_class_key) > sizeof(struct lock_class)) + __error_too_big_MAX_LOCKDEP_SUBCLASSES(); + + key = lock->key->subkeys + subclass; + + hash_head = classhashentry(key); + + /* + * We can walk the hash lockfree, because the hash only + * grows, and we are careful when adding entries to the end: + */ + list_for_each_entry(class, hash_head, hash_entry) + if (class->key == key) + goto out_set; + + /* + * Debug-check: all keys must be persistent! + */ + if (!static_obj(lock->key)) { + debug_locks_off(); + printk("INFO: trying to register non-static key.\n"); + printk("the code is fine but needs lockdep annotation.\n"); + printk("turning off the locking correctness validator.\n"); + dump_stack(); + + return NULL; + } + + __raw_spin_lock(&hash_lock); + /* + * We have to do the hash-walk again, to avoid races + * with another CPU: + */ + list_for_each_entry(class, hash_head, hash_entry) + if (class->key == key) + goto out_unlock_set; + /* + * Allocate a new key from the static array, and add it to + * the hash: + */ + if (nr_lock_classes >= MAX_LOCKDEP_KEYS) { + __raw_spin_unlock(&hash_lock); + debug_locks_off(); + printk("BUG: MAX_LOCKDEP_KEYS too low!\n"); + printk("turning off the locking correctness validator.\n"); + return NULL; + } + class = lock_classes + nr_lock_classes++; + debug_atomic_inc(&nr_unused_locks); + class->key = key; + class->name = lock->name; + class->subclass = subclass; + INIT_LIST_HEAD(&class->lock_entry); + INIT_LIST_HEAD(&class->locks_before); + INIT_LIST_HEAD(&class->locks_after); + class->name_version = count_matching_names(class); + /* + * We use RCU's safe list-add method to make + * parallel walking of the hash-list safe: + */ + list_add_tail_rcu(&class->hash_entry, hash_head); + + if (verbose(class)) { + __raw_spin_unlock(&hash_lock); + printk("\nnew class %p: %s", class->key, class->name); + if (class->name_version > 1) + printk("#%d", 
class->name_version); + printk("\n"); + dump_stack(); + __raw_spin_lock(&hash_lock); + } +out_unlock_set: + __raw_spin_unlock(&hash_lock); + +out_set: + lock->class[subclass] = class; + + DEBUG_LOCKS_WARN_ON(class->subclass != subclass); + + return class; +} + +/* + * Look up a dependency chain. If the key is not present yet then + * add it and return 1 - in this case the new dependency chain is + * validated by the caller, and hash_lock is left held on return. + * If the key is already hashed, return 0 (hash_lock not held) - + * except with CONFIG_DEBUG_LOCKDEP, where a hit also takes hash_lock + * and returns 1 to force redundant checking. + * + * Also returns 0, with hash_lock released and the validator turned + * off, when MAX_LOCKDEP_CHAINS is exhausted. + */ +static inline int lookup_chain_cache(u64 chain_key) +{ + struct list_head *hash_head = chainhashentry(chain_key); + struct lock_chain *chain; + + DEBUG_LOCKS_WARN_ON(!irqs_disabled()); + /* + * We can walk it lock-free, because entries only get added + * to the hash: + */ + list_for_each_entry(chain, hash_head, entry) { + if (chain->chain_key == chain_key) { +cache_hit: + debug_atomic_inc(&chain_lookup_hits); + /* + * In the debugging case, force redundant checking + * by returning 1: + */ +#ifdef CONFIG_DEBUG_LOCKDEP + __raw_spin_lock(&hash_lock); + return 1; +#endif + return 0; + } + } + /* + * Allocate a new chain entry from the static array, and add + * it to the hash: + */ + __raw_spin_lock(&hash_lock); + /* + * We have to walk the chain again locked - to avoid duplicates: + */ + list_for_each_entry(chain, hash_head, entry) { + if (chain->chain_key == chain_key) { + __raw_spin_unlock(&hash_lock); + goto cache_hit; + } + } + if (unlikely(nr_lock_chains >= MAX_LOCKDEP_CHAINS)) { + __raw_spin_unlock(&hash_lock); + debug_locks_off(); + printk("BUG: MAX_LOCKDEP_CHAINS too low!\n"); + printk("turning off the locking correctness validator.\n"); + return 0; + } + chain = lock_chains + nr_lock_chains++; + chain->chain_key = chain_key; + list_add_tail_rcu(&chain->entry, hash_head); + debug_atomic_inc(&chain_lookup_misses); +#ifdef CONFIG_TRACE_IRQFLAGS + if (current->hardirq_context) + nr_hardirq_chains++; + else { + if (current->softirq_context) + nr_softirq_chains++; + else + nr_process_chains++; + } +#else + 
nr_process_chains++; +#endif + + return 1; +} + +/* + * We are building curr_chain_key incrementally, so double-check + * it from scratch, to make sure that it's done correctly. + * + * (Only does anything with CONFIG_DEBUG_LOCKDEP; on a mismatch the + * validator is turned off and a warning is printed.) + */ +static void check_chain_key(struct task_struct *curr) +{ +#ifdef CONFIG_DEBUG_LOCKDEP + struct held_lock *hlock, *prev_hlock = NULL; + unsigned int i, id; + u64 chain_key = 0; + + for (i = 0; i < curr->lockdep_depth; i++) { + hlock = curr->held_locks + i; + if (chain_key != hlock->prev_chain_key) { + debug_locks_off(); + printk("hm#1, depth: %u [%u], %016Lx != %016Lx\n", + curr->lockdep_depth, i, + (unsigned long long)chain_key, + (unsigned long long)hlock->prev_chain_key); + WARN_ON(1); + return; + } + id = hlock->class - lock_classes; + DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS); + if (prev_hlock && (prev_hlock->irq_context != + hlock->irq_context)) + chain_key = 0; + chain_key = iterate_chain_key(chain_key, id); + prev_hlock = hlock; + } + if (chain_key != curr->curr_chain_key) { + debug_locks_off(); + printk("hm#2, depth: %u [%u], %016Lx != %016Lx\n", + curr->lockdep_depth, i, + (unsigned long long)chain_key, + (unsigned long long)curr->curr_chain_key); + WARN_ON(1); + } +#endif +} + +#ifdef CONFIG_TRACE_IRQFLAGS + +/* + * print irq inversion bug: + */ +static int +print_irq_inversion_bug(struct task_struct *curr, struct lock_class *other, + struct held_lock *this, int forwards, + const char *irqclass) +{ + __raw_spin_unlock(&hash_lock); + debug_locks_off(); + if (debug_locks_silent) + return 0; + + printk("\n=========================================================\n"); + printk( "[ INFO: possible irq lock inversion dependency detected ]\n"); + printk( "---------------------------------------------------------\n"); + printk("%s/%d just changed the state of lock:\n", + curr->comm, curr->pid); + print_lock(this); + if (forwards) + printk("but this lock took another, %s-irq-unsafe lock in the past:\n", irqclass); + else + printk("but this lock was taken by another, %s-irq-safe lock
in the past:\n", irqclass); + print_lock_name(other); + printk("\n\nand interrupts could create inverse lock ordering between them.\n\n"); + + printk("\nother info that might help us debug this:\n"); + lockdep_print_held_locks(curr); + + printk("\nthe first lock's dependencies:\n"); + print_lock_dependencies(this->class, 0); + + printk("\nthe second lock's dependencies:\n"); + print_lock_dependencies(other, 0); + + printk("\nstack backtrace:\n"); + dump_stack(); + + return 0; +} + +/* + * Prove that in the forwards-direction subgraph starting at <this> + * there is no lock matching <mask>: + */ +static int +check_usage_forwards(struct task_struct *curr, struct held_lock *this, + enum lock_usage_bit bit, const char *irqclass) +{ + int ret; + + find_usage_bit = bit; + /* fills in <forwards_match> */ + ret = find_usage_forwards(this->class, 0); + if (!ret || ret == 1) + return ret; + + return print_irq_inversion_bug(curr, forwards_match, this, 1, irqclass); +} + +/* + * Prove that in the backwards-direction subgraph starting at <this> + * there is no lock matching <mask>: + */ +static int +check_usage_backwards(struct task_struct *curr, struct held_lock *this, + enum lock_usage_bit bit, const char *irqclass) +{ + int ret; + + find_usage_bit = bit; + /* fills in <backwards_match> */ + ret = find_usage_backwards(this->class, 0); + if (!ret || ret == 1) + return ret; + + return print_irq_inversion_bug(curr, backwards_match, this, 0, irqclass); +} + +static inline void print_irqtrace_events(struct task_struct *curr) +{ + printk("irq event stamp: %u\n", curr->irq_events); + printk("hardirqs last enabled at (%u): ", curr->hardirq_enable_event); + print_ip_sym(curr->hardirq_enable_ip); + printk("hardirqs last disabled at (%u): ", curr->hardirq_disable_event); + print_ip_sym(curr->hardirq_disable_ip); + printk("softirqs last enabled at (%u): ", curr->softirq_enable_event); + print_ip_sym(curr->softirq_enable_ip); + printk("softirqs last disabled at (%u): ", 
curr->softirq_disable_event);
+	print_ip_sym(curr->softirq_disable_ip);
+}
+
+#else
+static inline void print_irqtrace_events(struct task_struct *curr)
+{
+}
+#endif
+
+/*
+ * Report an inconsistent usage-bit transition on <this>: drops
+ * hash_lock, turns the validator off and - unless debug_locks_silent
+ * is set - prints the report. Always returns 0.
+ */
+static int
+print_usage_bug(struct task_struct *curr, struct held_lock *this,
+		enum lock_usage_bit prev_bit, enum lock_usage_bit new_bit)
+{
+	__raw_spin_unlock(&hash_lock);
+	debug_locks_off();
+	if (debug_locks_silent)
+		return 0;
+
+	printk("\n=================================\n");
+	printk( "[ INFO: inconsistent lock state ]\n");
+	printk( "---------------------------------\n");
+
+	printk("inconsistent {%s} -> {%s} usage.\n",
+		usage_str[prev_bit], usage_str[new_bit]);
+
+	printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n",
+		curr->comm, curr->pid,
+		trace_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT,
+		trace_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT,
+		trace_hardirqs_enabled(curr),
+		trace_softirqs_enabled(curr));
+	print_lock(this);
+
+	printk("{%s} state was registered at:\n", usage_str[prev_bit]);
+	print_stack_trace(this->class->usage_traces + prev_bit, 1);
+
+	print_irqtrace_events(curr);
+	printk("\nother info that might help us debug this:\n");
+	lockdep_print_held_locks(curr);
+
+	printk("\nstack backtrace:\n");
+	dump_stack();
+
+	return 0;
+}
+
+/*
+ * Print out an error if an invalid bit is set:
+ *
+ * Returns 1 if <bad_bit> is clear in the class's usage mask, otherwise
+ * the result of print_usage_bug() (0).
+ */
+static inline int
+valid_state(struct task_struct *curr, struct held_lock *this,
+	    enum lock_usage_bit new_bit, enum lock_usage_bit bad_bit)
+{
+	if (unlikely(this->class->usage_mask & (1 << bad_bit)))
+		return print_usage_bug(curr, this, bad_bit, new_bit);
+	return 1;
+}
+
+#define STRICT_READ_CHECKS	1
+
+/*
+ * Mark a lock with a usage bit, and validate the state transition:
+ *
+ * Returns 1 if the bit was already set or was set and validated,
+ * 2 if it was set, validated and the class is flagged verbose (the
+ * new state is then printed after hash_lock has been dropped), and
+ * 0 on any failure.
+ */
+static int mark_lock(struct task_struct *curr, struct held_lock *this,
+		     enum lock_usage_bit new_bit, unsigned long ip)
+{
+	unsigned int new_mask = 1 << new_bit, ret = 1;
+
+	/*
+	 * If already set then do not dirty the cacheline,
+	 * nor do any checks:
+	 */
+	if (likely(this->class->usage_mask & new_mask))
+		return 1;
+
+	__raw_spin_lock(&hash_lock);
+	/*
+	 * Make sure we didnt race:
+	 */
+	if (unlikely(this->class->usage_mask & new_mask)) {
+		__raw_spin_unlock(&hash_lock);
+		return 1;
+	}
+
+	this->class->usage_mask |= new_mask;
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+	if (new_bit == LOCK_ENABLED_HARDIRQS ||
+			new_bit == LOCK_ENABLED_HARDIRQS_READ)
+		ip = curr->hardirq_enable_ip;
+	else if (new_bit == LOCK_ENABLED_SOFTIRQS ||
+			new_bit == LOCK_ENABLED_SOFTIRQS_READ)
+		ip = curr->softirq_enable_ip;
+#endif
+	/*
+	 * NOTE(review): no unlock on this failure path - presumably
+	 * save_trace() drops hash_lock itself when it fails; confirm.
+	 */
+	if (!save_trace(this->class->usage_traces + new_bit))
+		return 0;
+
+	/*
+	 * The failure returns inside the switch below rely on the
+	 * reporting helpers (print_usage_bug() and
+	 * print_irq_inversion_bug()) having released hash_lock.
+	 */
+	switch (new_bit) {
+#ifdef CONFIG_TRACE_IRQFLAGS
+	case LOCK_USED_IN_HARDIRQ:
+		if (!valid_state(curr, this, new_bit, LOCK_ENABLED_HARDIRQS))
+			return 0;
+		if (!valid_state(curr, this, new_bit,
+				 LOCK_ENABLED_HARDIRQS_READ))
+			return 0;
+		/*
+		 * just marked it hardirq-safe, check that this lock
+		 * took no hardirq-unsafe lock in the past:
+		 */
+		if (!check_usage_forwards(curr, this,
+					  LOCK_ENABLED_HARDIRQS, "hard"))
+			return 0;
+#if STRICT_READ_CHECKS
+		/*
+		 * just marked it hardirq-safe, check that this lock
+		 * took no hardirq-unsafe-read lock in the past:
+		 */
+		if (!check_usage_forwards(curr, this,
+				LOCK_ENABLED_HARDIRQS_READ, "hard-read"))
+			return 0;
+#endif
+		if (hardirq_verbose(this->class))
+			ret = 2;
+		break;
+	case LOCK_USED_IN_SOFTIRQ:
+		if (!valid_state(curr, this, new_bit, LOCK_ENABLED_SOFTIRQS))
+			return 0;
+		if (!valid_state(curr, this, new_bit,
+				 LOCK_ENABLED_SOFTIRQS_READ))
+			return 0;
+		/*
+		 * just marked it softirq-safe, check that this lock
+		 * took no softirq-unsafe lock in the past:
+		 */
+		if (!check_usage_forwards(curr, this,
+					  LOCK_ENABLED_SOFTIRQS, "soft"))
+			return 0;
+#if STRICT_READ_CHECKS
+		/*
+		 * just marked it softirq-safe, check that this lock
+		 * took no softirq-unsafe-read lock in the past:
+		 */
+		if (!check_usage_forwards(curr, this,
+				LOCK_ENABLED_SOFTIRQS_READ, "soft-read"))
+			return 0;
+#endif
+		if (softirq_verbose(this->class))
+			ret = 2;
+		break;
+	case LOCK_USED_IN_HARDIRQ_READ:
+		if (!valid_state(curr, this, new_bit, LOCK_ENABLED_HARDIRQS))
+			return 0;
+		/*
+		 * just marked it hardirq-read-safe, check that this lock
+		 * took no hardirq-unsafe lock in the past:
+		 */
+		if (!check_usage_forwards(curr, this,
+					  LOCK_ENABLED_HARDIRQS, "hard"))
+			return 0;
+		if (hardirq_verbose(this->class))
+			ret = 2;
+		break;
+	case LOCK_USED_IN_SOFTIRQ_READ:
+		if (!valid_state(curr, this, new_bit, LOCK_ENABLED_SOFTIRQS))
+			return 0;
+		/*
+		 * just marked it softirq-read-safe, check that this lock
+		 * took no softirq-unsafe lock in the past:
+		 */
+		if (!check_usage_forwards(curr, this,
+					  LOCK_ENABLED_SOFTIRQS, "soft"))
+			return 0;
+		if (softirq_verbose(this->class))
+			ret = 2;
+		break;
+	case LOCK_ENABLED_HARDIRQS:
+		if (!valid_state(curr, this, new_bit, LOCK_USED_IN_HARDIRQ))
+			return 0;
+		if (!valid_state(curr, this, new_bit,
+				 LOCK_USED_IN_HARDIRQ_READ))
+			return 0;
+		/*
+		 * just marked it hardirq-unsafe, check that no hardirq-safe
+		 * lock in the system ever took it in the past:
+		 */
+		if (!check_usage_backwards(curr, this,
+					   LOCK_USED_IN_HARDIRQ, "hard"))
+			return 0;
+#if STRICT_READ_CHECKS
+		/*
+		 * just marked it hardirq-unsafe, check that no
+		 * hardirq-safe-read lock in the system ever took
+		 * it in the past:
+		 */
+		if (!check_usage_backwards(curr, this,
+				LOCK_USED_IN_HARDIRQ_READ, "hard-read"))
+			return 0;
+#endif
+		if (hardirq_verbose(this->class))
+			ret = 2;
+		break;
+	case LOCK_ENABLED_SOFTIRQS:
+		if (!valid_state(curr, this, new_bit, LOCK_USED_IN_SOFTIRQ))
+			return 0;
+		if (!valid_state(curr, this, new_bit,
+				 LOCK_USED_IN_SOFTIRQ_READ))
+			return 0;
+		/*
+		 * just marked it softirq-unsafe, check that no softirq-safe
+		 * lock in the system ever took it in the past:
+		 */
+		if (!check_usage_backwards(curr, this,
+					   LOCK_USED_IN_SOFTIRQ, "soft"))
+			return 0;
+#if STRICT_READ_CHECKS
+		/*
+		 * just marked it softirq-unsafe, check that no
+		 * softirq-safe-read lock in the system ever took
+		 * it in the past:
+		 */
+		if (!check_usage_backwards(curr, this,
+				LOCK_USED_IN_SOFTIRQ_READ, "soft-read"))
+			return 0;
+#endif
+		if (softirq_verbose(this->class))
+			ret = 2;
+		break;
+	case LOCK_ENABLED_HARDIRQS_READ:
+		if (!valid_state(curr, this, new_bit, LOCK_USED_IN_HARDIRQ))
+			return 0;
+#if STRICT_READ_CHECKS
+		/*
+		 * just marked it hardirq-read-unsafe, check that no
+		 * hardirq-safe lock in the system ever took it in the past:
+		 */
+		if (!check_usage_backwards(curr, this,
+					   LOCK_USED_IN_HARDIRQ, "hard"))
+			return 0;
+#endif
+		if (hardirq_verbose(this->class))
+			ret = 2;
+		break;
+	case LOCK_ENABLED_SOFTIRQS_READ:
+		if (!valid_state(curr, this, new_bit, LOCK_USED_IN_SOFTIRQ))
+			return 0;
+#if STRICT_READ_CHECKS
+		/*
+		 * just marked it softirq-read-unsafe, check that no
+		 * softirq-safe lock in the system ever took it in the past:
+		 */
+		if (!check_usage_backwards(curr, this,
+					   LOCK_USED_IN_SOFTIRQ, "soft"))
+			return 0;
+#endif
+		if (softirq_verbose(this->class))
+			ret = 2;
+		break;
+#endif
+	case LOCK_USED:
+		/*
+		 * Add it to the global list of classes:
+		 */
+		list_add_tail_rcu(&this->class->lock_entry, &all_lock_classes);
+		debug_atomic_dec(&nr_unused_locks);
+		break;
+	default:
+		/*
+		 * Unknown usage bit. Nothing on this path has released
+		 * hash_lock yet, so drop it here before bailing out -
+		 * otherwise we would return with the lock still held:
+		 */
+		__raw_spin_unlock(&hash_lock);
+		debug_locks_off();
+		WARN_ON(1);
+		return 0;
+	}
+
+	__raw_spin_unlock(&hash_lock);
+
+	/*
+	 * We must printk outside of the hash_lock:
+	 */
+	if (ret == 2) {
+		printk("\nmarked lock as {%s}:\n", usage_str[new_bit]);
+		print_lock(this);
+		print_irqtrace_events(curr);
+		dump_stack();
+	}
+
+	return ret;
+}
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+/*
+ * Mark all held locks with a usage bit:
+ */
+static int
+mark_held_locks(struct task_struct *curr, int hardirq, unsigned long ip)
+{
+	enum lock_usage_bit usage_bit;
+	struct held_lock *hlock;
+	int i;
+
+	for (i = 0; i < curr->lockdep_depth; i++) {
+		hlock = curr->held_locks + i;
+
+		if (hardirq) {
+			if (hlock->read)
+				usage_bit = LOCK_ENABLED_HARDIRQS_READ;
+			else
+				usage_bit = LOCK_ENABLED_HARDIRQS;
+		} else {
+			if
(hlock->read) + usage_bit = LOCK_ENABLED_SOFTIRQS_READ; + else + usage_bit = LOCK_ENABLED_SOFTIRQS; + } + if (!mark_lock(curr, hlock, usage_bit, ip)) + return 0; + } + + return 1; +} + +/* + * Debugging helper: via this flag we know that we are in + * 'early bootup code', and will warn about any invalid irqs-on event: + */ +static int early_boot_irqs_enabled; + +void early_boot_irqs_off(void) +{ + early_boot_irqs_enabled = 0; +} + +void early_boot_irqs_on(void) +{ + early_boot_irqs_enabled = 1; +} + +/* + * Hardirqs will be enabled: + */ +void trace_hardirqs_on(void) +{ + struct task_struct *curr = current; + unsigned long ip; + + if (unlikely(!debug_locks || current->lockdep_recursion)) + return; + + if (DEBUG_LOCKS_WARN_ON(unlikely(!early_boot_irqs_enabled))) + return; + + if (unlikely(curr->hardirqs_enabled)) { + debug_atomic_inc(&redundant_hardirqs_on); + return; + } + /* we'll do an OFF -> ON transition: */ + curr->hardirqs_enabled = 1; + ip = (unsigned long) __builtin_return_address(0); + + if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) + return; + if (DEBUG_LOCKS_WARN_ON(current->hardirq_context)) + return; + /* + * We are going to turn hardirqs on, so set the + * usage bit for all held locks: + */ + if (!mark_held_locks(curr, 1, ip)) + return; + /* + * If we have softirqs enabled, then set the usage + * bit for all held locks. 
(disabled hardirqs prevented + * this bit from being set before) + */ + if (curr->softirqs_enabled) + if (!mark_held_locks(curr, 0, ip)) + return; + + curr->hardirq_enable_ip = ip; + curr->hardirq_enable_event = ++curr->irq_events; + debug_atomic_inc(&hardirqs_on_events); +} + +EXPORT_SYMBOL(trace_hardirqs_on); + +/* + * Hardirqs were disabled: + */ +void trace_hardirqs_off(void) +{ + struct task_struct *curr = current; + + if (unlikely(!debug_locks || current->lockdep_recursion)) + return; + + if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) + return; + + if (curr->hardirqs_enabled) { + /* + * We have done an ON -> OFF transition: + */ + curr->hardirqs_enabled = 0; + curr->hardirq_disable_ip = _RET_IP_; + curr->hardirq_disable_event = ++curr->irq_events; + debug_atomic_inc(&hardirqs_off_events); + } else + debug_atomic_inc(&redundant_hardirqs_off); +} + +EXPORT_SYMBOL(trace_hardirqs_off); + +/* + * Softirqs will be enabled: + */ +void trace_softirqs_on(unsigned long ip) +{ + struct task_struct *curr = current; + + if (unlikely(!debug_locks)) + return; + + if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) + return; + + if (curr->softirqs_enabled) { + debug_atomic_inc(&redundant_softirqs_on); + return; + } + + /* + * We'll do an OFF -> ON transition: + */ + curr->softirqs_enabled = 1; + curr->softirq_enable_ip = ip; + curr->softirq_enable_event = ++curr->irq_events; + debug_atomic_inc(&softirqs_on_events); + /* + * We are going to turn softirqs on, so set the + * usage bit for all held locks, if hardirqs are + * enabled too: + */ + if (curr->hardirqs_enabled) + mark_held_locks(curr, 0, ip); +} + +/* + * Softirqs were disabled: + */ +void trace_softirqs_off(unsigned long ip) +{ + struct task_struct *curr = current; + + if (unlikely(!debug_locks)) + return; + + if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) + return; + + if (curr->softirqs_enabled) { + /* + * We have done an ON -> OFF transition: + */ + curr->softirqs_enabled = 0; + curr->softirq_disable_ip = ip; + 
curr->softirq_disable_event = ++curr->irq_events; + debug_atomic_inc(&softirqs_off_events); + DEBUG_LOCKS_WARN_ON(!softirq_count()); + } else + debug_atomic_inc(&redundant_softirqs_off); +} + +#endif + +/* + * Initialize a lock instance's lock-class mapping info: + */ +void lockdep_init_map(struct lockdep_map *lock, const char *name, + struct lock_class_key *key) +{ + if (unlikely(!debug_locks)) + return; + + if (DEBUG_LOCKS_WARN_ON(!key)) + return; + if (DEBUG_LOCKS_WARN_ON(!name)) + return; + /* + * Sanity check, the lock-class key must be persistent: + */ + if (!static_obj(key)) { + printk("BUG: key %p not in .data!\n", key); + DEBUG_LOCKS_WARN_ON(1); + return; + } + lock->name = name; + lock->key = key; + memset(lock->class, 0, sizeof(lock->class[0])*MAX_LOCKDEP_SUBCLASSES); +} + +EXPORT_SYMBOL_GPL(lockdep_init_map); + +/* + * This gets called for every mutex_lock*()/spin_lock*() operation. + * We maintain the dependency maps and validate the locking attempt: + */ +static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, + int trylock, int read, int check, int hardirqs_off, + unsigned long ip) +{ + struct task_struct *curr = current; + struct held_lock *hlock; + struct lock_class *class; + unsigned int depth, id; + int chain_head = 0; + u64 chain_key; + + if (unlikely(!debug_locks)) + return 0; + + if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) + return 0; + + if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) { + debug_locks_off(); + printk("BUG: MAX_LOCKDEP_SUBCLASSES too low!\n"); + printk("turning off the locking correctness validator.\n"); + return 0; + } + + class = lock->class[subclass]; + /* not cached yet? 
*/ + if (unlikely(!class)) { + class = register_lock_class(lock, subclass); + if (!class) + return 0; + } + debug_atomic_inc((atomic_t *)&class->ops); + if (very_verbose(class)) { + printk("\nacquire class [%p] %s", class->key, class->name); + if (class->name_version > 1) + printk("#%d", class->name_version); + printk("\n"); + dump_stack(); + } + + /* + * Add the lock to the list of currently held locks. + * (we dont increase the depth just yet, up until the + * dependency checks are done) + */ + depth = curr->lockdep_depth; + if (DEBUG_LOCKS_WARN_ON(depth >= MAX_LOCK_DEPTH)) + return 0; + + hlock = curr->held_locks + depth; + + hlock->class = class; + hlock->acquire_ip = ip; + hlock->instance = lock; + hlock->trylock = trylock; + hlock->read = read; + hlock->check = check; + hlock->hardirqs_off = hardirqs_off; + + if (check != 2) + goto out_calc_hash; +#ifdef CONFIG_TRACE_IRQFLAGS + /* + * If non-trylock use in a hardirq or softirq context, then + * mark the lock as used in these contexts: + */ + if (!trylock) { + if (read) { + if (curr->hardirq_context) + if (!mark_lock(curr, hlock, + LOCK_USED_IN_HARDIRQ_READ, ip)) + return 0; + if (curr->softirq_context) + if (!mark_lock(curr, hlock, + LOCK_USED_IN_SOFTIRQ_READ, ip)) + return 0; + } else { + if (curr->hardirq_context) + if (!mark_lock(curr, hlock, LOCK_USED_IN_HARDIRQ, ip)) + return 0; + if (curr->softirq_context) + if (!mark_lock(curr, hlock, LOCK_USED_IN_SOFTIRQ, ip)) + return 0; + } + } + if (!hardirqs_off) { + if (read) { + if (!mark_lock(curr, hlock, + LOCK_ENABLED_HARDIRQS_READ, ip)) + return 0; + if (curr->softirqs_enabled) + if (!mark_lock(curr, hlock, + LOCK_ENABLED_SOFTIRQS_READ, ip)) + return 0; + } else { + if (!mark_lock(curr, hlock, + LOCK_ENABLED_HARDIRQS, ip)) + return 0; + if (curr->softirqs_enabled) + if (!mark_lock(curr, hlock, + LOCK_ENABLED_SOFTIRQS, ip)) + return 0; + } + } +#endif + /* mark it as used: */ + if (!mark_lock(curr, hlock, LOCK_USED, ip)) + return 0; +out_calc_hash: + /* + * 
Calculate the chain hash: it's the combined hash of all the + * lock keys along the dependency chain. We save the hash value + * at every step so that we can get the current hash easily + * after unlock. The chain hash is then used to cache dependency + * results. + * + * The 'key ID' is what is the most compact key value to drive + * the hash, not class->key. + */ + id = class - lock_classes; + if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS)) + return 0; + + chain_key = curr->curr_chain_key; + if (!depth) { + if (DEBUG_LOCKS_WARN_ON(chain_key != 0)) + return 0; + chain_head = 1; + } + + hlock->prev_chain_key = chain_key; + +#ifdef CONFIG_TRACE_IRQFLAGS + /* + * Keep track of points where we cross into an interrupt context: + */ + hlock->irq_context = 2*(curr->hardirq_context ? 1 : 0) + + curr->softirq_context; + if (depth) { + struct held_lock *prev_hlock; + + prev_hlock = curr->held_locks + depth-1; + /* + * If we cross into another context, reset the + * hash key (this also prevents the checking and the + * adding of the dependency to 'prev'): + */ + if (prev_hlock->irq_context != hlock->irq_context) { + chain_key = 0; + chain_head = 1; + } + } +#endif + chain_key = iterate_chain_key(chain_key, id); + curr->curr_chain_key = chain_key; + + /* + * Trylock needs to maintain the stack of held locks, but it + * does not add new dependencies, because trylock can be done + * in any order. + * + * We look up the chain_key and do the O(N^2) check and update of + * the dependencies only if this is a new dependency chain. + * (If lookup_chain_cache() returns with 1 it acquires + * hash_lock for us) + */ + if (!trylock && (check == 2) && lookup_chain_cache(chain_key)) { + /* + * Check whether last held lock: + * + * - is irq-safe, if this lock is irq-unsafe + * - is softirq-safe, if this lock is hardirq-unsafe + * + * And check whether the new lock's dependency graph + * could lead back to the previous lock. + * + * any of these scenarios could lead to a deadlock. 
If + * All validations + */ + int ret = check_deadlock(curr, hlock, lock, read); + + if (!ret) + return 0; + /* + * Mark recursive read, as we jump over it when + * building dependencies (just like we jump over + * trylock entries): + */ + if (ret == 2) + hlock->read = 2; + /* + * Add dependency only if this lock is not the head + * of the chain, and if it's not a secondary read-lock: + */ + if (!chain_head && ret != 2) + if (!check_prevs_add(curr, hlock)) + return 0; + __raw_spin_unlock(&hash_lock); + } + curr->lockdep_depth++; + check_chain_key(curr); + if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) { + debug_locks_off(); + printk("BUG: MAX_LOCK_DEPTH too low!\n"); + printk("turning off the locking correctness validator.\n"); + return 0; + } + if (unlikely(curr->lockdep_depth > max_lockdep_depth)) + max_lockdep_depth = curr->lockdep_depth; + + return 1; +} + +static int +print_unlock_inbalance_bug(struct task_struct *curr, struct lockdep_map *lock, + unsigned long ip) +{ + if (!debug_locks_off()) + return 0; + if (debug_locks_silent) + return 0; + + printk("\n=====================================\n"); + printk( "[ BUG: bad unlock balance detected! 
]\n"); + printk( "-------------------------------------\n"); + printk("%s/%d is trying to release lock (", + curr->comm, curr->pid); + print_lockdep_cache(lock); + printk(") at:\n"); + print_ip_sym(ip); + printk("but there are no more locks to release!\n"); + printk("\nother info that might help us debug this:\n"); + lockdep_print_held_locks(curr); + + printk("\nstack backtrace:\n"); + dump_stack(); + + return 0; +} + +/* + * Common debugging checks for both nested and non-nested unlock: + */ +static int check_unlock(struct task_struct *curr, struct lockdep_map *lock, + unsigned long ip) +{ + if (unlikely(!debug_locks)) + return 0; + if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) + return 0; + + if (curr->lockdep_depth <= 0) + return print_unlock_inbalance_bug(curr, lock, ip); + + return 1; +} + +/* + * Remove the lock from the list of currently held locks in a + * potentially non-nested (out of order) manner. This is a + * relatively rare operation, as all the unlock APIs default + * to nested mode (which uses lock_release()): + */ +static int +lock_release_non_nested(struct task_struct *curr, + struct lockdep_map *lock, unsigned long ip) +{ + struct held_lock *hlock, *prev_hlock; + unsigned int depth; + int i; + + /* + * Check whether the lock exists in the current stack + * of held locks: + */ + depth = curr->lockdep_depth; + if (DEBUG_LOCKS_WARN_ON(!depth)) + return 0; + + prev_hlock = NULL; + for (i = depth-1; i >= 0; i--) { + hlock = curr->held_locks + i; + /* + * We must not cross into another context: + */ + if (prev_hlock && prev_hlock->irq_context != hlock->irq_context) + break; + if (hlock->instance == lock) + goto found_it; + prev_hlock = hlock; + } + return print_unlock_inbalance_bug(curr, lock, ip); + +found_it: + /* + * We have the right lock to unlock, 'hlock' points to it. 
+ * Now we remove it from the stack, and add back the other + * entries (if any), recalculating the hash along the way: + */ + curr->lockdep_depth = i; + curr->curr_chain_key = hlock->prev_chain_key; + + for (i++; i < depth; i++) { + hlock = curr->held_locks + i; + if (!__lock_acquire(hlock->instance, + hlock->class->subclass, hlock->trylock, + hlock->read, hlock->check, hlock->hardirqs_off, + hlock->acquire_ip)) + return 0; + } + + if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth - 1)) + return 0; + return 1; +} + +/* + * Remove the lock from the list of currently held locks - this gets + * called on mutex_unlock()/spin_unlock*() (or on a failed + * mutex_lock_interruptible()). This is done for unlocks that nest + * perfectly. (i.e. the current top of the lock-stack is unlocked) + */ +static int lock_release_nested(struct task_struct *curr, + struct lockdep_map *lock, unsigned long ip) +{ + struct held_lock *hlock; + unsigned int depth; + + /* + * Pop off the top of the lock stack: + */ + depth = curr->lockdep_depth - 1; + hlock = curr->held_locks + depth; + + /* + * Is the unlock non-nested: + */ + if (hlock->instance != lock) + return lock_release_non_nested(curr, lock, ip); + curr->lockdep_depth--; + + if (DEBUG_LOCKS_WARN_ON(!depth && (hlock->prev_chain_key != 0))) + return 0; + + curr->curr_chain_key = hlock->prev_chain_key; + +#ifdef CONFIG_DEBUG_LOCKDEP + hlock->prev_chain_key = 0; + hlock->class = NULL; + hlock->acquire_ip = 0; + hlock->irq_context = 0; +#endif + return 1; +} + +/* + * Remove the lock from the list of currently held locks - this gets + * called on mutex_unlock()/spin_unlock*() (or on a failed + * mutex_lock_interruptible()). This is done for unlocks that nest + * perfectly. (i.e. 
the current top of the lock-stack is unlocked) + */ +static void +__lock_release(struct lockdep_map *lock, int nested, unsigned long ip) +{ + struct task_struct *curr = current; + + if (!check_unlock(curr, lock, ip)) + return; + + if (nested) { + if (!lock_release_nested(curr, lock, ip)) + return; + } else { + if (!lock_release_non_nested(curr, lock, ip)) + return; + } + + check_chain_key(curr); +} + +/* + * Check whether we follow the irq-flags state precisely: + */ +static void check_flags(unsigned long flags) +{ +#if defined(CONFIG_DEBUG_LOCKDEP) && defined(CONFIG_TRACE_IRQFLAGS) + if (!debug_locks) + return; + + if (irqs_disabled_flags(flags)) + DEBUG_LOCKS_WARN_ON(current->hardirqs_enabled); + else + DEBUG_LOCKS_WARN_ON(!current->hardirqs_enabled); + + /* + * We dont accurately track softirq state in e.g. + * hardirq contexts (such as on 4KSTACKS), so only + * check if not in hardirq contexts: + */ + if (!hardirq_count()) { + if (softirq_count()) + DEBUG_LOCKS_WARN_ON(current->softirqs_enabled); + else + DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled); + } + + if (!debug_locks) + print_irqtrace_events(current); +#endif +} + +/* + * We are not always called with irqs disabled - do that here, + * and also avoid lockdep recursion: + */ +void lock_acquire(struct lockdep_map *lock, unsigned int subclass, + int trylock, int read, int check, unsigned long ip) +{ + unsigned long flags; + + if (unlikely(current->lockdep_recursion)) + return; + + raw_local_irq_save(flags); + check_flags(flags); + + current->lockdep_recursion = 1; + __lock_acquire(lock, subclass, trylock, read, check, + irqs_disabled_flags(flags), ip); + current->lockdep_recursion = 0; + raw_local_irq_restore(flags); +} + +EXPORT_SYMBOL_GPL(lock_acquire); + +void lock_release(struct lockdep_map *lock, int nested, unsigned long ip) +{ + unsigned long flags; + + if (unlikely(current->lockdep_recursion)) + return; + + raw_local_irq_save(flags); + check_flags(flags); + current->lockdep_recursion = 1; + 
__lock_release(lock, nested, ip); + current->lockdep_recursion = 0; + raw_local_irq_restore(flags); +} + +EXPORT_SYMBOL_GPL(lock_release); + +/* + * Used by the testsuite, sanitize the validator state + * after a simulated failure: + */ + +void lockdep_reset(void) +{ + unsigned long flags; + + raw_local_irq_save(flags); + current->curr_chain_key = 0; + current->lockdep_depth = 0; + current->lockdep_recursion = 0; + memset(current->held_locks, 0, MAX_LOCK_DEPTH*sizeof(struct held_lock)); + nr_hardirq_chains = 0; + nr_softirq_chains = 0; + nr_process_chains = 0; + debug_locks = 1; + raw_local_irq_restore(flags); +} + +static void zap_class(struct lock_class *class) +{ + int i; + + /* + * Remove all dependencies this lock is + * involved in: + */ + for (i = 0; i < nr_list_entries; i++) { + if (list_entries[i].class == class) + list_del_rcu(&list_entries[i].entry); + } + /* + * Unhash the class and remove it from the all_lock_classes list: + */ + list_del_rcu(&class->hash_entry); + list_del_rcu(&class->lock_entry); + +} + +static inline int within(void *addr, void *start, unsigned long size) +{ + return addr >= start && addr < start + size; +} + +void lockdep_free_key_range(void *start, unsigned long size) +{ + struct lock_class *class, *next; + struct list_head *head; + unsigned long flags; + int i; + + raw_local_irq_save(flags); + __raw_spin_lock(&hash_lock); + + /* + * Unhash all classes that were created by this module: + */ + for (i = 0; i < CLASSHASH_SIZE; i++) { + head = classhash_table + i; + if (list_empty(head)) + continue; + list_for_each_entry_safe(class, next, head, hash_entry) + if (within(class->key, start, size)) + zap_class(class); + } + + __raw_spin_unlock(&hash_lock); + raw_local_irq_restore(flags); +} + +void lockdep_reset_lock(struct lockdep_map *lock) +{ + struct lock_class *class, *next, *entry; + struct list_head *head; + unsigned long flags; + int i, j; + + raw_local_irq_save(flags); + __raw_spin_lock(&hash_lock); + + /* + * Remove all classes 
this lock has: + */ + for (i = 0; i < CLASSHASH_SIZE; i++) { + head = classhash_table + i; + if (list_empty(head)) + continue; + list_for_each_entry_safe(class, next, head, hash_entry) { + for (j = 0; j < MAX_LOCKDEP_SUBCLASSES; j++) { + entry = lock->class[j]; + if (class == entry) { + zap_class(class); + lock->class[j] = NULL; + break; + } + } + } + } + + /* + * Debug check: in the end all mapped classes should + * be gone. + */ + for (j = 0; j < MAX_LOCKDEP_SUBCLASSES; j++) { + entry = lock->class[j]; + if (!entry) + continue; + __raw_spin_unlock(&hash_lock); + DEBUG_LOCKS_WARN_ON(1); + raw_local_irq_restore(flags); + return; + } + + __raw_spin_unlock(&hash_lock); + raw_local_irq_restore(flags); +} + +void __init lockdep_init(void) +{ + int i; + + /* + * Some architectures have their own start_kernel() + * code which calls lockdep_init(), while we also + * call lockdep_init() from the start_kernel() itself, + * and we want to initialize the hashes only once: + */ + if (lockdep_initialized) + return; + + for (i = 0; i < CLASSHASH_SIZE; i++) + INIT_LIST_HEAD(classhash_table + i); + + for (i = 0; i < CHAINHASH_SIZE; i++) + INIT_LIST_HEAD(chainhash_table + i); + + lockdep_initialized = 1; +} + +void __init lockdep_info(void) +{ + printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n"); + + printk("... MAX_LOCKDEP_SUBCLASSES: %lu\n", MAX_LOCKDEP_SUBCLASSES); + printk("... MAX_LOCK_DEPTH: %lu\n", MAX_LOCK_DEPTH); + printk("... MAX_LOCKDEP_KEYS: %lu\n", MAX_LOCKDEP_KEYS); + printk("... CLASSHASH_SIZE: %lu\n", CLASSHASH_SIZE); + printk("... MAX_LOCKDEP_ENTRIES: %lu\n", MAX_LOCKDEP_ENTRIES); + printk("... MAX_LOCKDEP_CHAINS: %lu\n", MAX_LOCKDEP_CHAINS); + printk("... 
CHAINHASH_SIZE: %lu\n", CHAINHASH_SIZE); + + printk(" memory used by lock dependency info: %lu kB\n", + (sizeof(struct lock_class) * MAX_LOCKDEP_KEYS + + sizeof(struct list_head) * CLASSHASH_SIZE + + sizeof(struct lock_list) * MAX_LOCKDEP_ENTRIES + + sizeof(struct lock_chain) * MAX_LOCKDEP_CHAINS + + sizeof(struct list_head) * CHAINHASH_SIZE) / 1024); + + printk(" per task-struct memory footprint: %lu bytes\n", + sizeof(struct held_lock) * MAX_LOCK_DEPTH); + +#ifdef CONFIG_DEBUG_LOCKDEP + if (lockdep_init_error) + printk("WARNING: lockdep init error! Arch code didnt call lockdep_init() early enough?\n"); +#endif +} + +static inline int in_range(const void *start, const void *addr, const void *end) +{ + return addr >= start && addr <= end; +} + +static void +print_freed_lock_bug(struct task_struct *curr, const void *mem_from, + const void *mem_to) +{ + if (!debug_locks_off()) + return; + if (debug_locks_silent) + return; + + printk("\n=========================\n"); + printk( "[ BUG: held lock freed! 
]\n"); + printk( "-------------------------\n"); + printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n", + curr->comm, curr->pid, mem_from, mem_to-1); + lockdep_print_held_locks(curr); + + printk("\nstack backtrace:\n"); + dump_stack(); +} + +/* + * Called when kernel memory is freed (or unmapped), or if a lock + * is destroyed or reinitialized - this code checks whether there is + * any held lock in the memory range of <from> to <to>: + */ +void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len) +{ + const void *mem_to = mem_from + mem_len, *lock_from, *lock_to; + struct task_struct *curr = current; + struct held_lock *hlock; + unsigned long flags; + int i; + + if (unlikely(!debug_locks)) + return; + + local_irq_save(flags); + for (i = 0; i < curr->lockdep_depth; i++) { + hlock = curr->held_locks + i; + + lock_from = (void *)hlock->instance; + lock_to = (void *)(hlock->instance + 1); + + if (!in_range(mem_from, lock_from, mem_to) && + !in_range(mem_from, lock_to, mem_to)) + continue; + + print_freed_lock_bug(curr, mem_from, mem_to); + break; + } + local_irq_restore(flags); +} + +static void print_held_locks_bug(struct task_struct *curr) +{ + if (!debug_locks_off()) + return; + if (debug_locks_silent) + return; + + printk("\n=====================================\n"); + printk( "[ BUG: lock held at task exit time! 
]\n"); + printk( "-------------------------------------\n"); + printk("%s/%d is exiting with locks still held!\n", + curr->comm, curr->pid); + lockdep_print_held_locks(curr); + + printk("\nstack backtrace:\n"); + dump_stack(); +} + +void debug_check_no_locks_held(struct task_struct *task) +{ + if (unlikely(task->lockdep_depth > 0)) + print_held_locks_bug(task); +} + +void debug_show_all_locks(void) +{ + struct task_struct *g, *p; + int count = 10; + int unlock = 1; + + printk("\nShowing all locks held in the system:\n"); + + /* + * Here we try to get the tasklist_lock as hard as possible, + * if not successful after 2 seconds we ignore it (but keep + * trying). This is to enable a debug printout even if a + * tasklist_lock-holding task deadlocks or crashes. + */ +retry: + if (!read_trylock(&tasklist_lock)) { + if (count == 10) + printk("hm, tasklist_lock locked, retrying... "); + if (count) { + count--; + printk(" #%d", 10-count); + mdelay(200); + goto retry; + } + printk(" ignoring it.\n"); + unlock = 0; + } + if (count != 10) + printk(" locked it.\n"); + + do_each_thread(g, p) { + if (p->lockdep_depth) + lockdep_print_held_locks(p); + if (!unlock) + if (read_trylock(&tasklist_lock)) + unlock = 1; + } while_each_thread(g, p); + + printk("\n"); + printk("=============================================\n\n"); + + if (unlock) + read_unlock(&tasklist_lock); +} + +EXPORT_SYMBOL_GPL(debug_show_all_locks); + +void debug_show_held_locks(struct task_struct *task) +{ + lockdep_print_held_locks(task); +} + +EXPORT_SYMBOL_GPL(debug_show_held_locks); + diff --git a/kernel/lockdep_internals.h b/kernel/lockdep_internals.h new file mode 100644 index 000000000000..0d355f24fe04 --- /dev/null +++ b/kernel/lockdep_internals.h @@ -0,0 +1,78 @@ +/* + * kernel/lockdep_internals.h + * + * Runtime locking correctness validator + * + * lockdep subsystem internal functions and variables. + */ + +/* + * MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies + * we track. 
+ * + * We use the per-lock dependency maps in two ways: we grow it by adding + * every to-be-taken lock to all currently held lock's own dependency + * table (if it's not there yet), and we check it for lock order + * conflicts and deadlocks. + */ +#define MAX_LOCKDEP_ENTRIES 8192UL + +#define MAX_LOCKDEP_KEYS_BITS 11 +#define MAX_LOCKDEP_KEYS (1UL << MAX_LOCKDEP_KEYS_BITS) + +#define MAX_LOCKDEP_CHAINS_BITS 13 +#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) + +/* + * Stack-trace: tightly packed array of stack backtrace + * addresses. Protected by the hash_lock. + */ +#define MAX_STACK_TRACE_ENTRIES 131072UL + +extern struct list_head all_lock_classes; + +extern void +get_usage_chars(struct lock_class *class, char *c1, char *c2, char *c3, char *c4); + +extern const char * __get_key_name(struct lockdep_subclass_key *key, char *str); + +extern unsigned long nr_lock_classes; +extern unsigned long nr_list_entries; +extern unsigned long nr_lock_chains; +extern unsigned long nr_stack_trace_entries; + +extern unsigned int nr_hardirq_chains; +extern unsigned int nr_softirq_chains; +extern unsigned int nr_process_chains; +extern unsigned int max_lockdep_depth; +extern unsigned int max_recursion_depth; + +#ifdef CONFIG_DEBUG_LOCKDEP +/* + * Various lockdep statistics: + */ +extern atomic_t chain_lookup_hits; +extern atomic_t chain_lookup_misses; +extern atomic_t hardirqs_on_events; +extern atomic_t hardirqs_off_events; +extern atomic_t redundant_hardirqs_on; +extern atomic_t redundant_hardirqs_off; +extern atomic_t softirqs_on_events; +extern atomic_t softirqs_off_events; +extern atomic_t redundant_softirqs_on; +extern atomic_t redundant_softirqs_off; +extern atomic_t nr_unused_locks; +extern atomic_t nr_cyclic_checks; +extern atomic_t nr_cyclic_check_recursions; +extern atomic_t nr_find_usage_forwards_checks; +extern atomic_t nr_find_usage_forwards_recursions; +extern atomic_t nr_find_usage_backwards_checks; +extern atomic_t nr_find_usage_backwards_recursions; 
+# define debug_atomic_inc(ptr) atomic_inc(ptr) +# define debug_atomic_dec(ptr) atomic_dec(ptr) +# define debug_atomic_read(ptr) atomic_read(ptr) +#else +# define debug_atomic_inc(ptr) do { } while (0) +# define debug_atomic_dec(ptr) do { } while (0) +# define debug_atomic_read(ptr) 0 +#endif diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c new file mode 100644 index 000000000000..f6e72eaab3fa --- /dev/null +++ b/kernel/lockdep_proc.c @@ -0,0 +1,345 @@ +/* + * kernel/lockdep_proc.c + * + * Runtime locking correctness validator + * + * Started by Ingo Molnar: + * + * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + * + * Code for /proc/lockdep and /proc/lockdep_stats: + * + */ +#include <linux/sched.h> +#include <linux/module.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/kallsyms.h> +#include <linux/debug_locks.h> + +#include "lockdep_internals.h" + +static void *l_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct lock_class *class = v; + + (*pos)++; + + if (class->lock_entry.next != &all_lock_classes) + class = list_entry(class->lock_entry.next, struct lock_class, + lock_entry); + else + class = NULL; + m->private = class; + + return class; +} + +static void *l_start(struct seq_file *m, loff_t *pos) +{ + struct lock_class *class = m->private; + + if (&class->lock_entry == all_lock_classes.next) + seq_printf(m, "all lock classes:\n"); + + return class; +} + +static void l_stop(struct seq_file *m, void *v) +{ +} + +static unsigned long count_forward_deps(struct lock_class *class) +{ + struct lock_list *entry; + unsigned long ret = 1; + + /* + * Recurse this class's dependency list: + */ + list_for_each_entry(entry, &class->locks_after, entry) + ret += count_forward_deps(entry->class); + + return ret; +} + +static unsigned long count_backward_deps(struct lock_class *class) +{ + struct lock_list *entry; + unsigned long ret = 1; + + /* + * Recurse this class's dependency list: + */ + 
list_for_each_entry(entry, &class->locks_before, entry) + ret += count_backward_deps(entry->class); + + return ret; +} + +static int l_show(struct seq_file *m, void *v) +{ + unsigned long nr_forward_deps, nr_backward_deps; + struct lock_class *class = m->private; + char str[128], c1, c2, c3, c4; + const char *name; + + seq_printf(m, "%p", class->key); +#ifdef CONFIG_DEBUG_LOCKDEP + seq_printf(m, " OPS:%8ld", class->ops); +#endif + nr_forward_deps = count_forward_deps(class); + seq_printf(m, " FD:%5ld", nr_forward_deps); + + nr_backward_deps = count_backward_deps(class); + seq_printf(m, " BD:%5ld", nr_backward_deps); + + get_usage_chars(class, &c1, &c2, &c3, &c4); + seq_printf(m, " %c%c%c%c", c1, c2, c3, c4); + + name = class->name; + if (!name) { + name = __get_key_name(class->key, str); + seq_printf(m, ": %s", name); + } else{ + seq_printf(m, ": %s", name); + if (class->name_version > 1) + seq_printf(m, "#%d", class->name_version); + if (class->subclass) + seq_printf(m, "/%d", class->subclass); + } + seq_puts(m, "\n"); + + return 0; +} + +static struct seq_operations lockdep_ops = { + .start = l_start, + .next = l_next, + .stop = l_stop, + .show = l_show, +}; + +static int lockdep_open(struct inode *inode, struct file *file) +{ + int res = seq_open(file, &lockdep_ops); + if (!res) { + struct seq_file *m = file->private_data; + + if (!list_empty(&all_lock_classes)) + m->private = list_entry(all_lock_classes.next, + struct lock_class, lock_entry); + else + m->private = NULL; + } + return res; +} + +static struct file_operations proc_lockdep_operations = { + .open = lockdep_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static void lockdep_stats_debug_show(struct seq_file *m) +{ +#ifdef CONFIG_DEBUG_LOCKDEP + unsigned int hi1 = debug_atomic_read(&hardirqs_on_events), + hi2 = debug_atomic_read(&hardirqs_off_events), + hr1 = debug_atomic_read(&redundant_hardirqs_on), + hr2 = debug_atomic_read(&redundant_hardirqs_off), + si1 = 
debug_atomic_read(&softirqs_on_events), + si2 = debug_atomic_read(&softirqs_off_events), + sr1 = debug_atomic_read(&redundant_softirqs_on), + sr2 = debug_atomic_read(&redundant_softirqs_off); + + seq_printf(m, " chain lookup misses: %11u\n", + debug_atomic_read(&chain_lookup_misses)); + seq_printf(m, " chain lookup hits: %11u\n", + debug_atomic_read(&chain_lookup_hits)); + seq_printf(m, " cyclic checks: %11u\n", + debug_atomic_read(&nr_cyclic_checks)); + seq_printf(m, " cyclic-check recursions: %11u\n", + debug_atomic_read(&nr_cyclic_check_recursions)); + seq_printf(m, " find-mask forwards checks: %11u\n", + debug_atomic_read(&nr_find_usage_forwards_checks)); + seq_printf(m, " find-mask forwards recursions: %11u\n", + debug_atomic_read(&nr_find_usage_forwards_recursions)); + seq_printf(m, " find-mask backwards checks: %11u\n", + debug_atomic_read(&nr_find_usage_backwards_checks)); + seq_printf(m, " find-mask backwards recursions:%11u\n", + debug_atomic_read(&nr_find_usage_backwards_recursions)); + + seq_printf(m, " hardirq on events: %11u\n", hi1); + seq_printf(m, " hardirq off events: %11u\n", hi2); + seq_printf(m, " redundant hardirq ons: %11u\n", hr1); + seq_printf(m, " redundant hardirq offs: %11u\n", hr2); + seq_printf(m, " softirq on events: %11u\n", si1); + seq_printf(m, " softirq off events: %11u\n", si2); + seq_printf(m, " redundant softirq ons: %11u\n", sr1); + seq_printf(m, " redundant softirq offs: %11u\n", sr2); +#endif +} + +static int lockdep_stats_show(struct seq_file *m, void *v) +{ + struct lock_class *class; + unsigned long nr_unused = 0, nr_uncategorized = 0, + nr_irq_safe = 0, nr_irq_unsafe = 0, + nr_softirq_safe = 0, nr_softirq_unsafe = 0, + nr_hardirq_safe = 0, nr_hardirq_unsafe = 0, + nr_irq_read_safe = 0, nr_irq_read_unsafe = 0, + nr_softirq_read_safe = 0, nr_softirq_read_unsafe = 0, + nr_hardirq_read_safe = 0, nr_hardirq_read_unsafe = 0, + sum_forward_deps = 0, factor = 0; + + list_for_each_entry(class, &all_lock_classes, lock_entry) { + + 
if (class->usage_mask == 0) + nr_unused++; + if (class->usage_mask == LOCKF_USED) + nr_uncategorized++; + if (class->usage_mask & LOCKF_USED_IN_IRQ) + nr_irq_safe++; + if (class->usage_mask & LOCKF_ENABLED_IRQS) + nr_irq_unsafe++; + if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ) + nr_softirq_safe++; + if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS) + nr_softirq_unsafe++; + if (class->usage_mask & LOCKF_USED_IN_HARDIRQ) + nr_hardirq_safe++; + if (class->usage_mask & LOCKF_ENABLED_HARDIRQS) + nr_hardirq_unsafe++; + if (class->usage_mask & LOCKF_USED_IN_IRQ_READ) + nr_irq_read_safe++; + if (class->usage_mask & LOCKF_ENABLED_IRQS_READ) + nr_irq_read_unsafe++; + if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ_READ) + nr_softirq_read_safe++; + if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ) + nr_softirq_read_unsafe++; + if (class->usage_mask & LOCKF_USED_IN_HARDIRQ_READ) + nr_hardirq_read_safe++; + if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ) + nr_hardirq_read_unsafe++; + + sum_forward_deps += count_forward_deps(class); + } +#ifdef CONFIG_LOCKDEP_DEBUG + DEBUG_LOCKS_WARN_ON(debug_atomic_read(&nr_unused_locks) != nr_unused); +#endif + seq_printf(m, " lock-classes: %11lu [max: %lu]\n", + nr_lock_classes, MAX_LOCKDEP_KEYS); + seq_printf(m, " direct dependencies: %11lu [max: %lu]\n", + nr_list_entries, MAX_LOCKDEP_ENTRIES); + seq_printf(m, " indirect dependencies: %11lu\n", + sum_forward_deps); + + /* + * Total number of dependencies: + * + * All irq-safe locks may nest inside irq-unsafe locks, + * plus all the other known dependencies: + */ + seq_printf(m, " all direct dependencies: %11lu\n", + nr_irq_unsafe * nr_irq_safe + + nr_hardirq_unsafe * nr_hardirq_safe + + nr_list_entries); + + /* + * Estimated factor between direct and indirect + * dependencies: + */ + if (nr_list_entries) + factor = sum_forward_deps / nr_list_entries; + + seq_printf(m, " dependency chains: %11lu [max: %lu]\n", + nr_lock_chains, MAX_LOCKDEP_CHAINS); + +#ifdef CONFIG_TRACE_IRQFLAGS + 
seq_printf(m, " in-hardirq chains: %11u\n", + nr_hardirq_chains); + seq_printf(m, " in-softirq chains: %11u\n", + nr_softirq_chains); +#endif + seq_printf(m, " in-process chains: %11u\n", + nr_process_chains); + seq_printf(m, " stack-trace entries: %11lu [max: %lu]\n", + nr_stack_trace_entries, MAX_STACK_TRACE_ENTRIES); + seq_printf(m, " combined max dependencies: %11u\n", + (nr_hardirq_chains + 1) * + (nr_softirq_chains + 1) * + (nr_process_chains + 1) + ); + seq_printf(m, " hardirq-safe locks: %11lu\n", + nr_hardirq_safe); + seq_printf(m, " hardirq-unsafe locks: %11lu\n", + nr_hardirq_unsafe); + seq_printf(m, " softirq-safe locks: %11lu\n", + nr_softirq_safe); + seq_printf(m, " softirq-unsafe locks: %11lu\n", + nr_softirq_unsafe); + seq_printf(m, " irq-safe locks: %11lu\n", + nr_irq_safe); + seq_printf(m, " irq-unsafe locks: %11lu\n", + nr_irq_unsafe); + + seq_printf(m, " hardirq-read-safe locks: %11lu\n", + nr_hardirq_read_safe); + seq_printf(m, " hardirq-read-unsafe locks: %11lu\n", + nr_hardirq_read_unsafe); + seq_printf(m, " softirq-read-safe locks: %11lu\n", + nr_softirq_read_safe); + seq_printf(m, " softirq-read-unsafe locks: %11lu\n", + nr_softirq_read_unsafe); + seq_printf(m, " irq-read-safe locks: %11lu\n", + nr_irq_read_safe); + seq_printf(m, " irq-read-unsafe locks: %11lu\n", + nr_irq_read_unsafe); + + seq_printf(m, " uncategorized locks: %11lu\n", + nr_uncategorized); + seq_printf(m, " unused locks: %11lu\n", + nr_unused); + seq_printf(m, " max locking depth: %11u\n", + max_lockdep_depth); + seq_printf(m, " max recursion depth: %11u\n", + max_recursion_depth); + lockdep_stats_debug_show(m); + seq_printf(m, " debug_locks: %11u\n", + debug_locks); + + return 0; +} + +static int lockdep_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, lockdep_stats_show, NULL); +} + +static struct file_operations proc_lockdep_stats_operations = { + .open = lockdep_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = 
seq_release, +}; + +static int __init lockdep_proc_init(void) +{ + struct proc_dir_entry *entry; + + entry = create_proc_entry("lockdep", S_IRUSR, NULL); + if (entry) + entry->proc_fops = &proc_lockdep_operations; + + entry = create_proc_entry("lockdep_stats", S_IRUSR, NULL); + if (entry) + entry->proc_fops = &proc_lockdep_stats_operations; + + return 0; +} + +__initcall(lockdep_proc_init); + diff --git a/kernel/module.c b/kernel/module.c index 281172f01e9a..35e1b1f859d7 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1121,6 +1121,9 @@ static void free_module(struct module *mod) if (mod->percpu) percpu_modfree(mod->percpu); + /* Free lock-classes: */ + lockdep_free_key_range(mod->module_core, mod->core_size); + /* Finally, free the core (containing the module structure) */ module_free(mod, mod->module_core); } @@ -2159,6 +2162,29 @@ const struct exception_table_entry *search_module_extables(unsigned long addr) return e; } +/* + * Is this a valid module address? + */ +int is_module_address(unsigned long addr) +{ + unsigned long flags; + struct module *mod; + + spin_lock_irqsave(&modlist_lock, flags); + + list_for_each_entry(mod, &modules, list) { + if (within(addr, mod->module_core, mod->core_size)) { + spin_unlock_irqrestore(&modlist_lock, flags); + return 1; + } + } + + spin_unlock_irqrestore(&modlist_lock, flags); + + return 0; +} + + /* Is this a valid kernel address? We don't grab the lock: we are oopsing. */ struct module *__module_text_address(unsigned long addr) { diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c index e38e4bac97ca..e3203c654dda 100644 --- a/kernel/mutex-debug.c +++ b/kernel/mutex-debug.c @@ -20,367 +20,19 @@ #include <linux/spinlock.h> #include <linux/kallsyms.h> #include <linux/interrupt.h> +#include <linux/debug_locks.h> #include "mutex-debug.h" /* - * We need a global lock when we walk through the multi-process - * lock tree. Only used in the deadlock-debugging case. 
- */ -DEFINE_SPINLOCK(debug_mutex_lock); - -/* - * All locks held by all tasks, in a single global list: - */ -LIST_HEAD(debug_mutex_held_locks); - -/* - * In the debug case we carry the caller's instruction pointer into - * other functions, but we dont want the function argument overhead - * in the nondebug case - hence these macros: - */ -#define __IP_DECL__ , unsigned long ip -#define __IP__ , ip -#define __RET_IP__ , (unsigned long)__builtin_return_address(0) - -/* - * "mutex debugging enabled" flag. We turn it off when we detect - * the first problem because we dont want to recurse back - * into the tracing code when doing error printk or - * executing a BUG(): - */ -int debug_mutex_on = 1; - -static void printk_task(struct task_struct *p) -{ - if (p) - printk("%16s:%5d [%p, %3d]", p->comm, p->pid, p, p->prio); - else - printk("<none>"); -} - -static void printk_ti(struct thread_info *ti) -{ - if (ti) - printk_task(ti->task); - else - printk("<none>"); -} - -static void printk_task_short(struct task_struct *p) -{ - if (p) - printk("%s/%d [%p, %3d]", p->comm, p->pid, p, p->prio); - else - printk("<none>"); -} - -static void printk_lock(struct mutex *lock, int print_owner) -{ - printk(" [%p] {%s}\n", lock, lock->name); - - if (print_owner && lock->owner) { - printk(".. held by: "); - printk_ti(lock->owner); - printk("\n"); - } - if (lock->owner) { - printk("... 
acquired at: "); - print_symbol("%s\n", lock->acquire_ip); - } -} - -/* - * printk locks held by a task: - */ -static void show_task_locks(struct task_struct *p) -{ - switch (p->state) { - case TASK_RUNNING: printk("R"); break; - case TASK_INTERRUPTIBLE: printk("S"); break; - case TASK_UNINTERRUPTIBLE: printk("D"); break; - case TASK_STOPPED: printk("T"); break; - case EXIT_ZOMBIE: printk("Z"); break; - case EXIT_DEAD: printk("X"); break; - default: printk("?"); break; - } - printk_task(p); - if (p->blocked_on) { - struct mutex *lock = p->blocked_on->lock; - - printk(" blocked on mutex:"); - printk_lock(lock, 1); - } else - printk(" (not blocked on mutex)\n"); -} - -/* - * printk all locks held in the system (if filter == NULL), - * or all locks belonging to a single task (if filter != NULL): - */ -void show_held_locks(struct task_struct *filter) -{ - struct list_head *curr, *cursor = NULL; - struct mutex *lock; - struct thread_info *t; - unsigned long flags; - int count = 0; - - if (filter) { - printk("------------------------------\n"); - printk("| showing all locks held by: | ("); - printk_task_short(filter); - printk("):\n"); - printk("------------------------------\n"); - } else { - printk("---------------------------\n"); - printk("| showing all locks held: |\n"); - printk("---------------------------\n"); - } - - /* - * Play safe and acquire the global trace lock. We - * cannot printk with that lock held so we iterate - * very carefully: - */ -next: - debug_spin_lock_save(&debug_mutex_lock, flags); - list_for_each(curr, &debug_mutex_held_locks) { - if (cursor && curr != cursor) - continue; - lock = list_entry(curr, struct mutex, held_list); - t = lock->owner; - if (filter && (t != filter->thread_info)) - continue; - count++; - cursor = curr->next; - debug_spin_unlock_restore(&debug_mutex_lock, flags); - - printk("\n#%03d: ", count); - printk_lock(lock, filter ? 
0 : 1); - goto next; - } - debug_spin_unlock_restore(&debug_mutex_lock, flags); - printk("\n"); -} - -void mutex_debug_show_all_locks(void) -{ - struct task_struct *g, *p; - int count = 10; - int unlock = 1; - - printk("\nShowing all blocking locks in the system:\n"); - - /* - * Here we try to get the tasklist_lock as hard as possible, - * if not successful after 2 seconds we ignore it (but keep - * trying). This is to enable a debug printout even if a - * tasklist_lock-holding task deadlocks or crashes. - */ -retry: - if (!read_trylock(&tasklist_lock)) { - if (count == 10) - printk("hm, tasklist_lock locked, retrying... "); - if (count) { - count--; - printk(" #%d", 10-count); - mdelay(200); - goto retry; - } - printk(" ignoring it.\n"); - unlock = 0; - } - if (count != 10) - printk(" locked it.\n"); - - do_each_thread(g, p) { - show_task_locks(p); - if (!unlock) - if (read_trylock(&tasklist_lock)) - unlock = 1; - } while_each_thread(g, p); - - printk("\n"); - show_held_locks(NULL); - printk("=============================================\n\n"); - - if (unlock) - read_unlock(&tasklist_lock); -} - -static void report_deadlock(struct task_struct *task, struct mutex *lock, - struct mutex *lockblk, unsigned long ip) -{ - printk("\n%s/%d is trying to acquire this lock:\n", - current->comm, current->pid); - printk_lock(lock, 1); - printk("... trying at: "); - print_symbol("%s\n", ip); - show_held_locks(current); - - if (lockblk) { - printk("but %s/%d is deadlocking current task %s/%d!\n\n", - task->comm, task->pid, current->comm, current->pid); - printk("\n%s/%d is blocked on this lock:\n", - task->comm, task->pid); - printk_lock(lockblk, 1); - - show_held_locks(task); - - printk("\n%s/%d's [blocked] stackdump:\n\n", - task->comm, task->pid); - show_stack(task, NULL); - } - - printk("\n%s/%d's [current] stackdump:\n\n", - current->comm, current->pid); - dump_stack(); - mutex_debug_show_all_locks(); - printk("[ turning off deadlock detection. Please report this. 
]\n\n"); - local_irq_disable(); -} - -/* - * Recursively check for mutex deadlocks: - */ -static int check_deadlock(struct mutex *lock, int depth, - struct thread_info *ti, unsigned long ip) -{ - struct mutex *lockblk; - struct task_struct *task; - - if (!debug_mutex_on) - return 0; - - ti = lock->owner; - if (!ti) - return 0; - - task = ti->task; - lockblk = NULL; - if (task->blocked_on) - lockblk = task->blocked_on->lock; - - /* Self-deadlock: */ - if (current == task) { - DEBUG_OFF(); - if (depth) - return 1; - printk("\n==========================================\n"); - printk( "[ BUG: lock recursion deadlock detected! |\n"); - printk( "------------------------------------------\n"); - report_deadlock(task, lock, NULL, ip); - return 0; - } - - /* Ugh, something corrupted the lock data structure? */ - if (depth > 20) { - DEBUG_OFF(); - printk("\n===========================================\n"); - printk( "[ BUG: infinite lock dependency detected!? |\n"); - printk( "-------------------------------------------\n"); - report_deadlock(task, lock, lockblk, ip); - return 0; - } - - /* Recursively check for dependencies: */ - if (lockblk && check_deadlock(lockblk, depth+1, ti, ip)) { - printk("\n============================================\n"); - printk( "[ BUG: circular locking deadlock detected! 
]\n"); - printk( "--------------------------------------------\n"); - report_deadlock(task, lock, lockblk, ip); - return 0; - } - return 0; -} - -/* - * Called when a task exits, this function checks whether the - * task is holding any locks, and reports the first one if so: - */ -void mutex_debug_check_no_locks_held(struct task_struct *task) -{ - struct list_head *curr, *next; - struct thread_info *t; - unsigned long flags; - struct mutex *lock; - - if (!debug_mutex_on) - return; - - debug_spin_lock_save(&debug_mutex_lock, flags); - list_for_each_safe(curr, next, &debug_mutex_held_locks) { - lock = list_entry(curr, struct mutex, held_list); - t = lock->owner; - if (t != task->thread_info) - continue; - list_del_init(curr); - DEBUG_OFF(); - debug_spin_unlock_restore(&debug_mutex_lock, flags); - - printk("BUG: %s/%d, lock held at task exit time!\n", - task->comm, task->pid); - printk_lock(lock, 1); - if (lock->owner != task->thread_info) - printk("exiting task is not even the owner??\n"); - return; - } - debug_spin_unlock_restore(&debug_mutex_lock, flags); -} - -/* - * Called when kernel memory is freed (or unmapped), or if a mutex - * is destroyed or reinitialized - this code checks whether there is - * any held lock in the memory range of <from> to <to>: - */ -void mutex_debug_check_no_locks_freed(const void *from, unsigned long len) -{ - struct list_head *curr, *next; - const void *to = from + len; - unsigned long flags; - struct mutex *lock; - void *lock_addr; - - if (!debug_mutex_on) - return; - - debug_spin_lock_save(&debug_mutex_lock, flags); - list_for_each_safe(curr, next, &debug_mutex_held_locks) { - lock = list_entry(curr, struct mutex, held_list); - lock_addr = lock; - if (lock_addr < from || lock_addr >= to) - continue; - list_del_init(curr); - DEBUG_OFF(); - debug_spin_unlock_restore(&debug_mutex_lock, flags); - - printk("BUG: %s/%d, active lock [%p(%p-%p)] freed!\n", - current->comm, current->pid, lock, from, to); - dump_stack(); - printk_lock(lock, 
1); - if (lock->owner != current_thread_info()) - printk("freeing task is not even the owner??\n"); - return; - } - debug_spin_unlock_restore(&debug_mutex_lock, flags); -} - -/* * Must be called with lock->wait_lock held. */ -void debug_mutex_set_owner(struct mutex *lock, - struct thread_info *new_owner __IP_DECL__) +void debug_mutex_set_owner(struct mutex *lock, struct thread_info *new_owner) { lock->owner = new_owner; - DEBUG_WARN_ON(!list_empty(&lock->held_list)); - if (debug_mutex_on) { - list_add_tail(&lock->held_list, &debug_mutex_held_locks); - lock->acquire_ip = ip; - } } -void debug_mutex_init_waiter(struct mutex_waiter *waiter) +void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter) { memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter)); waiter->magic = waiter; @@ -389,23 +41,23 @@ void debug_mutex_init_waiter(struct mutex_waiter *waiter) void debug_mutex_wake_waiter(struct mutex *lock, struct mutex_waiter *waiter) { - SMP_DEBUG_WARN_ON(!spin_is_locked(&lock->wait_lock)); - DEBUG_WARN_ON(list_empty(&lock->wait_list)); - DEBUG_WARN_ON(waiter->magic != waiter); - DEBUG_WARN_ON(list_empty(&waiter->list)); + SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock)); + DEBUG_LOCKS_WARN_ON(list_empty(&lock->wait_list)); + DEBUG_LOCKS_WARN_ON(waiter->magic != waiter); + DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list)); } void debug_mutex_free_waiter(struct mutex_waiter *waiter) { - DEBUG_WARN_ON(!list_empty(&waiter->list)); + DEBUG_LOCKS_WARN_ON(!list_empty(&waiter->list)); memset(waiter, MUTEX_DEBUG_FREE, sizeof(*waiter)); } void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, - struct thread_info *ti __IP_DECL__) + struct thread_info *ti) { - SMP_DEBUG_WARN_ON(!spin_is_locked(&lock->wait_lock)); - check_deadlock(lock, 0, ti, ip); + SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock)); + /* Mark the current thread as blocked on the lock: */ ti->task->blocked_on = waiter; waiter->lock = lock; @@ -414,9 +66,9 @@ 
void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, struct thread_info *ti) { - DEBUG_WARN_ON(list_empty(&waiter->list)); - DEBUG_WARN_ON(waiter->task != ti->task); - DEBUG_WARN_ON(ti->task->blocked_on != waiter); + DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list)); + DEBUG_LOCKS_WARN_ON(waiter->task != ti->task); + DEBUG_LOCKS_WARN_ON(ti->task->blocked_on != waiter); ti->task->blocked_on = NULL; list_del_init(&waiter->list); @@ -425,24 +77,23 @@ void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, void debug_mutex_unlock(struct mutex *lock) { - DEBUG_WARN_ON(lock->magic != lock); - DEBUG_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); - DEBUG_WARN_ON(lock->owner != current_thread_info()); - if (debug_mutex_on) { - DEBUG_WARN_ON(list_empty(&lock->held_list)); - list_del_init(&lock->held_list); - } + DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info()); + DEBUG_LOCKS_WARN_ON(lock->magic != lock); + DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); + DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info()); } -void debug_mutex_init(struct mutex *lock, const char *name) +void debug_mutex_init(struct mutex *lock, const char *name, + struct lock_class_key *key) { +#ifdef CONFIG_DEBUG_LOCK_ALLOC /* * Make sure we are not reinitializing a held lock: */ - mutex_debug_check_no_locks_freed((void *)lock, sizeof(*lock)); + debug_check_no_locks_freed((void *)lock, sizeof(*lock)); + lockdep_init_map(&lock->dep_map, name, key); +#endif lock->owner = NULL; - INIT_LIST_HEAD(&lock->held_list); - lock->name = name; lock->magic = lock; } @@ -456,7 +107,7 @@ void debug_mutex_init(struct mutex *lock, const char *name) */ void fastcall mutex_destroy(struct mutex *lock) { - DEBUG_WARN_ON(mutex_is_locked(lock)); + DEBUG_LOCKS_WARN_ON(mutex_is_locked(lock)); lock->magic = NULL; } diff --git a/kernel/mutex-debug.h b/kernel/mutex-debug.h index 
a5196c36a5fd..babfbdfc534b 100644 --- a/kernel/mutex-debug.h +++ b/kernel/mutex-debug.h @@ -10,110 +10,44 @@ * More details are in kernel/mutex-debug.c. */ -extern spinlock_t debug_mutex_lock; -extern struct list_head debug_mutex_held_locks; -extern int debug_mutex_on; - -/* - * In the debug case we carry the caller's instruction pointer into - * other functions, but we dont want the function argument overhead - * in the nondebug case - hence these macros: - */ -#define __IP_DECL__ , unsigned long ip -#define __IP__ , ip -#define __RET_IP__ , (unsigned long)__builtin_return_address(0) - /* * This must be called with lock->wait_lock held. */ -extern void debug_mutex_set_owner(struct mutex *lock, - struct thread_info *new_owner __IP_DECL__); +extern void +debug_mutex_set_owner(struct mutex *lock, struct thread_info *new_owner); static inline void debug_mutex_clear_owner(struct mutex *lock) { lock->owner = NULL; } -extern void debug_mutex_init_waiter(struct mutex_waiter *waiter); +extern void debug_mutex_lock_common(struct mutex *lock, + struct mutex_waiter *waiter); extern void debug_mutex_wake_waiter(struct mutex *lock, struct mutex_waiter *waiter); extern void debug_mutex_free_waiter(struct mutex_waiter *waiter); extern void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, - struct thread_info *ti __IP_DECL__); + struct thread_info *ti); extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, struct thread_info *ti); extern void debug_mutex_unlock(struct mutex *lock); -extern void debug_mutex_init(struct mutex *lock, const char *name); - -#define debug_spin_lock_save(lock, flags) \ - do { \ - local_irq_save(flags); \ - if (debug_mutex_on) \ - spin_lock(lock); \ - } while (0) - -#define debug_spin_unlock_restore(lock, flags) \ - do { \ - if (debug_mutex_on) \ - spin_unlock(lock); \ - local_irq_restore(flags); \ - preempt_check_resched(); \ - } while (0) +extern void debug_mutex_init(struct mutex *lock, const char 
*name, + struct lock_class_key *key); #define spin_lock_mutex(lock, flags) \ do { \ struct mutex *l = container_of(lock, struct mutex, wait_lock); \ \ - DEBUG_WARN_ON(in_interrupt()); \ - debug_spin_lock_save(&debug_mutex_lock, flags); \ - spin_lock(lock); \ - DEBUG_WARN_ON(l->magic != l); \ + DEBUG_LOCKS_WARN_ON(in_interrupt()); \ + local_irq_save(flags); \ + __raw_spin_lock(&(lock)->raw_lock); \ + DEBUG_LOCKS_WARN_ON(l->magic != l); \ } while (0) #define spin_unlock_mutex(lock, flags) \ do { \ - spin_unlock(lock); \ - debug_spin_unlock_restore(&debug_mutex_lock, flags); \ + __raw_spin_unlock(&(lock)->raw_lock); \ + local_irq_restore(flags); \ + preempt_check_resched(); \ } while (0) - -#define DEBUG_OFF() \ -do { \ - if (debug_mutex_on) { \ - debug_mutex_on = 0; \ - console_verbose(); \ - if (spin_is_locked(&debug_mutex_lock)) \ - spin_unlock(&debug_mutex_lock); \ - } \ -} while (0) - -#define DEBUG_BUG() \ -do { \ - if (debug_mutex_on) { \ - DEBUG_OFF(); \ - BUG(); \ - } \ -} while (0) - -#define DEBUG_WARN_ON(c) \ -do { \ - if (unlikely(c && debug_mutex_on)) { \ - DEBUG_OFF(); \ - WARN_ON(1); \ - } \ -} while (0) - -# define DEBUG_BUG_ON(c) \ -do { \ - if (unlikely(c)) \ - DEBUG_BUG(); \ -} while (0) - -#ifdef CONFIG_SMP -# define SMP_DEBUG_WARN_ON(c) DEBUG_WARN_ON(c) -# define SMP_DEBUG_BUG_ON(c) DEBUG_BUG_ON(c) -#else -# define SMP_DEBUG_WARN_ON(c) do { } while (0) -# define SMP_DEBUG_BUG_ON(c) do { } while (0) -#endif - diff --git a/kernel/mutex.c b/kernel/mutex.c index 7043db21bbce..8c71cf72a497 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -17,6 +17,7 @@ #include <linux/module.h> #include <linux/spinlock.h> #include <linux/interrupt.h> +#include <linux/debug_locks.h> /* * In the DEBUG case we are using the "NULL fastpath" for mutexes, @@ -38,13 +39,14 @@ * * It is not allowed to initialize an already locked mutex. 
*/ -void fastcall __mutex_init(struct mutex *lock, const char *name) +void +__mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) { atomic_set(&lock->count, 1); spin_lock_init(&lock->wait_lock); INIT_LIST_HEAD(&lock->wait_list); - debug_mutex_init(lock, name); + debug_mutex_init(lock, name, key); } EXPORT_SYMBOL(__mutex_init); @@ -56,7 +58,7 @@ EXPORT_SYMBOL(__mutex_init); * branch is predicted by the CPU as default-untaken. */ static void fastcall noinline __sched -__mutex_lock_slowpath(atomic_t *lock_count __IP_DECL__); +__mutex_lock_slowpath(atomic_t *lock_count); /*** * mutex_lock - acquire the mutex @@ -79,7 +81,7 @@ __mutex_lock_slowpath(atomic_t *lock_count __IP_DECL__); * * This function is similar to (but not equivalent to) down(). */ -void fastcall __sched mutex_lock(struct mutex *lock) +void inline fastcall __sched mutex_lock(struct mutex *lock) { might_sleep(); /* @@ -92,7 +94,7 @@ void fastcall __sched mutex_lock(struct mutex *lock) EXPORT_SYMBOL(mutex_lock); static void fastcall noinline __sched -__mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__); +__mutex_unlock_slowpath(atomic_t *lock_count); /*** * mutex_unlock - release the mutex @@ -120,18 +122,18 @@ EXPORT_SYMBOL(mutex_unlock); * Lock a mutex (possibly interruptible), slowpath: */ static inline int __sched -__mutex_lock_common(struct mutex *lock, long state __IP_DECL__) +__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass) { struct task_struct *task = current; struct mutex_waiter waiter; unsigned int old_val; unsigned long flags; - debug_mutex_init_waiter(&waiter); - spin_lock_mutex(&lock->wait_lock, flags); - debug_mutex_add_waiter(lock, &waiter, task->thread_info, ip); + debug_mutex_lock_common(lock, &waiter); + mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); + debug_mutex_add_waiter(lock, &waiter, task->thread_info); /* add waiting tasks to the end of the waitqueue (FIFO): */ list_add_tail(&waiter.list, &lock->wait_list); @@ -158,6 
+160,7 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__) if (unlikely(state == TASK_INTERRUPTIBLE && signal_pending(task))) { mutex_remove_waiter(lock, &waiter, task->thread_info); + mutex_release(&lock->dep_map, 1, _RET_IP_); spin_unlock_mutex(&lock->wait_lock, flags); debug_mutex_free_waiter(&waiter); @@ -173,7 +176,7 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__) /* got the lock - rejoice! */ mutex_remove_waiter(lock, &waiter, task->thread_info); - debug_mutex_set_owner(lock, task->thread_info __IP__); + debug_mutex_set_owner(lock, task->thread_info); /* set it to 0 if there are no waiters left: */ if (likely(list_empty(&lock->wait_list))) @@ -183,32 +186,40 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__) debug_mutex_free_waiter(&waiter); - DEBUG_WARN_ON(list_empty(&lock->held_list)); - DEBUG_WARN_ON(lock->owner != task->thread_info); - return 0; } static void fastcall noinline __sched -__mutex_lock_slowpath(atomic_t *lock_count __IP_DECL__) +__mutex_lock_slowpath(atomic_t *lock_count) { struct mutex *lock = container_of(lock_count, struct mutex, count); - __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE __IP__); + __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0); +} + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +void __sched +mutex_lock_nested(struct mutex *lock, unsigned int subclass) +{ + might_sleep(); + __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass); } +EXPORT_SYMBOL_GPL(mutex_lock_nested); +#endif + /* * Release the lock, slowpath: */ -static fastcall noinline void -__mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__) +static fastcall inline void +__mutex_unlock_common_slowpath(atomic_t *lock_count, int nested) { struct mutex *lock = container_of(lock_count, struct mutex, count); unsigned long flags; - DEBUG_WARN_ON(lock->owner != current_thread_info()); - spin_lock_mutex(&lock->wait_lock, flags); + mutex_release(&lock->dep_map, nested, _RET_IP_); + debug_mutex_unlock(lock); /* * some 
architectures leave the lock unlocked in the fastpath failure @@ -218,8 +229,6 @@ __mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__) if (__mutex_slowpath_needs_to_unlock()) atomic_set(&lock->count, 1); - debug_mutex_unlock(lock); - if (!list_empty(&lock->wait_list)) { /* get the first entry from the wait-list: */ struct mutex_waiter *waiter = @@ -237,11 +246,20 @@ __mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__) } /* + * Release the lock, slowpath: + */ +static fastcall noinline void +__mutex_unlock_slowpath(atomic_t *lock_count) +{ + __mutex_unlock_common_slowpath(lock_count, 1); +} + +/* * Here come the less common (and hence less performance-critical) APIs: * mutex_lock_interruptible() and mutex_trylock(). */ static int fastcall noinline __sched -__mutex_lock_interruptible_slowpath(atomic_t *lock_count __IP_DECL__); +__mutex_lock_interruptible_slowpath(atomic_t *lock_count); /*** * mutex_lock_interruptible - acquire the mutex, interruptable @@ -264,11 +282,11 @@ int fastcall __sched mutex_lock_interruptible(struct mutex *lock) EXPORT_SYMBOL(mutex_lock_interruptible); static int fastcall noinline __sched -__mutex_lock_interruptible_slowpath(atomic_t *lock_count __IP_DECL__) +__mutex_lock_interruptible_slowpath(atomic_t *lock_count) { struct mutex *lock = container_of(lock_count, struct mutex, count); - return __mutex_lock_common(lock, TASK_INTERRUPTIBLE __IP__); + return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0); } /* @@ -284,8 +302,10 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count) spin_lock_mutex(&lock->wait_lock, flags); prev = atomic_xchg(&lock->count, -1); - if (likely(prev == 1)) - debug_mutex_set_owner(lock, current_thread_info() __RET_IP__); + if (likely(prev == 1)) { + debug_mutex_set_owner(lock, current_thread_info()); + mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); + } /* Set it back to 0 if there are no waiters: */ if (likely(list_empty(&lock->wait_list))) atomic_set(&lock->count, 0); @@ -309,7 +329,7 @@ 
static inline int __mutex_trylock_slowpath(atomic_t *lock_count) * This function must not be used in interrupt context. The * mutex must be released by the same task that acquired it. */ -int fastcall mutex_trylock(struct mutex *lock) +int fastcall __sched mutex_trylock(struct mutex *lock) { return __mutex_fastpath_trylock(&lock->count, __mutex_trylock_slowpath); diff --git a/kernel/mutex.h b/kernel/mutex.h index 069189947257..a075dafbb290 100644 --- a/kernel/mutex.h +++ b/kernel/mutex.h @@ -16,22 +16,15 @@ #define mutex_remove_waiter(lock, waiter, ti) \ __list_del((waiter)->list.prev, (waiter)->list.next) -#define DEBUG_WARN_ON(c) do { } while (0) #define debug_mutex_set_owner(lock, new_owner) do { } while (0) #define debug_mutex_clear_owner(lock) do { } while (0) -#define debug_mutex_init_waiter(waiter) do { } while (0) #define debug_mutex_wake_waiter(lock, waiter) do { } while (0) #define debug_mutex_free_waiter(waiter) do { } while (0) -#define debug_mutex_add_waiter(lock, waiter, ti, ip) do { } while (0) +#define debug_mutex_add_waiter(lock, waiter, ti) do { } while (0) #define debug_mutex_unlock(lock) do { } while (0) -#define debug_mutex_init(lock, name) do { } while (0) - -/* - * Return-address parameters/declarations. 
They are very useful for - * debugging, but add overhead in the !DEBUG case - so we go the - * trouble of using this not too elegant but zero-cost solution: - */ -#define __IP_DECL__ -#define __IP__ -#define __RET_IP__ +#define debug_mutex_init(lock, name, key) do { } while (0) +static inline void +debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter) +{ +} diff --git a/kernel/pid.c b/kernel/pid.c index eeb836b65ca4..93e212f20671 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -218,7 +218,7 @@ struct pid * fastcall find_pid(int nr) return NULL; } -int fastcall attach_pid(task_t *task, enum pid_type type, int nr) +int fastcall attach_pid(struct task_struct *task, enum pid_type type, int nr) { struct pid_link *link; struct pid *pid; @@ -233,7 +233,7 @@ int fastcall attach_pid(task_t *task, enum pid_type type, int nr) return 0; } -void fastcall detach_pid(task_t *task, enum pid_type type) +void fastcall detach_pid(struct task_struct *task, enum pid_type type) { struct pid_link *link; struct pid *pid; @@ -267,7 +267,7 @@ struct task_struct * fastcall pid_task(struct pid *pid, enum pid_type type) /* * Must be called under rcu_read_lock() or with tasklist_lock read-held. 
*/ -task_t *find_task_by_pid_type(int type, int nr) +struct task_struct *find_task_by_pid_type(int type, int nr) { return pid_task(find_pid(nr), type); } diff --git a/kernel/printk.c b/kernel/printk.c index 39ae24d2a415..bdba5d80496c 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -518,7 +518,9 @@ asmlinkage int vprintk(const char *fmt, va_list args) zap_locks(); /* This stops the holder of console_sem just where we want him */ - spin_lock_irqsave(&logbuf_lock, flags); + local_irq_save(flags); + lockdep_off(); + spin_lock(&logbuf_lock); printk_cpu = smp_processor_id(); /* Emit the output into the temporary buffer */ @@ -588,7 +590,7 @@ asmlinkage int vprintk(const char *fmt, va_list args) */ console_locked = 1; printk_cpu = UINT_MAX; - spin_unlock_irqrestore(&logbuf_lock, flags); + spin_unlock(&logbuf_lock); /* * Console drivers may assume that per-cpu resources have @@ -604,6 +606,8 @@ asmlinkage int vprintk(const char *fmt, va_list args) console_locked = 0; up(&console_sem); } + lockdep_on(); + local_irq_restore(flags); } else { /* * Someone else owns the drivers. We drop the spinlock, which @@ -611,7 +615,9 @@ asmlinkage int vprintk(const char *fmt, va_list args) * console drivers with the output which we just produced. 
*/ printk_cpu = UINT_MAX; - spin_unlock_irqrestore(&logbuf_lock, flags); + spin_unlock(&logbuf_lock); + lockdep_on(); + local_irq_restore(flags); } preempt_enable(); @@ -809,8 +815,15 @@ void release_console_sem(void) console_may_schedule = 0; up(&console_sem); spin_unlock_irqrestore(&logbuf_lock, flags); - if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait)) - wake_up_interruptible(&log_wait); + if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait)) { + /* + * If we printk from within the lock dependency code, + * from within the scheduler code, then do not lock + * up due to self-recursion: + */ + if (!lockdep_internal()) + wake_up_interruptible(&log_wait); + } } EXPORT_SYMBOL(release_console_sem); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 335c5b932e14..9a111f70145c 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -28,7 +28,7 @@ * * Must be called with the tasklist lock write-held. */ -void __ptrace_link(task_t *child, task_t *new_parent) +void __ptrace_link(struct task_struct *child, struct task_struct *new_parent) { BUG_ON(!list_empty(&child->ptrace_list)); if (child->parent == new_parent) @@ -46,7 +46,7 @@ void __ptrace_link(task_t *child, task_t *new_parent) * TASK_TRACED, resume it now. * Requires that irqs be disabled. */ -void ptrace_untrace(task_t *child) +void ptrace_untrace(struct task_struct *child) { spin_lock(&child->sighand->siglock); if (child->state == TASK_TRACED) { @@ -65,7 +65,7 @@ void ptrace_untrace(task_t *child) * * Must be called with the tasklist lock write-held. 
*/ -void __ptrace_unlink(task_t *child) +void __ptrace_unlink(struct task_struct *child) { BUG_ON(!child->ptrace); diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index f464f5ae3f11..759805c9859a 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -53,13 +53,13 @@ static struct rcu_ctrlblk rcu_ctrlblk = { .cur = -300, .completed = -300, - .lock = SPIN_LOCK_UNLOCKED, + .lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock), .cpumask = CPU_MASK_NONE, }; static struct rcu_ctrlblk rcu_bh_ctrlblk = { .cur = -300, .completed = -300, - .lock = SPIN_LOCK_UNLOCKED, + .lock = __SPIN_LOCK_UNLOCKED(&rcu_bh_ctrlblk.lock), .cpumask = CPU_MASK_NONE, }; diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c index 4aa8a2c9f453..0c1faa950af7 100644 --- a/kernel/rtmutex-debug.c +++ b/kernel/rtmutex-debug.c @@ -26,6 +26,7 @@ #include <linux/interrupt.h> #include <linux/plist.h> #include <linux/fs.h> +#include <linux/debug_locks.h> #include "rtmutex_common.h" @@ -45,8 +46,6 @@ do { \ console_verbose(); \ if (spin_is_locked(&current->pi_lock)) \ spin_unlock(&current->pi_lock); \ - if (spin_is_locked(&current->held_list_lock)) \ - spin_unlock(&current->held_list_lock); \ } \ } while (0) @@ -97,7 +96,7 @@ void deadlock_trace_off(void) rt_trace_on = 0; } -static void printk_task(task_t *p) +static void printk_task(struct task_struct *p) { if (p) printk("%16s:%5d [%p, %3d]", p->comm, p->pid, p, p->prio); @@ -105,14 +104,6 @@ static void printk_task(task_t *p) printk("<none>"); } -static void printk_task_short(task_t *p) -{ - if (p) - printk("%s/%d [%p, %3d]", p->comm, p->pid, p, p->prio); - else - printk("<none>"); -} - static void printk_lock(struct rt_mutex *lock, int print_owner) { if (lock->name) @@ -128,222 +119,6 @@ static void printk_lock(struct rt_mutex *lock, int print_owner) printk_task(rt_mutex_owner(lock)); printk("\n"); } - if (rt_mutex_owner(lock)) { - printk("... 
acquired at: "); - print_symbol("%s\n", lock->acquire_ip); - } -} - -static void printk_waiter(struct rt_mutex_waiter *w) -{ - printk("-------------------------\n"); - printk("| waiter struct %p:\n", w); - printk("| w->list_entry: [DP:%p/%p|SP:%p/%p|PRI:%d]\n", - w->list_entry.plist.prio_list.prev, w->list_entry.plist.prio_list.next, - w->list_entry.plist.node_list.prev, w->list_entry.plist.node_list.next, - w->list_entry.prio); - printk("| w->pi_list_entry: [DP:%p/%p|SP:%p/%p|PRI:%d]\n", - w->pi_list_entry.plist.prio_list.prev, w->pi_list_entry.plist.prio_list.next, - w->pi_list_entry.plist.node_list.prev, w->pi_list_entry.plist.node_list.next, - w->pi_list_entry.prio); - printk("\n| lock:\n"); - printk_lock(w->lock, 1); - printk("| w->ti->task:\n"); - printk_task(w->task); - printk("| blocked at: "); - print_symbol("%s\n", w->ip); - printk("-------------------------\n"); -} - -static void show_task_locks(task_t *p) -{ - switch (p->state) { - case TASK_RUNNING: printk("R"); break; - case TASK_INTERRUPTIBLE: printk("S"); break; - case TASK_UNINTERRUPTIBLE: printk("D"); break; - case TASK_STOPPED: printk("T"); break; - case EXIT_ZOMBIE: printk("Z"); break; - case EXIT_DEAD: printk("X"); break; - default: printk("?"); break; - } - printk_task(p); - if (p->pi_blocked_on) { - struct rt_mutex *lock = p->pi_blocked_on->lock; - - printk(" blocked on:"); - printk_lock(lock, 1); - } else - printk(" (not blocked)\n"); -} - -void rt_mutex_show_held_locks(task_t *task, int verbose) -{ - struct list_head *curr, *cursor = NULL; - struct rt_mutex *lock; - task_t *t; - unsigned long flags; - int count = 0; - - if (!rt_trace_on) - return; - - if (verbose) { - printk("------------------------------\n"); - printk("| showing all locks held by: | ("); - printk_task_short(task); - printk("):\n"); - printk("------------------------------\n"); - } - -next: - spin_lock_irqsave(&task->held_list_lock, flags); - list_for_each(curr, &task->held_list_head) { - if (cursor && curr != cursor) - 
continue; - lock = list_entry(curr, struct rt_mutex, held_list_entry); - t = rt_mutex_owner(lock); - WARN_ON(t != task); - count++; - cursor = curr->next; - spin_unlock_irqrestore(&task->held_list_lock, flags); - - printk("\n#%03d: ", count); - printk_lock(lock, 0); - goto next; - } - spin_unlock_irqrestore(&task->held_list_lock, flags); - - printk("\n"); -} - -void rt_mutex_show_all_locks(void) -{ - task_t *g, *p; - int count = 10; - int unlock = 1; - - printk("\n"); - printk("----------------------\n"); - printk("| showing all tasks: |\n"); - printk("----------------------\n"); - - /* - * Here we try to get the tasklist_lock as hard as possible, - * if not successful after 2 seconds we ignore it (but keep - * trying). This is to enable a debug printout even if a - * tasklist_lock-holding task deadlocks or crashes. - */ -retry: - if (!read_trylock(&tasklist_lock)) { - if (count == 10) - printk("hm, tasklist_lock locked, retrying... "); - if (count) { - count--; - printk(" #%d", 10-count); - mdelay(200); - goto retry; - } - printk(" ignoring it.\n"); - unlock = 0; - } - if (count != 10) - printk(" locked it.\n"); - - do_each_thread(g, p) { - show_task_locks(p); - if (!unlock) - if (read_trylock(&tasklist_lock)) - unlock = 1; - } while_each_thread(g, p); - - printk("\n"); - - printk("-----------------------------------------\n"); - printk("| showing all locks held in the system: |\n"); - printk("-----------------------------------------\n"); - - do_each_thread(g, p) { - rt_mutex_show_held_locks(p, 0); - if (!unlock) - if (read_trylock(&tasklist_lock)) - unlock = 1; - } while_each_thread(g, p); - - - printk("=============================================\n\n"); - - if (unlock) - read_unlock(&tasklist_lock); -} - -void rt_mutex_debug_check_no_locks_held(task_t *task) -{ - struct rt_mutex_waiter *w; - struct list_head *curr; - struct rt_mutex *lock; - - if (!rt_trace_on) - return; - if (!rt_prio(task->normal_prio) && rt_prio(task->prio)) { - printk("BUG: PI priority 
boost leaked!\n"); - printk_task(task); - printk("\n"); - } - if (list_empty(&task->held_list_head)) - return; - - spin_lock(&task->pi_lock); - plist_for_each_entry(w, &task->pi_waiters, pi_list_entry) { - TRACE_OFF(); - - printk("hm, PI interest held at exit time? Task:\n"); - printk_task(task); - printk_waiter(w); - return; - } - spin_unlock(&task->pi_lock); - - list_for_each(curr, &task->held_list_head) { - lock = list_entry(curr, struct rt_mutex, held_list_entry); - - printk("BUG: %s/%d, lock held at task exit time!\n", - task->comm, task->pid); - printk_lock(lock, 1); - if (rt_mutex_owner(lock) != task) - printk("exiting task is not even the owner??\n"); - } -} - -int rt_mutex_debug_check_no_locks_freed(const void *from, unsigned long len) -{ - const void *to = from + len; - struct list_head *curr; - struct rt_mutex *lock; - unsigned long flags; - void *lock_addr; - - if (!rt_trace_on) - return 0; - - spin_lock_irqsave(&current->held_list_lock, flags); - list_for_each(curr, &current->held_list_head) { - lock = list_entry(curr, struct rt_mutex, held_list_entry); - lock_addr = lock; - if (lock_addr < from || lock_addr >= to) - continue; - TRACE_OFF(); - - printk("BUG: %s/%d, active lock [%p(%p-%p)] freed!\n", - current->comm, current->pid, lock, from, to); - dump_stack(); - printk_lock(lock, 1); - if (rt_mutex_owner(lock) != current) - printk("freeing task is not even the owner??\n"); - return 1; - } - spin_unlock_irqrestore(&current->held_list_lock, flags); - - return 0; } void rt_mutex_debug_task_free(struct task_struct *task) @@ -395,85 +170,41 @@ void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter) current->comm, current->pid); printk_lock(waiter->lock, 1); - printk("... 
trying at: "); - print_symbol("%s\n", waiter->ip); - printk("\n2) %s/%d is blocked on this lock:\n", task->comm, task->pid); printk_lock(waiter->deadlock_lock, 1); - rt_mutex_show_held_locks(current, 1); - rt_mutex_show_held_locks(task, 1); + debug_show_held_locks(current); + debug_show_held_locks(task); printk("\n%s/%d's [blocked] stackdump:\n\n", task->comm, task->pid); show_stack(task, NULL); printk("\n%s/%d's [current] stackdump:\n\n", current->comm, current->pid); dump_stack(); - rt_mutex_show_all_locks(); + debug_show_all_locks(); + printk("[ turning off deadlock detection." "Please report this trace. ]\n\n"); local_irq_disable(); } -void debug_rt_mutex_lock(struct rt_mutex *lock __IP_DECL__) +void debug_rt_mutex_lock(struct rt_mutex *lock) { - unsigned long flags; - - if (rt_trace_on) { - TRACE_WARN_ON_LOCKED(!list_empty(&lock->held_list_entry)); - - spin_lock_irqsave(&current->held_list_lock, flags); - list_add_tail(&lock->held_list_entry, &current->held_list_head); - spin_unlock_irqrestore(&current->held_list_lock, flags); - - lock->acquire_ip = ip; - } } void debug_rt_mutex_unlock(struct rt_mutex *lock) { - unsigned long flags; - - if (rt_trace_on) { - TRACE_WARN_ON_LOCKED(rt_mutex_owner(lock) != current); - TRACE_WARN_ON_LOCKED(list_empty(&lock->held_list_entry)); - - spin_lock_irqsave(&current->held_list_lock, flags); - list_del_init(&lock->held_list_entry); - spin_unlock_irqrestore(&current->held_list_lock, flags); - } + TRACE_WARN_ON_LOCKED(rt_mutex_owner(lock) != current); } -void debug_rt_mutex_proxy_lock(struct rt_mutex *lock, - struct task_struct *powner __IP_DECL__) +void +debug_rt_mutex_proxy_lock(struct rt_mutex *lock, struct task_struct *powner) { - unsigned long flags; - - if (rt_trace_on) { - TRACE_WARN_ON_LOCKED(!list_empty(&lock->held_list_entry)); - - spin_lock_irqsave(&powner->held_list_lock, flags); - list_add_tail(&lock->held_list_entry, &powner->held_list_head); - spin_unlock_irqrestore(&powner->held_list_lock, flags); - - lock->acquire_ip = ip; - } } void 
debug_rt_mutex_proxy_unlock(struct rt_mutex *lock) { - unsigned long flags; - - if (rt_trace_on) { - struct task_struct *owner = rt_mutex_owner(lock); - - TRACE_WARN_ON_LOCKED(!owner); - TRACE_WARN_ON_LOCKED(list_empty(&lock->held_list_entry)); - - spin_lock_irqsave(&owner->held_list_lock, flags); - list_del_init(&lock->held_list_entry); - spin_unlock_irqrestore(&owner->held_list_lock, flags); - } + TRACE_WARN_ON_LOCKED(!rt_mutex_owner(lock)); } void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) @@ -493,17 +224,15 @@ void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter) void debug_rt_mutex_init(struct rt_mutex *lock, const char *name) { - void *addr = lock; - - if (rt_trace_on) { - rt_mutex_debug_check_no_locks_freed(addr, - sizeof(struct rt_mutex)); - INIT_LIST_HEAD(&lock->held_list_entry); - lock->name = name; - } + /* + * Make sure we are not reinitializing a held lock: + */ + debug_check_no_locks_freed((void *)lock, sizeof(*lock)); + lock->name = name; } -void rt_mutex_deadlock_account_lock(struct rt_mutex *lock, task_t *task) +void +rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task) { } diff --git a/kernel/rtmutex-debug.h b/kernel/rtmutex-debug.h index 7612fbc62d70..14193d596d78 100644 --- a/kernel/rtmutex-debug.h +++ b/kernel/rtmutex-debug.h @@ -9,20 +9,16 @@ * This file contains macros used solely by rtmutex.c. Debug version. 
*/ -#define __IP_DECL__ , unsigned long ip -#define __IP__ , ip -#define __RET_IP__ , (unsigned long)__builtin_return_address(0) - extern void rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task); extern void rt_mutex_deadlock_account_unlock(struct task_struct *task); extern void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter); extern void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter); extern void debug_rt_mutex_init(struct rt_mutex *lock, const char *name); -extern void debug_rt_mutex_lock(struct rt_mutex *lock __IP_DECL__); +extern void debug_rt_mutex_lock(struct rt_mutex *lock); extern void debug_rt_mutex_unlock(struct rt_mutex *lock); extern void debug_rt_mutex_proxy_lock(struct rt_mutex *lock, - struct task_struct *powner __IP_DECL__); + struct task_struct *powner); extern void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock); extern void debug_rt_mutex_deadlock(int detect, struct rt_mutex_waiter *waiter, struct rt_mutex *lock); diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c index e82c2f848249..494dac872a13 100644 --- a/kernel/rtmutex-tester.c +++ b/kernel/rtmutex-tester.c @@ -33,7 +33,7 @@ struct test_thread_data { }; static struct test_thread_data thread_data[MAX_RT_TEST_THREADS]; -static task_t *threads[MAX_RT_TEST_THREADS]; +static struct task_struct *threads[MAX_RT_TEST_THREADS]; static struct rt_mutex mutexes[MAX_RT_TEST_MUTEXES]; enum test_opcodes { @@ -361,8 +361,8 @@ static ssize_t sysfs_test_command(struct sys_device *dev, const char *buf, static ssize_t sysfs_test_status(struct sys_device *dev, char *buf) { struct test_thread_data *td; + struct task_struct *tsk; char *curr = buf; - task_t *tsk; int i; td = container_of(dev, struct test_thread_data, sysdev); diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c index 45d61016da57..d2ef13b485e7 100644 --- a/kernel/rtmutex.c +++ b/kernel/rtmutex.c @@ -157,12 +157,11 @@ int max_lock_depth = 1024; * Decreases task's usage by one - may thus 
free the task. * Returns 0 or -EDEADLK. */ -static int rt_mutex_adjust_prio_chain(task_t *task, +static int rt_mutex_adjust_prio_chain(struct task_struct *task, int deadlock_detect, struct rt_mutex *orig_lock, struct rt_mutex_waiter *orig_waiter, - struct task_struct *top_task - __IP_DECL__) + struct task_struct *top_task) { struct rt_mutex *lock; struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter; @@ -283,6 +282,7 @@ static int rt_mutex_adjust_prio_chain(task_t *task, spin_unlock_irqrestore(&task->pi_lock, flags); out_put_task: put_task_struct(task); + return ret; } @@ -357,7 +357,7 @@ static inline int try_to_steal_lock(struct rt_mutex *lock) * * Must be called with lock->wait_lock held. */ -static int try_to_take_rt_mutex(struct rt_mutex *lock __IP_DECL__) +static int try_to_take_rt_mutex(struct rt_mutex *lock) { /* * We have to be careful here if the atomic speedups are @@ -384,7 +384,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock __IP_DECL__) return 0; /* We got the lock. 
*/ - debug_rt_mutex_lock(lock __IP__); + debug_rt_mutex_lock(lock); rt_mutex_set_owner(lock, current, 0); @@ -402,13 +402,12 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock __IP_DECL__) */ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, - int detect_deadlock - __IP_DECL__) + int detect_deadlock) { + struct task_struct *owner = rt_mutex_owner(lock); struct rt_mutex_waiter *top_waiter = waiter; - task_t *owner = rt_mutex_owner(lock); - int boost = 0, res; unsigned long flags; + int boost = 0, res; spin_lock_irqsave(&current->pi_lock, flags); __rt_mutex_adjust_prio(current); @@ -454,7 +453,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, spin_unlock(&lock->wait_lock); res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter, - current __IP__); + current); spin_lock(&lock->wait_lock); @@ -526,12 +525,12 @@ static void wakeup_next_waiter(struct rt_mutex *lock) * Must be called with lock->wait_lock held */ static void remove_waiter(struct rt_mutex *lock, - struct rt_mutex_waiter *waiter __IP_DECL__) + struct rt_mutex_waiter *waiter) { int first = (waiter == rt_mutex_top_waiter(lock)); - int boost = 0; - task_t *owner = rt_mutex_owner(lock); + struct task_struct *owner = rt_mutex_owner(lock); unsigned long flags; + int boost = 0; spin_lock_irqsave(&current->pi_lock, flags); plist_del(&waiter->list_entry, &lock->wait_list); @@ -568,7 +567,7 @@ static void remove_waiter(struct rt_mutex *lock, spin_unlock(&lock->wait_lock); - rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current __IP__); + rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current); spin_lock(&lock->wait_lock); } @@ -595,7 +594,7 @@ void rt_mutex_adjust_pi(struct task_struct *task) get_task_struct(task); spin_unlock_irqrestore(&task->pi_lock, flags); - rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task __RET_IP__); + rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task); } /* @@ -604,7 +603,7 @@ void rt_mutex_adjust_pi(struct task_struct 
*task) static int __sched rt_mutex_slowlock(struct rt_mutex *lock, int state, struct hrtimer_sleeper *timeout, - int detect_deadlock __IP_DECL__) + int detect_deadlock) { struct rt_mutex_waiter waiter; int ret = 0; @@ -615,7 +614,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, spin_lock(&lock->wait_lock); /* Try to acquire the lock again: */ - if (try_to_take_rt_mutex(lock __IP__)) { + if (try_to_take_rt_mutex(lock)) { spin_unlock(&lock->wait_lock); return 0; } @@ -629,7 +628,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, for (;;) { /* Try to acquire the lock: */ - if (try_to_take_rt_mutex(lock __IP__)) + if (try_to_take_rt_mutex(lock)) break; /* @@ -653,7 +652,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, */ if (!waiter.task) { ret = task_blocks_on_rt_mutex(lock, &waiter, - detect_deadlock __IP__); + detect_deadlock); /* * If we got woken up by the owner then start loop * all over without going into schedule to try @@ -680,7 +679,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, set_current_state(TASK_RUNNING); if (unlikely(waiter.task)) - remove_waiter(lock, &waiter __IP__); + remove_waiter(lock, &waiter); /* * try_to_take_rt_mutex() sets the waiter bit @@ -711,7 +710,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, * Slow path try-lock function: */ static inline int -rt_mutex_slowtrylock(struct rt_mutex *lock __IP_DECL__) +rt_mutex_slowtrylock(struct rt_mutex *lock) { int ret = 0; @@ -719,7 +718,7 @@ rt_mutex_slowtrylock(struct rt_mutex *lock __IP_DECL__) if (likely(rt_mutex_owner(lock) != current)) { - ret = try_to_take_rt_mutex(lock __IP__); + ret = try_to_take_rt_mutex(lock); /* * try_to_take_rt_mutex() sets the lock waiters * bit unconditionally. Clean this up. 
@@ -769,13 +768,13 @@ rt_mutex_fastlock(struct rt_mutex *lock, int state, int detect_deadlock, int (*slowfn)(struct rt_mutex *lock, int state, struct hrtimer_sleeper *timeout, - int detect_deadlock __IP_DECL__)) + int detect_deadlock)) { if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) { rt_mutex_deadlock_account_lock(lock, current); return 0; } else - return slowfn(lock, state, NULL, detect_deadlock __RET_IP__); + return slowfn(lock, state, NULL, detect_deadlock); } static inline int @@ -783,24 +782,24 @@ rt_mutex_timed_fastlock(struct rt_mutex *lock, int state, struct hrtimer_sleeper *timeout, int detect_deadlock, int (*slowfn)(struct rt_mutex *lock, int state, struct hrtimer_sleeper *timeout, - int detect_deadlock __IP_DECL__)) + int detect_deadlock)) { if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) { rt_mutex_deadlock_account_lock(lock, current); return 0; } else - return slowfn(lock, state, timeout, detect_deadlock __RET_IP__); + return slowfn(lock, state, timeout, detect_deadlock); } static inline int rt_mutex_fasttrylock(struct rt_mutex *lock, - int (*slowfn)(struct rt_mutex *lock __IP_DECL__)) + int (*slowfn)(struct rt_mutex *lock)) { if (likely(rt_mutex_cmpxchg(lock, NULL, current))) { rt_mutex_deadlock_account_lock(lock, current); return 1; } - return slowfn(lock __RET_IP__); + return slowfn(lock); } static inline void @@ -948,7 +947,7 @@ void rt_mutex_init_proxy_locked(struct rt_mutex *lock, struct task_struct *proxy_owner) { __rt_mutex_init(lock, NULL); - debug_rt_mutex_proxy_lock(lock, proxy_owner __RET_IP__); + debug_rt_mutex_proxy_lock(lock, proxy_owner); rt_mutex_set_owner(lock, proxy_owner, 0); rt_mutex_deadlock_account_lock(lock, proxy_owner); } diff --git a/kernel/rtmutex.h b/kernel/rtmutex.h index 1e0fca13ff72..a1a1dd06421d 100644 --- a/kernel/rtmutex.h +++ b/kernel/rtmutex.h @@ -10,9 +10,6 @@ * Non-debug version. 
*/ -#define __IP_DECL__ -#define __IP__ -#define __RET_IP__ #define rt_mutex_deadlock_check(l) (0) #define rt_mutex_deadlock_account_lock(m, t) do { } while (0) #define rt_mutex_deadlock_account_unlock(l) do { } while (0) diff --git a/kernel/rwsem.c b/kernel/rwsem.c new file mode 100644 index 000000000000..291ded556aa0 --- /dev/null +++ b/kernel/rwsem.c @@ -0,0 +1,147 @@ +/* kernel/rwsem.c: R/W semaphores, public implementation + * + * Written by David Howells (dhowells@redhat.com). + * Derived from asm-i386/semaphore.h + */ + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/rwsem.h> + +#include <asm/system.h> +#include <asm/atomic.h> + +/* + * lock for reading + */ +void down_read(struct rw_semaphore *sem) +{ + might_sleep(); + rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); + + __down_read(sem); +} + +EXPORT_SYMBOL(down_read); + +/* + * trylock for reading -- returns 1 if successful, 0 if contention + */ +int down_read_trylock(struct rw_semaphore *sem) +{ + int ret = __down_read_trylock(sem); + + if (ret == 1) + rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_); + return ret; +} + +EXPORT_SYMBOL(down_read_trylock); + +/* + * lock for writing + */ +void down_write(struct rw_semaphore *sem) +{ + might_sleep(); + rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); + + __down_write(sem); +} + +EXPORT_SYMBOL(down_write); + +/* + * trylock for writing -- returns 1 if successful, 0 if contention + */ +int down_write_trylock(struct rw_semaphore *sem) +{ + int ret = __down_write_trylock(sem); + + if (ret == 1) + rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); + return ret; +} + +EXPORT_SYMBOL(down_write_trylock); + +/* + * release a read lock + */ +void up_read(struct rw_semaphore *sem) +{ + rwsem_release(&sem->dep_map, 1, _RET_IP_); + + __up_read(sem); +} + +EXPORT_SYMBOL(up_read); + +/* + * release a write lock + */ +void up_write(struct rw_semaphore *sem) +{ + rwsem_release(&sem->dep_map, 1, _RET_IP_); + + __up_write(sem); +} 
+ +EXPORT_SYMBOL(up_write); + +/* + * downgrade write lock to read lock + */ +void downgrade_write(struct rw_semaphore *sem) +{ + /* + * lockdep: a downgraded write will live on as a write + * dependency. + */ + __downgrade_write(sem); +} + +EXPORT_SYMBOL(downgrade_write); + +#ifdef CONFIG_DEBUG_LOCK_ALLOC + +void down_read_nested(struct rw_semaphore *sem, int subclass) +{ + might_sleep(); + rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); + + __down_read(sem); +} + +EXPORT_SYMBOL(down_read_nested); + +void down_read_non_owner(struct rw_semaphore *sem) +{ + might_sleep(); + + __down_read(sem); +} + +EXPORT_SYMBOL(down_read_non_owner); + +void down_write_nested(struct rw_semaphore *sem, int subclass) +{ + might_sleep(); + rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); + + __down_write_nested(sem, subclass); +} + +EXPORT_SYMBOL(down_write_nested); + +void up_read_non_owner(struct rw_semaphore *sem) +{ + __up_read(sem); +} + +EXPORT_SYMBOL(up_read_non_owner); + +#endif + + diff --git a/kernel/sched.c b/kernel/sched.c index d5e37072ea54..4ee400f9d56b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -30,6 +30,7 @@ #include <linux/capability.h> #include <linux/completion.h> #include <linux/kernel_stat.h> +#include <linux/debug_locks.h> #include <linux/security.h> #include <linux/notifier.h> #include <linux/profile.h> @@ -178,20 +179,15 @@ static unsigned int static_prio_timeslice(int static_prio) return SCALE_PRIO(DEF_TIMESLICE, static_prio); } -static inline unsigned int task_timeslice(task_t *p) +static inline unsigned int task_timeslice(struct task_struct *p) { return static_prio_timeslice(p->static_prio); } -#define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran) \ - < (long long) (sd)->cache_hot_time) - /* * These are the runqueue data structures: */ -typedef struct runqueue runqueue_t; - struct prio_array { unsigned int nr_active; DECLARE_BITMAP(bitmap, MAX_PRIO+1); /* include 1 bit for delimiter */ @@ -205,7 +201,7 @@ struct prio_array 
{ * (such as the load balancing or the thread migration code), lock * acquire operations must be ordered by ascending &runqueue. */ -struct runqueue { +struct rq { spinlock_t lock; /* @@ -229,9 +225,9 @@ struct runqueue { unsigned long expired_timestamp; unsigned long long timestamp_last_tick; - task_t *curr, *idle; + struct task_struct *curr, *idle; struct mm_struct *prev_mm; - prio_array_t *active, *expired, arrays[2]; + struct prio_array *active, *expired, arrays[2]; int best_expired_prio; atomic_t nr_iowait; @@ -242,7 +238,7 @@ struct runqueue { int active_balance; int push_cpu; - task_t *migration_thread; + struct task_struct *migration_thread; struct list_head migration_queue; #endif @@ -265,9 +261,10 @@ struct runqueue { unsigned long ttwu_cnt; unsigned long ttwu_local; #endif + struct lock_class_key rq_lock_key; }; -static DEFINE_PER_CPU(struct runqueue, runqueues); +static DEFINE_PER_CPU(struct rq, runqueues); /* * The domain tree (rq->sd) is protected by RCU's quiescent state transition. @@ -276,8 +273,8 @@ static DEFINE_PER_CPU(struct runqueue, runqueues); * The domain tree of any CPU may only be accessed from within * preempt-disabled sections. 
*/ -#define for_each_domain(cpu, domain) \ -for (domain = rcu_dereference(cpu_rq(cpu)->sd); domain; domain = domain->parent) +#define for_each_domain(cpu, __sd) \ + for (__sd = rcu_dereference(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent) #define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) #define this_rq() (&__get_cpu_var(runqueues)) @@ -292,26 +289,33 @@ for (domain = rcu_dereference(cpu_rq(cpu)->sd); domain; domain = domain->parent) #endif #ifndef __ARCH_WANT_UNLOCKED_CTXSW -static inline int task_running(runqueue_t *rq, task_t *p) +static inline int task_running(struct rq *rq, struct task_struct *p) { return rq->curr == p; } -static inline void prepare_lock_switch(runqueue_t *rq, task_t *next) +static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) { } -static inline void finish_lock_switch(runqueue_t *rq, task_t *prev) +static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) { #ifdef CONFIG_DEBUG_SPINLOCK /* this is a valid case when another task releases the spinlock */ rq->lock.owner = current; #endif + /* + * If we are tracking spinlock dependencies then we have to + * fix up the runqueue lock - which gets 'carried over' from + * prev into current: + */ + spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); + spin_unlock_irq(&rq->lock); } #else /* __ARCH_WANT_UNLOCKED_CTXSW */ -static inline int task_running(runqueue_t *rq, task_t *p) +static inline int task_running(struct rq *rq, struct task_struct *p) { #ifdef CONFIG_SMP return p->oncpu; @@ -320,7 +324,7 @@ static inline int task_running(runqueue_t *rq, task_t *p) #endif } -static inline void prepare_lock_switch(runqueue_t *rq, task_t *next) +static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) { #ifdef CONFIG_SMP /* @@ -337,7 +341,7 @@ static inline void prepare_lock_switch(runqueue_t *rq, task_t *next) #endif } -static inline void finish_lock_switch(runqueue_t *rq, task_t *prev) +static inline void finish_lock_switch(struct rq *rq, 
struct task_struct *prev) { #ifdef CONFIG_SMP /* @@ -358,10 +362,10 @@ static inline void finish_lock_switch(runqueue_t *rq, task_t *prev) * __task_rq_lock - lock the runqueue a given task resides on. * Must be called interrupts disabled. */ -static inline runqueue_t *__task_rq_lock(task_t *p) +static inline struct rq *__task_rq_lock(struct task_struct *p) __acquires(rq->lock) { - struct runqueue *rq; + struct rq *rq; repeat_lock_task: rq = task_rq(p); @@ -378,10 +382,10 @@ repeat_lock_task: * interrupts. Note the ordering: we can safely lookup the task_rq without * explicitly disabling preemption. */ -static runqueue_t *task_rq_lock(task_t *p, unsigned long *flags) +static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) __acquires(rq->lock) { - struct runqueue *rq; + struct rq *rq; repeat_lock_task: local_irq_save(*flags); @@ -394,13 +398,13 @@ repeat_lock_task: return rq; } -static inline void __task_rq_unlock(runqueue_t *rq) +static inline void __task_rq_unlock(struct rq *rq) __releases(rq->lock) { spin_unlock(&rq->lock); } -static inline void task_rq_unlock(runqueue_t *rq, unsigned long *flags) +static inline void task_rq_unlock(struct rq *rq, unsigned long *flags) __releases(rq->lock) { spin_unlock_irqrestore(&rq->lock, *flags); @@ -420,7 +424,7 @@ static int show_schedstat(struct seq_file *seq, void *v) seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION); seq_printf(seq, "timestamp %lu\n", jiffies); for_each_online_cpu(cpu) { - runqueue_t *rq = cpu_rq(cpu); + struct rq *rq = cpu_rq(cpu); #ifdef CONFIG_SMP struct sched_domain *sd; int dcnt = 0; @@ -507,10 +511,10 @@ struct file_operations proc_schedstat_operations = { /* * rq_lock - lock a given runqueue and disable interrupts. 
*/ -static inline runqueue_t *this_rq_lock(void) +static inline struct rq *this_rq_lock(void) __acquires(rq->lock) { - runqueue_t *rq; + struct rq *rq; local_irq_disable(); rq = this_rq(); @@ -535,7 +539,7 @@ static inline runqueue_t *this_rq_lock(void) * long it was from the *first* time it was queued to the time that it * finally hit a cpu. */ -static inline void sched_info_dequeued(task_t *t) +static inline void sched_info_dequeued(struct task_struct *t) { t->sched_info.last_queued = 0; } @@ -545,10 +549,10 @@ static inline void sched_info_dequeued(task_t *t) * long it was waiting to run. We also note when it began so that we * can keep stats on how long its timeslice is. */ -static void sched_info_arrive(task_t *t) +static void sched_info_arrive(struct task_struct *t) { unsigned long now = jiffies, diff = 0; - struct runqueue *rq = task_rq(t); + struct rq *rq = task_rq(t); if (t->sched_info.last_queued) diff = now - t->sched_info.last_queued; @@ -579,7 +583,7 @@ static void sched_info_arrive(task_t *t) * the timestamp if it is already not set. It's assumed that * sched_info_dequeued() will clear that stamp when appropriate. */ -static inline void sched_info_queued(task_t *t) +static inline void sched_info_queued(struct task_struct *t) { if (!t->sched_info.last_queued) t->sched_info.last_queued = jiffies; @@ -589,9 +593,9 @@ static inline void sched_info_queued(task_t *t) * Called when a process ceases being the active-running process, either * voluntarily or involuntarily. Now we can calculate how long we ran. */ -static inline void sched_info_depart(task_t *t) +static inline void sched_info_depart(struct task_struct *t) { - struct runqueue *rq = task_rq(t); + struct rq *rq = task_rq(t); unsigned long diff = jiffies - t->sched_info.last_arrival; t->sched_info.cpu_time += diff; @@ -605,9 +609,10 @@ static inline void sched_info_depart(task_t *t) * their time slice. (This may also be called when switching to or from * the idle task.) 
We are only called when prev != next. */ -static inline void sched_info_switch(task_t *prev, task_t *next) +static inline void +sched_info_switch(struct task_struct *prev, struct task_struct *next) { - struct runqueue *rq = task_rq(prev); + struct rq *rq = task_rq(prev); /* * prev now departs the cpu. It's not interesting to record @@ -628,7 +633,7 @@ static inline void sched_info_switch(task_t *prev, task_t *next) /* * Adding/removing a task to/from a priority array: */ -static void dequeue_task(struct task_struct *p, prio_array_t *array) +static void dequeue_task(struct task_struct *p, struct prio_array *array) { array->nr_active--; list_del(&p->run_list); @@ -636,7 +641,7 @@ static void dequeue_task(struct task_struct *p, prio_array_t *array) __clear_bit(p->prio, array->bitmap); } -static void enqueue_task(struct task_struct *p, prio_array_t *array) +static void enqueue_task(struct task_struct *p, struct prio_array *array) { sched_info_queued(p); list_add_tail(&p->run_list, array->queue + p->prio); @@ -649,12 +654,13 @@ static void enqueue_task(struct task_struct *p, prio_array_t *array) * Put task to the end of the run list without the overhead of dequeue * followed by enqueue. */ -static void requeue_task(struct task_struct *p, prio_array_t *array) +static void requeue_task(struct task_struct *p, struct prio_array *array) { list_move_tail(&p->run_list, array->queue + p->prio); } -static inline void enqueue_task_head(struct task_struct *p, prio_array_t *array) +static inline void +enqueue_task_head(struct task_struct *p, struct prio_array *array) { list_add(&p->run_list, array->queue + p->prio); __set_bit(p->prio, array->bitmap); @@ -677,7 +683,7 @@ static inline void enqueue_task_head(struct task_struct *p, prio_array_t *array) * Both properties are important to certain workloads. 
*/ -static inline int __normal_prio(task_t *p) +static inline int __normal_prio(struct task_struct *p) { int bonus, prio; @@ -713,7 +719,7 @@ static inline int __normal_prio(task_t *p) #define RTPRIO_TO_LOAD_WEIGHT(rp) \ (PRIO_TO_LOAD_WEIGHT(MAX_RT_PRIO) + LOAD_WEIGHT(rp)) -static void set_load_weight(task_t *p) +static void set_load_weight(struct task_struct *p) { if (has_rt_policy(p)) { #ifdef CONFIG_SMP @@ -731,23 +737,25 @@ static void set_load_weight(task_t *p) p->load_weight = PRIO_TO_LOAD_WEIGHT(p->static_prio); } -static inline void inc_raw_weighted_load(runqueue_t *rq, const task_t *p) +static inline void +inc_raw_weighted_load(struct rq *rq, const struct task_struct *p) { rq->raw_weighted_load += p->load_weight; } -static inline void dec_raw_weighted_load(runqueue_t *rq, const task_t *p) +static inline void +dec_raw_weighted_load(struct rq *rq, const struct task_struct *p) { rq->raw_weighted_load -= p->load_weight; } -static inline void inc_nr_running(task_t *p, runqueue_t *rq) +static inline void inc_nr_running(struct task_struct *p, struct rq *rq) { rq->nr_running++; inc_raw_weighted_load(rq, p); } -static inline void dec_nr_running(task_t *p, runqueue_t *rq) +static inline void dec_nr_running(struct task_struct *p, struct rq *rq) { rq->nr_running--; dec_raw_weighted_load(rq, p); @@ -760,7 +768,7 @@ static inline void dec_nr_running(task_t *p, runqueue_t *rq) * setprio syscalls, and whenever the interactivity * estimator recalculates. */ -static inline int normal_prio(task_t *p) +static inline int normal_prio(struct task_struct *p) { int prio; @@ -778,7 +786,7 @@ static inline int normal_prio(task_t *p) * interactivity modifiers. Will be RT if the task got * RT-boosted. If not then it returns p->normal_prio. */ -static int effective_prio(task_t *p) +static int effective_prio(struct task_struct *p) { p->normal_prio = normal_prio(p); /* @@ -794,9 +802,9 @@ static int effective_prio(task_t *p) /* * __activate_task - move a task to the runqueue. 
*/ -static void __activate_task(task_t *p, runqueue_t *rq) +static void __activate_task(struct task_struct *p, struct rq *rq) { - prio_array_t *target = rq->active; + struct prio_array *target = rq->active; if (batch_task(p)) target = rq->expired; @@ -807,7 +815,7 @@ static void __activate_task(task_t *p, runqueue_t *rq) /* * __activate_idle_task - move idle task to the _front_ of runqueue. */ -static inline void __activate_idle_task(task_t *p, runqueue_t *rq) +static inline void __activate_idle_task(struct task_struct *p, struct rq *rq) { enqueue_task_head(p, rq->active); inc_nr_running(p, rq); @@ -817,7 +825,7 @@ static inline void __activate_idle_task(task_t *p, runqueue_t *rq) * Recalculate p->normal_prio and p->prio after having slept, * updating the sleep-average too: */ -static int recalc_task_prio(task_t *p, unsigned long long now) +static int recalc_task_prio(struct task_struct *p, unsigned long long now) { /* Caller must always ensure 'now >= p->timestamp' */ unsigned long sleep_time = now - p->timestamp; @@ -889,7 +897,7 @@ static int recalc_task_prio(task_t *p, unsigned long long now) * Update all the scheduling statistics stuff. (sleep average * calculation, priority modifiers, etc.) */ -static void activate_task(task_t *p, runqueue_t *rq, int local) +static void activate_task(struct task_struct *p, struct rq *rq, int local) { unsigned long long now; @@ -897,7 +905,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local) #ifdef CONFIG_SMP if (!local) { /* Compensate for drifting sched_clock */ - runqueue_t *this_rq = this_rq(); + struct rq *this_rq = this_rq(); now = (now - this_rq->timestamp_last_tick) + rq->timestamp_last_tick; } @@ -936,7 +944,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local) /* * deactivate_task - remove a task from the runqueue. 
*/ -static void deactivate_task(struct task_struct *p, runqueue_t *rq) +static void deactivate_task(struct task_struct *p, struct rq *rq) { dec_nr_running(p, rq); dequeue_task(p, p->array); @@ -956,7 +964,7 @@ static void deactivate_task(struct task_struct *p, runqueue_t *rq) #define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) #endif -static void resched_task(task_t *p) +static void resched_task(struct task_struct *p) { int cpu; @@ -977,7 +985,7 @@ static void resched_task(task_t *p) smp_send_reschedule(cpu); } #else -static inline void resched_task(task_t *p) +static inline void resched_task(struct task_struct *p) { assert_spin_locked(&task_rq(p)->lock); set_tsk_need_resched(p); @@ -988,7 +996,7 @@ static inline void resched_task(task_t *p) * task_curr - is this task currently executing on a CPU? * @p: the task in question. */ -inline int task_curr(const task_t *p) +inline int task_curr(const struct task_struct *p) { return cpu_curr(task_cpu(p)) == p; } @@ -1000,22 +1008,23 @@ unsigned long weighted_cpuload(const int cpu) } #ifdef CONFIG_SMP -typedef struct { +struct migration_req { struct list_head list; - task_t *task; + struct task_struct *task; int dest_cpu; struct completion done; -} migration_req_t; +}; /* * The task's runqueue lock must be held. * Returns true if you have to wait for migration thread. 
*/ -static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req) +static int +migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req) { - runqueue_t *rq = task_rq(p); + struct rq *rq = task_rq(p); /* * If the task is not on a runqueue (and not running), then @@ -1030,6 +1039,7 @@ static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req) req->task = p; req->dest_cpu = dest_cpu; list_add(&req->list, &rq->migration_queue); + return 1; } @@ -1042,10 +1052,10 @@ static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req) * smp_call_function() if an IPI is sent by the same process we are * waiting to become inactive. */ -void wait_task_inactive(task_t *p) +void wait_task_inactive(struct task_struct *p) { unsigned long flags; - runqueue_t *rq; + struct rq *rq; int preempted; repeat: @@ -1076,7 +1086,7 @@ repeat: * to another CPU then no harm is done and the purpose has been * achieved as well. */ -void kick_process(task_t *p) +void kick_process(struct task_struct *p) { int cpu; @@ -1096,7 +1106,7 @@ void kick_process(task_t *p) */ static inline unsigned long source_load(int cpu, int type) { - runqueue_t *rq = cpu_rq(cpu); + struct rq *rq = cpu_rq(cpu); if (type == 0) return rq->raw_weighted_load; @@ -1110,7 +1120,7 @@ static inline unsigned long source_load(int cpu, int type) */ static inline unsigned long target_load(int cpu, int type) { - runqueue_t *rq = cpu_rq(cpu); + struct rq *rq = cpu_rq(cpu); if (type == 0) return rq->raw_weighted_load; @@ -1123,10 +1133,10 @@ static inline unsigned long target_load(int cpu, int type) */ static inline unsigned long cpu_avg_load_per_task(int cpu) { - runqueue_t *rq = cpu_rq(cpu); + struct rq *rq = cpu_rq(cpu); unsigned long n = rq->nr_running; - return n ? rq->raw_weighted_load / n : SCHED_LOAD_SCALE; + return n ? rq->raw_weighted_load / n : SCHED_LOAD_SCALE; } /* @@ -1279,7 +1289,7 @@ nextlevel: * Returns the CPU we should wake onto. 
*/ #if defined(ARCH_HAS_SCHED_WAKE_IDLE) -static int wake_idle(int cpu, task_t *p) +static int wake_idle(int cpu, struct task_struct *p) { cpumask_t tmp; struct sched_domain *sd; @@ -1302,7 +1312,7 @@ static int wake_idle(int cpu, task_t *p) return cpu; } #else -static inline int wake_idle(int cpu, task_t *p) +static inline int wake_idle(int cpu, struct task_struct *p) { return cpu; } @@ -1322,15 +1332,15 @@ static inline int wake_idle(int cpu, task_t *p) * * returns failure only if the task is already active. */ -static int try_to_wake_up(task_t *p, unsigned int state, int sync) +static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) { int cpu, this_cpu, success = 0; unsigned long flags; long old_state; - runqueue_t *rq; + struct rq *rq; #ifdef CONFIG_SMP - unsigned long load, this_load; struct sched_domain *sd, *this_sd = NULL; + unsigned long load, this_load; int new_cpu; #endif @@ -1480,15 +1490,14 @@ out: return success; } -int fastcall wake_up_process(task_t *p) +int fastcall wake_up_process(struct task_struct *p) { return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED | TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0); } - EXPORT_SYMBOL(wake_up_process); -int fastcall wake_up_state(task_t *p, unsigned int state) +int fastcall wake_up_state(struct task_struct *p, unsigned int state) { return try_to_wake_up(p, state, 0); } @@ -1497,7 +1506,7 @@ int fastcall wake_up_state(task_t *p, unsigned int state) * Perform scheduler related setup for a newly forked process p. * p is forked by current. */ -void fastcall sched_fork(task_t *p, int clone_flags) +void fastcall sched_fork(struct task_struct *p, int clone_flags) { int cpu = get_cpu(); @@ -1565,11 +1574,11 @@ void fastcall sched_fork(task_t *p, int clone_flags) * that must be done for every newly created context, then puts the task * on the runqueue and wakes it. 
*/ -void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags) +void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags) { + struct rq *rq, *this_rq; unsigned long flags; int this_cpu, cpu; - runqueue_t *rq, *this_rq; rq = task_rq_lock(p, &flags); BUG_ON(p->state != TASK_RUNNING); @@ -1649,10 +1658,10 @@ void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags) * artificially, because any timeslice recovered here * was given away by the parent in the first place.) */ -void fastcall sched_exit(task_t *p) +void fastcall sched_exit(struct task_struct *p) { unsigned long flags; - runqueue_t *rq; + struct rq *rq; /* * If the child was a (relative-) CPU hog then decrease @@ -1683,7 +1692,7 @@ void fastcall sched_exit(task_t *p) * prepare_task_switch sets up locking and calls architecture specific * hooks. */ -static inline void prepare_task_switch(runqueue_t *rq, task_t *next) +static inline void prepare_task_switch(struct rq *rq, struct task_struct *next) { prepare_lock_switch(rq, next); prepare_arch_switch(next); @@ -1704,7 +1713,7 @@ static inline void prepare_task_switch(runqueue_t *rq, task_t *next) * with the lock held can cause deadlocks; see schedule() for * details.) */ -static inline void finish_task_switch(runqueue_t *rq, task_t *prev) +static inline void finish_task_switch(struct rq *rq, struct task_struct *prev) __releases(rq->lock) { struct mm_struct *mm = rq->prev_mm; @@ -1742,10 +1751,11 @@ static inline void finish_task_switch(runqueue_t *rq, task_t *prev) * schedule_tail - first thing a freshly forked thread must call. * @prev: the thread we just switched away from. 
*/ -asmlinkage void schedule_tail(task_t *prev) +asmlinkage void schedule_tail(struct task_struct *prev) __releases(rq->lock) { - runqueue_t *rq = this_rq(); + struct rq *rq = this_rq(); + finish_task_switch(rq, prev); #ifdef __ARCH_WANT_UNLOCKED_CTXSW /* In this case, finish_task_switch does not reenable preemption */ @@ -1759,8 +1769,9 @@ asmlinkage void schedule_tail(task_t *prev) * context_switch - switch to the new MM and the new * thread's register state. */ -static inline -task_t * context_switch(runqueue_t *rq, task_t *prev, task_t *next) +static inline struct task_struct * +context_switch(struct rq *rq, struct task_struct *prev, + struct task_struct *next) { struct mm_struct *mm = next->mm; struct mm_struct *oldmm = prev->active_mm; @@ -1777,6 +1788,7 @@ task_t * context_switch(runqueue_t *rq, task_t *prev, task_t *next) WARN_ON(rq->prev_mm); rq->prev_mm = oldmm; } + spin_release(&rq->lock.dep_map, 1, _THIS_IP_); /* Here we just switch the register state and the stack. */ switch_to(prev, next, prev); @@ -1857,12 +1869,21 @@ unsigned long nr_active(void) #ifdef CONFIG_SMP /* + * Is this task likely cache-hot: + */ +static inline int +task_hot(struct task_struct *p, unsigned long long now, struct sched_domain *sd) +{ + return (long long)(now - p->last_ran) < (long long)sd->cache_hot_time; +} + +/* * double_rq_lock - safely lock two runqueues * * Note this does not disable interrupts like task_rq_lock, * you need to do so manually before calling. */ -static void double_rq_lock(runqueue_t *rq1, runqueue_t *rq2) +static void double_rq_lock(struct rq *rq1, struct rq *rq2) __acquires(rq1->lock) __acquires(rq2->lock) { @@ -1886,7 +1907,7 @@ static void double_rq_lock(runqueue_t *rq1, runqueue_t *rq2) * Note this does not restore interrupts like task_rq_unlock, * you need to do so manually after calling. 
*/ -static void double_rq_unlock(runqueue_t *rq1, runqueue_t *rq2) +static void double_rq_unlock(struct rq *rq1, struct rq *rq2) __releases(rq1->lock) __releases(rq2->lock) { @@ -1900,7 +1921,7 @@ static void double_rq_unlock(runqueue_t *rq1, runqueue_t *rq2) /* * double_lock_balance - lock the busiest runqueue, this_rq is locked already. */ -static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest) +static void double_lock_balance(struct rq *this_rq, struct rq *busiest) __releases(this_rq->lock) __acquires(busiest->lock) __acquires(this_rq->lock) @@ -1921,11 +1942,11 @@ static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest) * allow dest_cpu, which will force the cpu onto dest_cpu. Then * the cpu_allowed mask is restored. */ -static void sched_migrate_task(task_t *p, int dest_cpu) +static void sched_migrate_task(struct task_struct *p, int dest_cpu) { - migration_req_t req; - runqueue_t *rq; + struct migration_req req; unsigned long flags; + struct rq *rq; rq = task_rq_lock(p, &flags); if (!cpu_isset(dest_cpu, p->cpus_allowed) @@ -1936,11 +1957,13 @@ static void sched_migrate_task(task_t *p, int dest_cpu) if (migrate_task(p, dest_cpu, &req)) { /* Need to wait for migration thread (might exit: take ref). */ struct task_struct *mt = rq->migration_thread; + get_task_struct(mt); task_rq_unlock(rq, &flags); wake_up_process(mt); put_task_struct(mt); wait_for_completion(&req.done); + return; } out: @@ -1964,9 +1987,9 @@ void sched_exec(void) * pull_task - move a task from a remote runqueue to the local runqueue. * Both runqueues must be locked. 
*/ -static -void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, - runqueue_t *this_rq, prio_array_t *this_array, int this_cpu) +static void pull_task(struct rq *src_rq, struct prio_array *src_array, + struct task_struct *p, struct rq *this_rq, + struct prio_array *this_array, int this_cpu) { dequeue_task(p, src_array); dec_nr_running(p, src_rq); @@ -1987,7 +2010,7 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, * can_migrate_task - may task p from runqueue rq be migrated to this_cpu? */ static -int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu, +int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu, struct sched_domain *sd, enum idle_type idle, int *all_pinned) { @@ -2019,6 +2042,7 @@ int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu, } #define rq_best_prio(rq) min((rq)->curr->prio, (rq)->best_expired_prio) + /* * move_tasks tries to move up to max_nr_move tasks and max_load_move weighted * load from busiest to this_rq, as part of a balancing operation within @@ -2026,18 +2050,17 @@ int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu, * * Called with both runqueues locked. 
*/ -static int move_tasks(runqueue_t *this_rq, int this_cpu, runqueue_t *busiest, +static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, unsigned long max_nr_move, unsigned long max_load_move, struct sched_domain *sd, enum idle_type idle, int *all_pinned) { - prio_array_t *array, *dst_array; + int idx, pulled = 0, pinned = 0, this_best_prio, best_prio, + best_prio_seen, skip_for_load; + struct prio_array *array, *dst_array; struct list_head *head, *curr; - int idx, pulled = 0, pinned = 0, this_best_prio, busiest_best_prio; - int busiest_best_prio_seen; - int skip_for_load; /* skip the task based on weighted load issues */ + struct task_struct *tmp; long rem_load_move; - task_t *tmp; if (max_nr_move == 0 || max_load_move == 0) goto out; @@ -2045,15 +2068,15 @@ static int move_tasks(runqueue_t *this_rq, int this_cpu, runqueue_t *busiest, rem_load_move = max_load_move; pinned = 1; this_best_prio = rq_best_prio(this_rq); - busiest_best_prio = rq_best_prio(busiest); + best_prio = rq_best_prio(busiest); /* * Enable handling of the case where there is more than one task * with the best priority. If the current running task is one - * of those with prio==busiest_best_prio we know it won't be moved + * of those with prio==best_prio we know it won't be moved * and therefore it's safe to override the skip (based on load) of * any task we find with that prio. */ - busiest_best_prio_seen = busiest_best_prio == busiest->curr->prio; + best_prio_seen = best_prio == busiest->curr->prio; /* * We first consider expired tasks. 
Those will likely not be @@ -2089,7 +2112,7 @@ skip_bitmap: head = array->queue + idx; curr = head->prev; skip_queue: - tmp = list_entry(curr, task_t, run_list); + tmp = list_entry(curr, struct task_struct, run_list); curr = curr->prev; @@ -2100,10 +2123,11 @@ skip_queue: */ skip_for_load = tmp->load_weight > rem_load_move; if (skip_for_load && idx < this_best_prio) - skip_for_load = !busiest_best_prio_seen && idx == busiest_best_prio; + skip_for_load = !best_prio_seen && idx == best_prio; if (skip_for_load || !can_migrate_task(tmp, busiest, this_cpu, sd, idle, &pinned)) { - busiest_best_prio_seen |= idx == busiest_best_prio; + + best_prio_seen |= idx == best_prio; if (curr != head) goto skip_queue; idx++; @@ -2146,8 +2170,8 @@ out: /* * find_busiest_group finds and returns the busiest CPU group within the - * domain. It calculates and returns the amount of weighted load which should be - * moved to restore balance via the imbalance parameter. + * domain. It calculates and returns the amount of weighted load which + * should be moved to restore balance via the imbalance parameter. 
*/ static struct sched_group * find_busiest_group(struct sched_domain *sd, int this_cpu, @@ -2188,7 +2212,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, sum_weighted_load = sum_nr_running = avg_load = 0; for_each_cpu_mask(i, group->cpumask) { - runqueue_t *rq = cpu_rq(i); + struct rq *rq = cpu_rq(i); if (*sd_idle && !idle_cpu(i)) *sd_idle = 0; @@ -2269,7 +2293,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, * capacity but still has some space to pick up some load * from other group and save more power */ - if (sum_nr_running <= group_capacity - 1) + if (sum_nr_running <= group_capacity - 1) { if (sum_nr_running > leader_nr_running || (sum_nr_running == leader_nr_running && first_cpu(group->cpumask) > @@ -2277,7 +2301,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, group_leader = group; leader_nr_running = sum_nr_running; } - + } group_next: #endif group = group->next; @@ -2332,8 +2356,7 @@ group_next: * moved */ if (*imbalance < busiest_load_per_task) { - unsigned long pwr_now, pwr_move; - unsigned long tmp; + unsigned long tmp, pwr_now, pwr_move; unsigned int imbn; small_imbalance: @@ -2405,22 +2428,23 @@ ret: /* * find_busiest_queue - find the busiest runqueue among the cpus in group. 
*/ -static runqueue_t *find_busiest_queue(struct sched_group *group, - enum idle_type idle, unsigned long imbalance) +static struct rq * +find_busiest_queue(struct sched_group *group, enum idle_type idle, + unsigned long imbalance) { + struct rq *busiest = NULL, *rq; unsigned long max_load = 0; - runqueue_t *busiest = NULL, *rqi; int i; for_each_cpu_mask(i, group->cpumask) { - rqi = cpu_rq(i); + rq = cpu_rq(i); - if (rqi->nr_running == 1 && rqi->raw_weighted_load > imbalance) + if (rq->nr_running == 1 && rq->raw_weighted_load > imbalance) continue; - if (rqi->raw_weighted_load > max_load) { - max_load = rqi->raw_weighted_load; - busiest = rqi; + if (rq->raw_weighted_load > max_load) { + max_load = rq->raw_weighted_load; + busiest = rq; } } @@ -2433,22 +2457,24 @@ static runqueue_t *find_busiest_queue(struct sched_group *group, */ #define MAX_PINNED_INTERVAL 512 -#define minus_1_or_zero(n) ((n) > 0 ? (n) - 1 : 0) +static inline unsigned long minus_1_or_zero(unsigned long n) +{ + return n > 0 ? n - 1 : 0; +} + /* * Check this_cpu to ensure it is balanced within domain. Attempt to move * tasks if there is an imbalance. * * Called with this_rq unlocked. 
*/ -static int load_balance(int this_cpu, runqueue_t *this_rq, +static int load_balance(int this_cpu, struct rq *this_rq, struct sched_domain *sd, enum idle_type idle) { + int nr_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; struct sched_group *group; - runqueue_t *busiest; unsigned long imbalance; - int nr_moved, all_pinned = 0; - int active_balance = 0; - int sd_idle = 0; + struct rq *busiest; if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings) @@ -2482,8 +2508,8 @@ static int load_balance(int this_cpu, runqueue_t *this_rq, */ double_rq_lock(this_rq, busiest); nr_moved = move_tasks(this_rq, this_cpu, busiest, - minus_1_or_zero(busiest->nr_running), - imbalance, sd, idle, &all_pinned); + minus_1_or_zero(busiest->nr_running), + imbalance, sd, idle, &all_pinned); double_rq_unlock(this_rq, busiest); /* All tasks on this runqueue were pinned by CPU affinity */ @@ -2556,7 +2582,8 @@ out_one_pinned: (sd->balance_interval < sd->max_interval)) sd->balance_interval *= 2; - if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings) + if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && + !sched_smt_power_savings) return -1; return 0; } @@ -2568,11 +2595,11 @@ out_one_pinned: * Called from schedule when this_rq is about to become idle (NEWLY_IDLE). * this_rq is locked. 
*/ -static int load_balance_newidle(int this_cpu, runqueue_t *this_rq, - struct sched_domain *sd) +static int +load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) { struct sched_group *group; - runqueue_t *busiest = NULL; + struct rq *busiest = NULL; unsigned long imbalance; int nr_moved = 0; int sd_idle = 0; @@ -2618,9 +2645,11 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq, out_balanced: schedstat_inc(sd, lb_balanced[NEWLY_IDLE]); - if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings) + if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && + !sched_smt_power_savings) return -1; sd->nr_balance_failed = 0; + return 0; } @@ -2628,16 +2657,15 @@ out_balanced: * idle_balance is called by schedule() if this_cpu is about to become * idle. Attempts to pull tasks from other CPUs. */ -static void idle_balance(int this_cpu, runqueue_t *this_rq) +static void idle_balance(int this_cpu, struct rq *this_rq) { struct sched_domain *sd; for_each_domain(this_cpu, sd) { if (sd->flags & SD_BALANCE_NEWIDLE) { - if (load_balance_newidle(this_cpu, this_rq, sd)) { - /* We've pulled tasks over so stop searching */ + /* If we've pulled tasks over stop searching: */ + if (load_balance_newidle(this_cpu, this_rq, sd)) break; - } } } } @@ -2650,14 +2678,14 @@ static void idle_balance(int this_cpu, runqueue_t *this_rq) * * Called with busiest_rq locked. */ -static void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu) +static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) { - struct sched_domain *sd; - runqueue_t *target_rq; int target_cpu = busiest_rq->push_cpu; + struct sched_domain *sd; + struct rq *target_rq; + /* Is there any task to move? */ if (busiest_rq->nr_running <= 1) - /* no task to move */ return; target_rq = cpu_rq(target_cpu); @@ -2675,21 +2703,20 @@ static void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu) /* Search for an sd spanning us and the target CPU. 
*/ for_each_domain(target_cpu, sd) { if ((sd->flags & SD_LOAD_BALANCE) && - cpu_isset(busiest_cpu, sd->span)) + cpu_isset(busiest_cpu, sd->span)) break; } - if (unlikely(sd == NULL)) - goto out; - - schedstat_inc(sd, alb_cnt); + if (likely(sd)) { + schedstat_inc(sd, alb_cnt); - if (move_tasks(target_rq, target_cpu, busiest_rq, 1, - RTPRIO_TO_LOAD_WEIGHT(100), sd, SCHED_IDLE, NULL)) - schedstat_inc(sd, alb_pushed); - else - schedstat_inc(sd, alb_failed); -out: + if (move_tasks(target_rq, target_cpu, busiest_rq, 1, + RTPRIO_TO_LOAD_WEIGHT(100), sd, SCHED_IDLE, + NULL)) + schedstat_inc(sd, alb_pushed); + else + schedstat_inc(sd, alb_failed); + } spin_unlock(&target_rq->lock); } @@ -2702,23 +2729,27 @@ out: * Balancing parameters are set up in arch_init_sched_domains. */ -/* Don't have all balancing operations going off at once */ -#define CPU_OFFSET(cpu) (HZ * cpu / NR_CPUS) +/* Don't have all balancing operations going off at once: */ +static inline unsigned long cpu_offset(int cpu) +{ + return jiffies + cpu * HZ / NR_CPUS; +} -static void rebalance_tick(int this_cpu, runqueue_t *this_rq, - enum idle_type idle) +static void +rebalance_tick(int this_cpu, struct rq *this_rq, enum idle_type idle) { - unsigned long old_load, this_load; - unsigned long j = jiffies + CPU_OFFSET(this_cpu); + unsigned long this_load, interval, j = cpu_offset(this_cpu); struct sched_domain *sd; - int i; + int i, scale; this_load = this_rq->raw_weighted_load; - /* Update our load */ - for (i = 0; i < 3; i++) { - unsigned long new_load = this_load; - int scale = 1 << i; + + /* Update our load: */ + for (i = 0, scale = 1; i < 3; i++, scale <<= 1) { + unsigned long old_load, new_load; + old_load = this_rq->cpu_load[i]; + new_load = this_load; /* * Round up the averaging division if load is increasing. 
This * prevents us from getting stuck on 9 if the load is 10, for @@ -2730,8 +2761,6 @@ static void rebalance_tick(int this_cpu, runqueue_t *this_rq, } for_each_domain(this_cpu, sd) { - unsigned long interval; - if (!(sd->flags & SD_LOAD_BALANCE)) continue; @@ -2761,17 +2790,18 @@ static void rebalance_tick(int this_cpu, runqueue_t *this_rq, /* * on UP we do not need to balance between CPUs: */ -static inline void rebalance_tick(int cpu, runqueue_t *rq, enum idle_type idle) +static inline void rebalance_tick(int cpu, struct rq *rq, enum idle_type idle) { } -static inline void idle_balance(int cpu, runqueue_t *rq) +static inline void idle_balance(int cpu, struct rq *rq) { } #endif -static inline int wake_priority_sleeper(runqueue_t *rq) +static inline int wake_priority_sleeper(struct rq *rq) { int ret = 0; + #ifdef CONFIG_SCHED_SMT spin_lock(&rq->lock); /* @@ -2795,25 +2825,26 @@ EXPORT_PER_CPU_SYMBOL(kstat); * This is called on clock ticks and on context switches. * Bank in p->sched_time the ns elapsed since the last tick or switch. */ -static inline void update_cpu_clock(task_t *p, runqueue_t *rq, - unsigned long long now) +static inline void +update_cpu_clock(struct task_struct *p, struct rq *rq, unsigned long long now) { - unsigned long long last = max(p->timestamp, rq->timestamp_last_tick); - p->sched_time += now - last; + p->sched_time += now - max(p->timestamp, rq->timestamp_last_tick); } /* * Return current->sched_time plus any more ns on the sched_clock * that have not yet been banked. 
*/ -unsigned long long current_sched_time(const task_t *tsk) +unsigned long long current_sched_time(const struct task_struct *p) { unsigned long long ns; unsigned long flags; + local_irq_save(flags); - ns = max(tsk->timestamp, task_rq(tsk)->timestamp_last_tick); - ns = tsk->sched_time + (sched_clock() - ns); + ns = max(p->timestamp, task_rq(p)->timestamp_last_tick); + ns = p->sched_time + sched_clock() - ns; local_irq_restore(flags); + return ns; } @@ -2827,11 +2858,16 @@ unsigned long long current_sched_time(const task_t *tsk) * increasing number of running tasks. We also ignore the interactivity * if a better static_prio task has expired: */ -#define EXPIRED_STARVING(rq) \ - ((STARVATION_LIMIT && ((rq)->expired_timestamp && \ - (jiffies - (rq)->expired_timestamp >= \ - STARVATION_LIMIT * ((rq)->nr_running) + 1))) || \ - ((rq)->curr->static_prio > (rq)->best_expired_prio)) +static inline int expired_starving(struct rq *rq) +{ + if (rq->curr->static_prio > rq->best_expired_prio) + return 1; + if (!STARVATION_LIMIT || !rq->expired_timestamp) + return 0; + if (jiffies - rq->expired_timestamp > STARVATION_LIMIT * rq->nr_running) + return 1; + return 0; +} /* * Account user cpu time to a process. 
@@ -2864,7 +2900,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset, cputime_t cputime) { struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; - runqueue_t *rq = this_rq(); + struct rq *rq = this_rq(); cputime64_t tmp; p->stime = cputime_add(p->stime, cputime); @@ -2894,7 +2930,7 @@ void account_steal_time(struct task_struct *p, cputime_t steal) { struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; cputime64_t tmp = cputime_to_cputime64(steal); - runqueue_t *rq = this_rq(); + struct rq *rq = this_rq(); if (p == rq->idle) { p->stime = cputime_add(p->stime, steal); @@ -2915,10 +2951,10 @@ void account_steal_time(struct task_struct *p, cputime_t steal) */ void scheduler_tick(void) { - int cpu = smp_processor_id(); - runqueue_t *rq = this_rq(); - task_t *p = current; unsigned long long now = sched_clock(); + struct task_struct *p = current; + int cpu = smp_processor_id(); + struct rq *rq = cpu_rq(cpu); update_cpu_clock(p, rq, now); @@ -2968,7 +3004,7 @@ void scheduler_tick(void) if (!rq->expired_timestamp) rq->expired_timestamp = jiffies; - if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) { + if (!TASK_INTERACTIVE(p) || expired_starving(rq)) { enqueue_task(p, rq->expired); if (p->static_prio < rq->best_expired_prio) rq->best_expired_prio = p->static_prio; @@ -3007,7 +3043,7 @@ out: } #ifdef CONFIG_SCHED_SMT -static inline void wakeup_busy_runqueue(runqueue_t *rq) +static inline void wakeup_busy_runqueue(struct rq *rq) { /* If an SMT runqueue is sleeping due to priority reasons wake it up */ if (rq->curr == rq->idle && rq->nr_running) @@ -3033,7 +3069,7 @@ static void wake_sleeping_dependent(int this_cpu) return; for_each_cpu_mask(i, sd->span) { - runqueue_t *smt_rq = cpu_rq(i); + struct rq *smt_rq = cpu_rq(i); if (i == this_cpu) continue; @@ -3050,7 +3086,8 @@ static void wake_sleeping_dependent(int this_cpu) * utilize, if another task runs on a sibling. 
This models the * slowdown effect of other tasks running on siblings: */ -static inline unsigned long smt_slice(task_t *p, struct sched_domain *sd) +static inline unsigned long +smt_slice(struct task_struct *p, struct sched_domain *sd) { return p->time_slice * (100 - sd->per_cpu_gain) / 100; } @@ -3061,7 +3098,8 @@ static inline unsigned long smt_slice(task_t *p, struct sched_domain *sd) * acquire their lock. As we only trylock the normal locking order does not * need to be obeyed. */ -static int dependent_sleeper(int this_cpu, runqueue_t *this_rq, task_t *p) +static int +dependent_sleeper(int this_cpu, struct rq *this_rq, struct task_struct *p) { struct sched_domain *tmp, *sd = NULL; int ret = 0, i; @@ -3081,8 +3119,8 @@ static int dependent_sleeper(int this_cpu, runqueue_t *this_rq, task_t *p) return 0; for_each_cpu_mask(i, sd->span) { - runqueue_t *smt_rq; - task_t *smt_curr; + struct task_struct *smt_curr; + struct rq *smt_rq; if (i == this_cpu) continue; @@ -3127,9 +3165,8 @@ unlock: static inline void wake_sleeping_dependent(int this_cpu) { } - -static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq, - task_t *p) +static inline int +dependent_sleeper(int this_cpu, struct rq *this_rq, struct task_struct *p) { return 0; } @@ -3142,12 +3179,13 @@ void fastcall add_preempt_count(int val) /* * Underflow? */ - BUG_ON((preempt_count() < 0)); + if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0))) + return; preempt_count() += val; /* * Spinlock count overflowing soon? */ - BUG_ON((preempt_count() & PREEMPT_MASK) >= PREEMPT_MASK-10); + DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= PREEMPT_MASK-10); } EXPORT_SYMBOL(add_preempt_count); @@ -3156,11 +3194,15 @@ void fastcall sub_preempt_count(int val) /* * Underflow? */ - BUG_ON(val > preempt_count()); + if (DEBUG_LOCKS_WARN_ON(val > preempt_count())) + return; /* * Is the spinlock portion underflowing? 
*/ - BUG_ON((val < PREEMPT_MASK) && !(preempt_count() & PREEMPT_MASK)); + if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) && + !(preempt_count() & PREEMPT_MASK))) + return; + preempt_count() -= val; } EXPORT_SYMBOL(sub_preempt_count); @@ -3178,14 +3220,14 @@ static inline int interactive_sleep(enum sleep_type sleep_type) */ asmlinkage void __sched schedule(void) { - long *switch_count; - task_t *prev, *next; - runqueue_t *rq; - prio_array_t *array; + struct task_struct *prev, *next; + struct prio_array *array; struct list_head *queue; unsigned long long now; unsigned long run_time; int cpu, idx, new_prio; + long *switch_count; + struct rq *rq; /* * Test if we are atomic. Since do_exit() needs to call into @@ -3275,7 +3317,7 @@ need_resched_nonpreemptible: idx = sched_find_first_bit(array->bitmap); queue = array->queue + idx; - next = list_entry(queue->next, task_t, run_list); + next = list_entry(queue->next, struct task_struct, run_list); if (!rt_task(next) && interactive_sleep(next->sleep_type)) { unsigned long long delta = now - next->timestamp; @@ -3338,7 +3380,6 @@ switch_tasks: if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) goto need_resched; } - EXPORT_SYMBOL(schedule); #ifdef CONFIG_PREEMPT @@ -3383,7 +3424,6 @@ need_resched: if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) goto need_resched; } - EXPORT_SYMBOL(preempt_schedule); /* @@ -3432,10 +3472,8 @@ need_resched: int default_wake_function(wait_queue_t *curr, unsigned mode, int sync, void *key) { - task_t *p = curr->private; - return try_to_wake_up(p, mode, sync); + return try_to_wake_up(curr->private, mode, sync); } - EXPORT_SYMBOL(default_wake_function); /* @@ -3453,13 +3491,11 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, struct list_head *tmp, *next; list_for_each_safe(tmp, next, &q->task_list) { - wait_queue_t *curr; - unsigned flags; - curr = list_entry(tmp, wait_queue_t, task_list); - flags = curr->flags; + wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list); 
+ unsigned flags = curr->flags; + if (curr->func(curr, mode, sync, key) && - (flags & WQ_FLAG_EXCLUSIVE) && - !--nr_exclusive) + (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive) break; } } @@ -3480,7 +3516,6 @@ void fastcall __wake_up(wait_queue_head_t *q, unsigned int mode, __wake_up_common(q, mode, nr_exclusive, 0, key); spin_unlock_irqrestore(&q->lock, flags); } - EXPORT_SYMBOL(__wake_up); /* @@ -3549,6 +3584,7 @@ EXPORT_SYMBOL(complete_all); void fastcall __sched wait_for_completion(struct completion *x) { might_sleep(); + spin_lock_irq(&x->wait.lock); if (!x->done) { DECLARE_WAITQUEUE(wait, current); @@ -3693,7 +3729,6 @@ void fastcall __sched interruptible_sleep_on(wait_queue_head_t *q) schedule(); SLEEP_ON_TAIL } - EXPORT_SYMBOL(interruptible_sleep_on); long fastcall __sched @@ -3709,7 +3744,6 @@ interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout) return timeout; } - EXPORT_SYMBOL(interruptible_sleep_on_timeout); void fastcall __sched sleep_on(wait_queue_head_t *q) @@ -3722,7 +3756,6 @@ void fastcall __sched sleep_on(wait_queue_head_t *q) schedule(); SLEEP_ON_TAIL } - EXPORT_SYMBOL(sleep_on); long fastcall __sched sleep_on_timeout(wait_queue_head_t *q, long timeout) @@ -3752,11 +3785,11 @@ EXPORT_SYMBOL(sleep_on_timeout); * * Used by the rt_mutex code to implement priority inheritance logic. 
*/ -void rt_mutex_setprio(task_t *p, int prio) +void rt_mutex_setprio(struct task_struct *p, int prio) { + struct prio_array *array; unsigned long flags; - prio_array_t *array; - runqueue_t *rq; + struct rq *rq; int oldprio; BUG_ON(prio < 0 || prio > MAX_PRIO); @@ -3793,12 +3826,12 @@ void rt_mutex_setprio(task_t *p, int prio) #endif -void set_user_nice(task_t *p, long nice) +void set_user_nice(struct task_struct *p, long nice) { - unsigned long flags; - prio_array_t *array; - runqueue_t *rq; + struct prio_array *array; int old_prio, delta; + unsigned long flags; + struct rq *rq; if (TASK_NICE(p) == nice || nice < -20 || nice > 19) return; @@ -3849,10 +3882,11 @@ EXPORT_SYMBOL(set_user_nice); * @p: task * @nice: nice value */ -int can_nice(const task_t *p, const int nice) +int can_nice(const struct task_struct *p, const int nice) { /* convert nice value [19,-20] to rlimit style value [1,40] */ int nice_rlim = 20 - nice; + return (nice_rlim <= p->signal->rlim[RLIMIT_NICE].rlim_cur || capable(CAP_SYS_NICE)); } @@ -3868,8 +3902,7 @@ int can_nice(const task_t *p, const int nice) */ asmlinkage long sys_nice(int increment) { - int retval; - long nice; + long nice, retval; /* * Setpriority might change our priority at the same moment. @@ -3908,7 +3941,7 @@ asmlinkage long sys_nice(int increment) * RT tasks are offset by -200. Normal tasks are centered * around 0, value goes from -16 to +15. */ -int task_prio(const task_t *p) +int task_prio(const struct task_struct *p) { return p->prio - MAX_RT_PRIO; } @@ -3917,7 +3950,7 @@ int task_prio(const task_t *p) * task_nice - return the nice value of a given task. * @p: the task in question. */ -int task_nice(const task_t *p) +int task_nice(const struct task_struct *p) { return TASK_NICE(p); } @@ -3936,7 +3969,7 @@ int idle_cpu(int cpu) * idle_task - return the idle task for a given cpu. * @cpu: the processor in question. 
*/ -task_t *idle_task(int cpu) +struct task_struct *idle_task(int cpu) { return cpu_rq(cpu)->idle; } @@ -3945,7 +3978,7 @@ task_t *idle_task(int cpu) * find_process_by_pid - find a process with a matching PID value. * @pid: the pid in question. */ -static inline task_t *find_process_by_pid(pid_t pid) +static inline struct task_struct *find_process_by_pid(pid_t pid) { return pid ? find_task_by_pid(pid) : current; } @@ -3954,6 +3987,7 @@ static inline task_t *find_process_by_pid(pid_t pid) static void __setscheduler(struct task_struct *p, int policy, int prio) { BUG_ON(p->array); + p->policy = policy; p->rt_priority = prio; p->normal_prio = normal_prio(p); @@ -3977,11 +4011,10 @@ static void __setscheduler(struct task_struct *p, int policy, int prio) int sched_setscheduler(struct task_struct *p, int policy, struct sched_param *param) { - int retval; - int oldprio, oldpolicy = -1; - prio_array_t *array; + int retval, oldprio, oldpolicy = -1; + struct prio_array *array; unsigned long flags; - runqueue_t *rq; + struct rq *rq; /* may grab non-irq protected spin_locks */ BUG_ON(in_interrupt()); @@ -4079,9 +4112,9 @@ EXPORT_SYMBOL_GPL(sched_setscheduler); static int do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) { - int retval; struct sched_param lparam; struct task_struct *p; + int retval; if (!param || pid < 0) return -EINVAL; @@ -4097,6 +4130,7 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) read_unlock_irq(&tasklist_lock); retval = sched_setscheduler(p, policy, &lparam); put_task_struct(p); + return retval; } @@ -4132,8 +4166,8 @@ asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param) */ asmlinkage long sys_sched_getscheduler(pid_t pid) { + struct task_struct *p; int retval = -EINVAL; - task_t *p; if (pid < 0) goto out_nounlock; @@ -4160,8 +4194,8 @@ out_nounlock: asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param) { struct sched_param lp; + struct 
task_struct *p; int retval = -EINVAL; - task_t *p; if (!param || pid < 0) goto out_nounlock; @@ -4194,9 +4228,9 @@ out_unlock: long sched_setaffinity(pid_t pid, cpumask_t new_mask) { - task_t *p; - int retval; cpumask_t cpus_allowed; + struct task_struct *p; + int retval; lock_cpu_hotplug(); read_lock(&tasklist_lock); @@ -4282,8 +4316,8 @@ cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL; long sched_getaffinity(pid_t pid, cpumask_t *mask) { + struct task_struct *p; int retval; - task_t *p; lock_cpu_hotplug(); read_lock(&tasklist_lock); @@ -4342,9 +4376,8 @@ asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len, */ asmlinkage long sys_sched_yield(void) { - runqueue_t *rq = this_rq_lock(); - prio_array_t *array = current->array; - prio_array_t *target = rq->expired; + struct rq *rq = this_rq_lock(); + struct prio_array *array = current->array, *target = rq->expired; schedstat_inc(rq, yld_cnt); /* @@ -4378,6 +4411,7 @@ asmlinkage long sys_sched_yield(void) * no need to preempt or enable interrupts: */ __release(rq->lock); + spin_release(&rq->lock.dep_map, 1, _THIS_IP_); _raw_spin_unlock(&rq->lock); preempt_enable_no_resched(); @@ -4441,6 +4475,7 @@ int cond_resched_lock(spinlock_t *lock) spin_lock(lock); } if (need_resched() && __resched_legal()) { + spin_release(&lock->dep_map, 1, _THIS_IP_); _raw_spin_unlock(lock); preempt_enable_no_resched(); __cond_resched(); @@ -4456,7 +4491,9 @@ int __sched cond_resched_softirq(void) BUG_ON(!in_softirq()); if (need_resched() && __resched_legal()) { - __local_bh_enable(); + raw_local_irq_disable(); + _local_bh_enable(); + raw_local_irq_enable(); __cond_resched(); local_bh_disable(); return 1; @@ -4476,7 +4513,6 @@ void __sched yield(void) set_current_state(TASK_RUNNING); sys_sched_yield(); } - EXPORT_SYMBOL(yield); /* @@ -4488,18 +4524,17 @@ EXPORT_SYMBOL(yield); */ void __sched io_schedule(void) { - struct runqueue *rq = &__raw_get_cpu_var(runqueues); + struct rq *rq = &__raw_get_cpu_var(runqueues); 
atomic_inc(&rq->nr_iowait); schedule(); atomic_dec(&rq->nr_iowait); } - EXPORT_SYMBOL(io_schedule); long __sched io_schedule_timeout(long timeout) { - struct runqueue *rq = &__raw_get_cpu_var(runqueues); + struct rq *rq = &__raw_get_cpu_var(runqueues); long ret; atomic_inc(&rq->nr_iowait); @@ -4566,9 +4601,9 @@ asmlinkage long sys_sched_get_priority_min(int policy) asmlinkage long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval) { + struct task_struct *p; int retval = -EINVAL; struct timespec t; - task_t *p; if (pid < 0) goto out_nounlock; @@ -4596,28 +4631,32 @@ out_unlock: static inline struct task_struct *eldest_child(struct task_struct *p) { - if (list_empty(&p->children)) return NULL; + if (list_empty(&p->children)) + return NULL; return list_entry(p->children.next,struct task_struct,sibling); } static inline struct task_struct *older_sibling(struct task_struct *p) { - if (p->sibling.prev==&p->parent->children) return NULL; + if (p->sibling.prev==&p->parent->children) + return NULL; return list_entry(p->sibling.prev,struct task_struct,sibling); } static inline struct task_struct *younger_sibling(struct task_struct *p) { - if (p->sibling.next==&p->parent->children) return NULL; + if (p->sibling.next==&p->parent->children) + return NULL; return list_entry(p->sibling.next,struct task_struct,sibling); } -static void show_task(task_t *p) +static const char *stat_nam[] = { "R", "S", "D", "T", "t", "Z", "X" }; + +static void show_task(struct task_struct *p) { - task_t *relative; - unsigned state; + struct task_struct *relative; unsigned long free = 0; - static const char *stat_nam[] = { "R", "S", "D", "T", "t", "Z", "X" }; + unsigned state; printk("%-13.13s ", p->comm); state = p->state ? 
__ffs(p->state) + 1 : 0; @@ -4668,7 +4707,7 @@ static void show_task(task_t *p) void show_state(void) { - task_t *g, *p; + struct task_struct *g, *p; #if (BITS_PER_LONG == 32) printk("\n" @@ -4690,7 +4729,7 @@ void show_state(void) } while_each_thread(g, p); read_unlock(&tasklist_lock); - mutex_debug_show_all_locks(); + debug_show_all_locks(); } /** @@ -4701,9 +4740,9 @@ void show_state(void) * NOTE: this function does not set the idle thread's NEED_RESCHED * flag, to make booting more robust. */ -void __devinit init_idle(task_t *idle, int cpu) +void __devinit init_idle(struct task_struct *idle, int cpu) { - runqueue_t *rq = cpu_rq(cpu); + struct rq *rq = cpu_rq(cpu); unsigned long flags; idle->timestamp = sched_clock(); @@ -4742,7 +4781,7 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE; /* * This is how migration works: * - * 1) we queue a migration_req_t structure in the source CPU's + * 1) we queue a struct migration_req structure in the source CPU's * runqueue and wake up that CPU's migration thread. * 2) we down() the locked semaphore => thread blocks. * 3) migration thread wakes up (implicitly it forces the migrated @@ -4764,12 +4803,12 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE; * task must not exit() & deallocate itself prematurely. The * call is not atomic; no spinlocks may be held. 
*/ -int set_cpus_allowed(task_t *p, cpumask_t new_mask) +int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) { + struct migration_req req; unsigned long flags; + struct rq *rq; int ret = 0; - migration_req_t req; - runqueue_t *rq; rq = task_rq_lock(p, &flags); if (!cpus_intersects(new_mask, cpu_online_map)) { @@ -4792,9 +4831,9 @@ int set_cpus_allowed(task_t *p, cpumask_t new_mask) } out: task_rq_unlock(rq, &flags); + return ret; } - EXPORT_SYMBOL_GPL(set_cpus_allowed); /* @@ -4810,7 +4849,7 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed); */ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) { - runqueue_t *rq_dest, *rq_src; + struct rq *rq_dest, *rq_src; int ret = 0; if (unlikely(cpu_is_offline(dest_cpu))) @@ -4855,16 +4894,16 @@ out: */ static int migration_thread(void *data) { - runqueue_t *rq; int cpu = (long)data; + struct rq *rq; rq = cpu_rq(cpu); BUG_ON(rq->migration_thread != current); set_current_state(TASK_INTERRUPTIBLE); while (!kthread_should_stop()) { + struct migration_req *req; struct list_head *head; - migration_req_t *req; try_to_freeze(); @@ -4888,7 +4927,7 @@ static int migration_thread(void *data) set_current_state(TASK_INTERRUPTIBLE); continue; } - req = list_entry(head->next, migration_req_t, list); + req = list_entry(head->next, struct migration_req, list); list_del_init(head->next); spin_unlock(&rq->lock); @@ -4913,28 +4952,28 @@ wait_to_die: #ifdef CONFIG_HOTPLUG_CPU /* Figure out where task on dead CPU should go, use force if neccessary. */ -static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *tsk) +static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) { - runqueue_t *rq; unsigned long flags; - int dest_cpu; cpumask_t mask; + struct rq *rq; + int dest_cpu; restart: /* On same node? */ mask = node_to_cpumask(cpu_to_node(dead_cpu)); - cpus_and(mask, mask, tsk->cpus_allowed); + cpus_and(mask, mask, p->cpus_allowed); dest_cpu = any_online_cpu(mask); /* On any allowed CPU? 
*/ if (dest_cpu == NR_CPUS) - dest_cpu = any_online_cpu(tsk->cpus_allowed); + dest_cpu = any_online_cpu(p->cpus_allowed); /* No more Mr. Nice Guy. */ if (dest_cpu == NR_CPUS) { - rq = task_rq_lock(tsk, &flags); - cpus_setall(tsk->cpus_allowed); - dest_cpu = any_online_cpu(tsk->cpus_allowed); + rq = task_rq_lock(p, &flags); + cpus_setall(p->cpus_allowed); + dest_cpu = any_online_cpu(p->cpus_allowed); task_rq_unlock(rq, &flags); /* @@ -4942,12 +4981,12 @@ restart: * kernel threads (both mm NULL), since they never * leave kernel. */ - if (tsk->mm && printk_ratelimit()) + if (p->mm && printk_ratelimit()) printk(KERN_INFO "process %d (%s) no " "longer affine to cpu%d\n", - tsk->pid, tsk->comm, dead_cpu); + p->pid, p->comm, dead_cpu); } - if (!__migrate_task(tsk, dead_cpu, dest_cpu)) + if (!__migrate_task(p, dead_cpu, dest_cpu)) goto restart; } @@ -4958,9 +4997,9 @@ restart: * their home CPUs. So we just add the counter to another CPU's counter, * to keep the global sum constant after CPU-down: */ -static void migrate_nr_uninterruptible(runqueue_t *rq_src) +static void migrate_nr_uninterruptible(struct rq *rq_src) { - runqueue_t *rq_dest = cpu_rq(any_online_cpu(CPU_MASK_ALL)); + struct rq *rq_dest = cpu_rq(any_online_cpu(CPU_MASK_ALL)); unsigned long flags; local_irq_save(flags); @@ -4974,48 +5013,51 @@ static void migrate_nr_uninterruptible(runqueue_t *rq_src) /* Run through task list and migrate tasks from the dead cpu. */ static void migrate_live_tasks(int src_cpu) { - struct task_struct *tsk, *t; + struct task_struct *p, *t; write_lock_irq(&tasklist_lock); - do_each_thread(t, tsk) { - if (tsk == current) + do_each_thread(t, p) { + if (p == current) continue; - if (task_cpu(tsk) == src_cpu) - move_task_off_dead_cpu(src_cpu, tsk); - } while_each_thread(t, tsk); + if (task_cpu(p) == src_cpu) + move_task_off_dead_cpu(src_cpu, p); + } while_each_thread(t, p); write_unlock_irq(&tasklist_lock); } /* Schedules idle task to be the next runnable task on current CPU. 
* It does so by boosting its priority to highest possible and adding it to - * the _front_ of runqueue. Used by CPU offline code. + * the _front_ of the runqueue. Used by CPU offline code. */ void sched_idle_next(void) { - int cpu = smp_processor_id(); - runqueue_t *rq = this_rq(); + int this_cpu = smp_processor_id(); + struct rq *rq = cpu_rq(this_cpu); struct task_struct *p = rq->idle; unsigned long flags; /* cpu has to be offline */ - BUG_ON(cpu_online(cpu)); + BUG_ON(cpu_online(this_cpu)); - /* Strictly not necessary since rest of the CPUs are stopped by now - * and interrupts disabled on current cpu. + /* + * Strictly not necessary since rest of the CPUs are stopped by now + * and interrupts disabled on the current cpu. */ spin_lock_irqsave(&rq->lock, flags); __setscheduler(p, SCHED_FIFO, MAX_RT_PRIO-1); - /* Add idle task to _front_ of it's priority queue */ + + /* Add idle task to the _front_ of its priority queue: */ __activate_idle_task(p, rq); spin_unlock_irqrestore(&rq->lock, flags); } -/* Ensures that the idle task is using init_mm right before its cpu goes +/* + * Ensures that the idle task is using init_mm right before its cpu goes * offline. */ void idle_task_exit(void) @@ -5029,17 +5071,17 @@ void idle_task_exit(void) mmdrop(mm); } -static void migrate_dead(unsigned int dead_cpu, task_t *tsk) +static void migrate_dead(unsigned int dead_cpu, struct task_struct *p) { - struct runqueue *rq = cpu_rq(dead_cpu); + struct rq *rq = cpu_rq(dead_cpu); /* Must be exiting, otherwise would be on tasklist. */ - BUG_ON(tsk->exit_state != EXIT_ZOMBIE && tsk->exit_state != EXIT_DEAD); + BUG_ON(p->exit_state != EXIT_ZOMBIE && p->exit_state != EXIT_DEAD); /* Cannot have done final schedule yet: would have vanished. 
*/ - BUG_ON(tsk->flags & PF_DEAD); + BUG_ON(p->flags & PF_DEAD); - get_task_struct(tsk); + get_task_struct(p); /* * Drop lock around migration; if someone else moves it, @@ -5047,25 +5089,25 @@ static void migrate_dead(unsigned int dead_cpu, task_t *tsk) * fine. */ spin_unlock_irq(&rq->lock); - move_task_off_dead_cpu(dead_cpu, tsk); + move_task_off_dead_cpu(dead_cpu, p); spin_lock_irq(&rq->lock); - put_task_struct(tsk); + put_task_struct(p); } /* release_task() removes task from tasklist, so we won't find dead tasks. */ static void migrate_dead_tasks(unsigned int dead_cpu) { - unsigned arr, i; - struct runqueue *rq = cpu_rq(dead_cpu); + struct rq *rq = cpu_rq(dead_cpu); + unsigned int arr, i; for (arr = 0; arr < 2; arr++) { for (i = 0; i < MAX_PRIO; i++) { struct list_head *list = &rq->arrays[arr].queue[i]; + while (!list_empty(list)) - migrate_dead(dead_cpu, - list_entry(list->next, task_t, - run_list)); + migrate_dead(dead_cpu, list_entry(list->next, + struct task_struct, run_list)); } } } @@ -5075,14 +5117,13 @@ static void migrate_dead_tasks(unsigned int dead_cpu) * migration_call - callback that gets triggered when a CPU is added. * Here we can start up the necessary migration thread for the new CPU. */ -static int __cpuinit migration_call(struct notifier_block *nfb, - unsigned long action, - void *hcpu) +static int __cpuinit +migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) { - int cpu = (long)hcpu; struct task_struct *p; - struct runqueue *rq; + int cpu = (long)hcpu; unsigned long flags; + struct rq *rq; switch (action) { case CPU_UP_PREPARE: @@ -5097,10 +5138,12 @@ static int __cpuinit migration_call(struct notifier_block *nfb, task_rq_unlock(rq, &flags); cpu_rq(cpu)->migration_thread = p; break; + case CPU_ONLINE: /* Strictly unneccessary, as first user will wake it. 
*/ wake_up_process(cpu_rq(cpu)->migration_thread); break; + #ifdef CONFIG_HOTPLUG_CPU case CPU_UP_CANCELED: if (!cpu_rq(cpu)->migration_thread) @@ -5111,6 +5154,7 @@ static int __cpuinit migration_call(struct notifier_block *nfb, kthread_stop(cpu_rq(cpu)->migration_thread); cpu_rq(cpu)->migration_thread = NULL; break; + case CPU_DEAD: migrate_live_tasks(cpu); rq = cpu_rq(cpu); @@ -5131,9 +5175,10 @@ static int __cpuinit migration_call(struct notifier_block *nfb, * the requestors. */ spin_lock_irq(&rq->lock); while (!list_empty(&rq->migration_queue)) { - migration_req_t *req; + struct migration_req *req; + req = list_entry(rq->migration_queue.next, - migration_req_t, list); + struct migration_req, list); list_del_init(&req->list); complete(&req->done); } @@ -5155,10 +5200,12 @@ static struct notifier_block __cpuinitdata migration_notifier = { int __init migration_init(void) { void *cpu = (void *)(long)smp_processor_id(); - /* Start one for boot CPU. */ + + /* Start one for the boot CPU: */ migration_call(&migration_notifier, CPU_UP_PREPARE, cpu); migration_call(&migration_notifier, CPU_ONLINE, cpu); register_cpu_notifier(&migration_notifier); + return 0; } #endif @@ -5254,7 +5301,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) } while (sd); } #else -#define sched_domain_debug(sd, cpu) {} +# define sched_domain_debug(sd, cpu) do { } while (0) #endif static int sd_degenerate(struct sched_domain *sd) @@ -5280,8 +5327,8 @@ static int sd_degenerate(struct sched_domain *sd) return 1; } -static int sd_parent_degenerate(struct sched_domain *sd, - struct sched_domain *parent) +static int +sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) { unsigned long cflags = sd->flags, pflags = parent->flags; @@ -5314,7 +5361,7 @@ static int sd_parent_degenerate(struct sched_domain *sd, */ static void cpu_attach_domain(struct sched_domain *sd, int cpu) { - runqueue_t *rq = cpu_rq(cpu); + struct rq *rq = cpu_rq(cpu); struct sched_domain *tmp; 
/* Remove the sched domains which do not contribute to scheduling. */ @@ -5576,8 +5623,8 @@ static void touch_cache(void *__cache, unsigned long __size) /* * Measure the cache-cost of one task migration. Returns in units of nsec. */ -static unsigned long long measure_one(void *cache, unsigned long size, - int source, int target) +static unsigned long long +measure_one(void *cache, unsigned long size, int source, int target) { cpumask_t mask, saved_mask; unsigned long long t0, t1, t2, t3, cost; @@ -5927,9 +5974,9 @@ static int find_next_best_node(int node, unsigned long *used_nodes) */ static cpumask_t sched_domain_node_span(int node) { - int i; - cpumask_t span, nodemask; DECLARE_BITMAP(used_nodes, MAX_NUMNODES); + cpumask_t span, nodemask; + int i; cpus_clear(span); bitmap_zero(used_nodes, MAX_NUMNODES); @@ -5940,6 +5987,7 @@ static cpumask_t sched_domain_node_span(int node) for (i = 1; i < SD_NODES_PER_DOMAIN; i++) { int next_node = find_next_best_node(node, used_nodes); + nodemask = node_to_cpumask(next_node); cpus_or(span, span, nodemask); } @@ -5949,19 +5997,23 @@ static cpumask_t sched_domain_node_span(int node) #endif int sched_smt_power_savings = 0, sched_mc_power_savings = 0; + /* - * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we - * can switch it on easily if needed. 
+ * SMT sched-domains: */ #ifdef CONFIG_SCHED_SMT static DEFINE_PER_CPU(struct sched_domain, cpu_domains); static struct sched_group sched_group_cpus[NR_CPUS]; + static int cpu_to_cpu_group(int cpu) { return cpu; } #endif +/* + * multi-core sched-domains: + */ #ifdef CONFIG_SCHED_MC static DEFINE_PER_CPU(struct sched_domain, core_domains); static struct sched_group *sched_group_core_bycpu[NR_CPUS]; @@ -5981,9 +6033,10 @@ static int cpu_to_core_group(int cpu) static DEFINE_PER_CPU(struct sched_domain, phys_domains); static struct sched_group *sched_group_phys_bycpu[NR_CPUS]; + static int cpu_to_phys_group(int cpu) { -#if defined(CONFIG_SCHED_MC) +#ifdef CONFIG_SCHED_MC cpumask_t mask = cpu_coregroup_map(cpu); return first_cpu(mask); #elif defined(CONFIG_SCHED_SMT) @@ -6529,6 +6582,7 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls) { int err = 0; + #ifdef CONFIG_SCHED_SMT if (smt_capable()) err = sysfs_create_file(&cls->kset.kobj, @@ -6548,7 +6602,8 @@ static ssize_t sched_mc_power_savings_show(struct sys_device *dev, char *page) { return sprintf(page, "%u\n", sched_mc_power_savings); } -static ssize_t sched_mc_power_savings_store(struct sys_device *dev, const char *buf, size_t count) +static ssize_t sched_mc_power_savings_store(struct sys_device *dev, + const char *buf, size_t count) { return sched_power_savings_store(buf, count, 0); } @@ -6561,7 +6616,8 @@ static ssize_t sched_smt_power_savings_show(struct sys_device *dev, char *page) { return sprintf(page, "%u\n", sched_smt_power_savings); } -static ssize_t sched_smt_power_savings_store(struct sys_device *dev, const char *buf, size_t count) +static ssize_t sched_smt_power_savings_store(struct sys_device *dev, + const char *buf, size_t count) { return sched_power_savings_store(buf, count, 1); } @@ -6623,6 +6679,7 @@ int in_sched_functions(unsigned long addr) { /* Linker adds these: start and end of __sched 
functions */ extern char __sched_text_start[], __sched_text_end[]; + return in_lock_functions(addr) || (addr >= (unsigned long)__sched_text_start && addr < (unsigned long)__sched_text_end); @@ -6630,14 +6687,15 @@ int in_sched_functions(unsigned long addr) void __init sched_init(void) { - runqueue_t *rq; int i, j, k; for_each_possible_cpu(i) { - prio_array_t *array; + struct prio_array *array; + struct rq *rq; rq = cpu_rq(i); spin_lock_init(&rq->lock); + lockdep_set_class(&rq->lock, &rq->rq_lock_key); rq->nr_running = 0; rq->active = rq->arrays; rq->expired = rq->arrays + 1; @@ -6684,7 +6742,7 @@ void __init sched_init(void) #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP void __might_sleep(char *file, int line) { -#if defined(in_atomic) +#ifdef in_atomic static unsigned long prev_jiffy; /* ratelimiting */ if ((in_atomic() || irqs_disabled()) && @@ -6706,10 +6764,10 @@ EXPORT_SYMBOL(__might_sleep); #ifdef CONFIG_MAGIC_SYSRQ void normalize_rt_tasks(void) { + struct prio_array *array; struct task_struct *p; - prio_array_t *array; unsigned long flags; - runqueue_t *rq; + struct rq *rq; read_lock_irq(&tasklist_lock); for_each_process(p) { @@ -6753,7 +6811,7 @@ void normalize_rt_tasks(void) * * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! */ -task_t *curr_task(int cpu) +struct task_struct *curr_task(int cpu) { return cpu_curr(cpu); } @@ -6773,7 +6831,7 @@ task_t *curr_task(int cpu) * * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! 
*/ -void set_curr_task(int cpu, task_t *p) +void set_curr_task(int cpu, struct task_struct *p) { cpu_curr(cpu) = p; } diff --git a/kernel/softirq.c b/kernel/softirq.c index 8f03e3b89b55..215541e26c1a 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -62,6 +62,119 @@ static inline void wakeup_softirqd(void) } /* + * This one is for softirq.c-internal use, + * where hardirqs are disabled legitimately: + */ +static void __local_bh_disable(unsigned long ip) +{ + unsigned long flags; + + WARN_ON_ONCE(in_irq()); + + raw_local_irq_save(flags); + add_preempt_count(SOFTIRQ_OFFSET); + /* + * Were softirqs turned off above: + */ + if (softirq_count() == SOFTIRQ_OFFSET) + trace_softirqs_off(ip); + raw_local_irq_restore(flags); +} + +void local_bh_disable(void) +{ + __local_bh_disable((unsigned long)__builtin_return_address(0)); +} + +EXPORT_SYMBOL(local_bh_disable); + +void __local_bh_enable(void) +{ + WARN_ON_ONCE(in_irq()); + + /* + * softirqs should never be enabled by __local_bh_enable(), + * it always nests inside local_bh_enable() sections: + */ + WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET); + + sub_preempt_count(SOFTIRQ_OFFSET); +} +EXPORT_SYMBOL_GPL(__local_bh_enable); + +/* + * Special-case - softirqs can safely be enabled in + * cond_resched_softirq(), or by __do_softirq(), + * without processing still-pending softirqs: + */ +void _local_bh_enable(void) +{ + WARN_ON_ONCE(in_irq()); + WARN_ON_ONCE(!irqs_disabled()); + + if (softirq_count() == SOFTIRQ_OFFSET) + trace_softirqs_on((unsigned long)__builtin_return_address(0)); + sub_preempt_count(SOFTIRQ_OFFSET); +} + +EXPORT_SYMBOL(_local_bh_enable); + +void local_bh_enable(void) +{ + unsigned long flags; + + WARN_ON_ONCE(in_irq()); + WARN_ON_ONCE(irqs_disabled()); + + local_irq_save(flags); + /* + * Are softirqs going to be turned on now: + */ + if (softirq_count() == SOFTIRQ_OFFSET) + trace_softirqs_on((unsigned long)__builtin_return_address(0)); + /* + * Keep preemption disabled until we are done with + * 
softirq processing: + */ + sub_preempt_count(SOFTIRQ_OFFSET - 1); + + if (unlikely(!in_interrupt() && local_softirq_pending())) + do_softirq(); + + dec_preempt_count(); + local_irq_restore(flags); + preempt_check_resched(); +} +EXPORT_SYMBOL(local_bh_enable); + +void local_bh_enable_ip(unsigned long ip) +{ + unsigned long flags; + + WARN_ON_ONCE(in_irq()); + + local_irq_save(flags); + /* + * Are softirqs going to be turned on now: + */ + if (softirq_count() == SOFTIRQ_OFFSET) + trace_softirqs_on(ip); + /* + * Keep preemption disabled until we are done with + * softirq processing: + */ + sub_preempt_count(SOFTIRQ_OFFSET - 1); + + if (unlikely(!in_interrupt() && local_softirq_pending())) + do_softirq(); + + dec_preempt_count(); + local_irq_restore(flags); + preempt_check_resched(); +} +EXPORT_SYMBOL(local_bh_enable_ip); + +/* * We restart softirq processing MAX_SOFTIRQ_RESTART times, * and we fall back to softirqd after that. * @@ -80,8 +193,11 @@ asmlinkage void __do_softirq(void) int cpu; pending = local_softirq_pending(); + account_system_vtime(current); + + __local_bh_disable((unsigned long)__builtin_return_address(0)); + trace_softirq_enter(); - local_bh_disable(); cpu = smp_processor_id(); restart: /* Reset the pending bitmask before enabling irqs */ @@ -109,7 +225,10 @@ restart: if (pending) wakeup_softirqd(); - __local_bh_enable(); + trace_softirq_exit(); + + account_system_vtime(current); + _local_bh_enable(); } #ifndef __ARCH_HAS_DO_SOFTIRQ @@ -136,23 +255,6 @@ EXPORT_SYMBOL(do_softirq); #endif -void local_bh_enable(void) -{ - WARN_ON(irqs_disabled()); - /* - * Keep preemption disabled until we are done with - * softirq processing: - */ - sub_preempt_count(SOFTIRQ_OFFSET - 1); - - if (unlikely(!in_interrupt() && local_softirq_pending())) - do_softirq(); - - dec_preempt_count(); - preempt_check_resched(); -} -EXPORT_SYMBOL(local_bh_enable); - #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED # define invoke_softirq() __do_softirq() #else @@ -165,6 +267,7 @@ 
EXPORT_SYMBOL(local_bh_enable); void irq_exit(void) { account_system_vtime(current); + trace_hardirq_exit(); sub_preempt_count(IRQ_EXIT_OFFSET); if (!in_interrupt() && local_softirq_pending()) invoke_softirq(); diff --git a/kernel/spinlock.c b/kernel/spinlock.c index b31e54eadf56..bfd6ad9c0330 100644 --- a/kernel/spinlock.c +++ b/kernel/spinlock.c @@ -13,6 +13,7 @@ #include <linux/preempt.h> #include <linux/spinlock.h> #include <linux/interrupt.h> +#include <linux/debug_locks.h> #include <linux/module.h> /* @@ -29,8 +30,10 @@ EXPORT_SYMBOL(generic__raw_read_trylock); int __lockfunc _spin_trylock(spinlock_t *lock) { preempt_disable(); - if (_raw_spin_trylock(lock)) + if (_raw_spin_trylock(lock)) { + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); return 1; + } preempt_enable(); return 0; @@ -40,8 +43,10 @@ EXPORT_SYMBOL(_spin_trylock); int __lockfunc _read_trylock(rwlock_t *lock) { preempt_disable(); - if (_raw_read_trylock(lock)) + if (_raw_read_trylock(lock)) { + rwlock_acquire_read(&lock->dep_map, 0, 1, _RET_IP_); return 1; + } preempt_enable(); return 0; @@ -51,19 +56,28 @@ EXPORT_SYMBOL(_read_trylock); int __lockfunc _write_trylock(rwlock_t *lock) { preempt_disable(); - if (_raw_write_trylock(lock)) + if (_raw_write_trylock(lock)) { + rwlock_acquire(&lock->dep_map, 0, 1, _RET_IP_); return 1; + } preempt_enable(); return 0; } EXPORT_SYMBOL(_write_trylock); -#if !defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP) +/* + * If lockdep is enabled then we use the non-preemption spin-ops + * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are + * not re-enabled during lock-acquire (which the preempt-spin-ops do): + */ +#if !defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP) || \ + defined(CONFIG_PROVE_LOCKING) void __lockfunc _read_lock(rwlock_t *lock) { preempt_disable(); + rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); _raw_read_lock(lock); } EXPORT_SYMBOL(_read_lock); @@ -74,7 +88,17 @@ unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock) 
local_irq_save(flags); preempt_disable(); + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); + /* + * On lockdep we dont want the hand-coded irq-enable of + * _raw_spin_lock_flags() code, because lockdep assumes + * that interrupts are not re-enabled during lock-acquire: + */ +#ifdef CONFIG_PROVE_LOCKING + _raw_spin_lock(lock); +#else _raw_spin_lock_flags(lock, &flags); +#endif return flags; } EXPORT_SYMBOL(_spin_lock_irqsave); @@ -83,6 +107,7 @@ void __lockfunc _spin_lock_irq(spinlock_t *lock) { local_irq_disable(); preempt_disable(); + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); _raw_spin_lock(lock); } EXPORT_SYMBOL(_spin_lock_irq); @@ -91,6 +116,7 @@ void __lockfunc _spin_lock_bh(spinlock_t *lock) { local_bh_disable(); preempt_disable(); + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); _raw_spin_lock(lock); } EXPORT_SYMBOL(_spin_lock_bh); @@ -101,6 +127,7 @@ unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock) local_irq_save(flags); preempt_disable(); + rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); _raw_read_lock(lock); return flags; } @@ -110,6 +137,7 @@ void __lockfunc _read_lock_irq(rwlock_t *lock) { local_irq_disable(); preempt_disable(); + rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); _raw_read_lock(lock); } EXPORT_SYMBOL(_read_lock_irq); @@ -118,6 +146,7 @@ void __lockfunc _read_lock_bh(rwlock_t *lock) { local_bh_disable(); preempt_disable(); + rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); _raw_read_lock(lock); } EXPORT_SYMBOL(_read_lock_bh); @@ -128,6 +157,7 @@ unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock) local_irq_save(flags); preempt_disable(); + rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); _raw_write_lock(lock); return flags; } @@ -137,6 +167,7 @@ void __lockfunc _write_lock_irq(rwlock_t *lock) { local_irq_disable(); preempt_disable(); + rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); _raw_write_lock(lock); } EXPORT_SYMBOL(_write_lock_irq); @@ -145,6 +176,7 @@ void __lockfunc _write_lock_bh(rwlock_t *lock) { 
local_bh_disable(); preempt_disable(); + rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); _raw_write_lock(lock); } EXPORT_SYMBOL(_write_lock_bh); @@ -152,6 +184,7 @@ EXPORT_SYMBOL(_write_lock_bh); void __lockfunc _spin_lock(spinlock_t *lock) { preempt_disable(); + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); _raw_spin_lock(lock); } @@ -160,6 +193,7 @@ EXPORT_SYMBOL(_spin_lock); void __lockfunc _write_lock(rwlock_t *lock) { preempt_disable(); + rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); _raw_write_lock(lock); } @@ -255,8 +289,22 @@ BUILD_LOCK_OPS(write, rwlock); #endif /* CONFIG_PREEMPT */ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + +void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass) +{ + preempt_disable(); + spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); + _raw_spin_lock(lock); +} + +EXPORT_SYMBOL(_spin_lock_nested); + +#endif + void __lockfunc _spin_unlock(spinlock_t *lock) { + spin_release(&lock->dep_map, 1, _RET_IP_); _raw_spin_unlock(lock); preempt_enable(); } @@ -264,6 +312,7 @@ EXPORT_SYMBOL(_spin_unlock); void __lockfunc _write_unlock(rwlock_t *lock) { + rwlock_release(&lock->dep_map, 1, _RET_IP_); _raw_write_unlock(lock); preempt_enable(); } @@ -271,6 +320,7 @@ EXPORT_SYMBOL(_write_unlock); void __lockfunc _read_unlock(rwlock_t *lock) { + rwlock_release(&lock->dep_map, 1, _RET_IP_); _raw_read_unlock(lock); preempt_enable(); } @@ -278,6 +328,7 @@ EXPORT_SYMBOL(_read_unlock); void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) { + spin_release(&lock->dep_map, 1, _RET_IP_); _raw_spin_unlock(lock); local_irq_restore(flags); preempt_enable(); @@ -286,6 +337,7 @@ EXPORT_SYMBOL(_spin_unlock_irqrestore); void __lockfunc _spin_unlock_irq(spinlock_t *lock) { + spin_release(&lock->dep_map, 1, _RET_IP_); _raw_spin_unlock(lock); local_irq_enable(); preempt_enable(); @@ -294,14 +346,16 @@ EXPORT_SYMBOL(_spin_unlock_irq); void __lockfunc _spin_unlock_bh(spinlock_t *lock) { + spin_release(&lock->dep_map, 1, _RET_IP_); 
_raw_spin_unlock(lock); preempt_enable_no_resched(); - local_bh_enable(); + local_bh_enable_ip((unsigned long)__builtin_return_address(0)); } EXPORT_SYMBOL(_spin_unlock_bh); void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) { + rwlock_release(&lock->dep_map, 1, _RET_IP_); _raw_read_unlock(lock); local_irq_restore(flags); preempt_enable(); @@ -310,6 +364,7 @@ EXPORT_SYMBOL(_read_unlock_irqrestore); void __lockfunc _read_unlock_irq(rwlock_t *lock) { + rwlock_release(&lock->dep_map, 1, _RET_IP_); _raw_read_unlock(lock); local_irq_enable(); preempt_enable(); @@ -318,14 +373,16 @@ EXPORT_SYMBOL(_read_unlock_irq); void __lockfunc _read_unlock_bh(rwlock_t *lock) { + rwlock_release(&lock->dep_map, 1, _RET_IP_); _raw_read_unlock(lock); preempt_enable_no_resched(); - local_bh_enable(); + local_bh_enable_ip((unsigned long)__builtin_return_address(0)); } EXPORT_SYMBOL(_read_unlock_bh); void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) { + rwlock_release(&lock->dep_map, 1, _RET_IP_); _raw_write_unlock(lock); local_irq_restore(flags); preempt_enable(); @@ -334,6 +391,7 @@ EXPORT_SYMBOL(_write_unlock_irqrestore); void __lockfunc _write_unlock_irq(rwlock_t *lock) { + rwlock_release(&lock->dep_map, 1, _RET_IP_); _raw_write_unlock(lock); local_irq_enable(); preempt_enable(); @@ -342,9 +400,10 @@ EXPORT_SYMBOL(_write_unlock_irq); void __lockfunc _write_unlock_bh(rwlock_t *lock) { + rwlock_release(&lock->dep_map, 1, _RET_IP_); _raw_write_unlock(lock); preempt_enable_no_resched(); - local_bh_enable(); + local_bh_enable_ip((unsigned long)__builtin_return_address(0)); } EXPORT_SYMBOL(_write_unlock_bh); @@ -352,11 +411,13 @@ int __lockfunc _spin_trylock_bh(spinlock_t *lock) { local_bh_disable(); preempt_disable(); - if (_raw_spin_trylock(lock)) + if (_raw_spin_trylock(lock)) { + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); return 1; + } preempt_enable_no_resched(); - local_bh_enable(); + local_bh_enable_ip((unsigned 
long)__builtin_return_address(0)); return 0; } EXPORT_SYMBOL(_spin_trylock_bh); diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c new file mode 100644 index 000000000000..b71816e47a30 --- /dev/null +++ b/kernel/stacktrace.c @@ -0,0 +1,24 @@ +/* + * kernel/stacktrace.c + * + * Stack trace management functions + * + * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + */ +#include <linux/sched.h> +#include <linux/kallsyms.h> +#include <linux/stacktrace.h> + +void print_stack_trace(struct stack_trace *trace, int spaces) +{ + int i, j; + + for (i = 0; i < trace->nr_entries; i++) { + unsigned long ip = trace->entries[i]; + + for (j = 0; j < spaces + 1; j++) + printk(" "); + print_ip_sym(ip); + } +} + diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 2c0aacc37c55..dcfb5d731466 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -4,7 +4,6 @@ #include <linux/cpu.h> #include <linux/err.h> #include <linux/syscalls.h> -#include <linux/kthread.h> #include <asm/atomic.h> #include <asm/semaphore.h> #include <asm/uaccess.h> @@ -26,11 +25,13 @@ static unsigned int stopmachine_num_threads; static atomic_t stopmachine_thread_ack; static DECLARE_MUTEX(stopmachine_mutex); -static int stopmachine(void *unused) +static int stopmachine(void *cpu) { int irqs_disabled = 0; int prepared = 0; + set_cpus_allowed(current, cpumask_of_cpu((int)(long)cpu)); + /* Ack: we are alive */ smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */ atomic_inc(&stopmachine_thread_ack); @@ -84,8 +85,7 @@ static void stopmachine_set_state(enum stopmachine_state state) static int stop_machine(void) { - int ret = 0; - unsigned int i; + int i, ret = 0; struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; /* One high-prio thread per cpu. We'll do this one. 
*/ @@ -96,16 +96,11 @@ static int stop_machine(void) stopmachine_state = STOPMACHINE_WAIT; for_each_online_cpu(i) { - struct task_struct *tsk; if (i == raw_smp_processor_id()) continue; - tsk = kthread_create(stopmachine, NULL, "stopmachine"); - if (IS_ERR(tsk)) { - ret = PTR_ERR(tsk); + ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL); + if (ret < 0) break; - } - kthread_bind(tsk, i); - wake_up_process(tsk); stopmachine_num_threads++; } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 99a58f279077..362a0cc37138 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -932,6 +932,17 @@ static ctl_table vm_table[] = { .strategy = &sysctl_intvec, .extra1 = &zero, }, + { + .ctl_name = VM_MIN_UNMAPPED, + .procname = "min_unmapped_ratio", + .data = &sysctl_min_unmapped_ratio, + .maxlen = sizeof(sysctl_min_unmapped_ratio), + .mode = 0644, + .proc_handler = &sysctl_min_unmapped_ratio_sysctl_handler, + .strategy = &sysctl_intvec, + .extra1 = &zero, + .extra2 = &one_hundred, + }, #endif #ifdef CONFIG_X86_32 { diff --git a/kernel/timer.c b/kernel/timer.c index 5a8960253063..396a3c024c2c 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1208,7 +1208,7 @@ unsigned long wall_jiffies = INITIAL_JIFFIES; * playing with xtime and avenrun. 
*/ #ifndef ARCH_HAVE_XTIME_LOCK -seqlock_t xtime_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED; +__cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock); EXPORT_SYMBOL(xtime_lock); #endif @@ -1368,7 +1368,7 @@ asmlinkage long sys_getegid(void) static void process_timeout(unsigned long __data) { - wake_up_process((task_t *)__data); + wake_up_process((struct task_struct *)__data); } /** @@ -1559,6 +1559,13 @@ asmlinkage long sys_sysinfo(struct sysinfo __user *info) return 0; } +/* + * lockdep: we want to track each per-CPU base as a separate lock-class, + * but timer-bases are kmalloc()-ed, so we need to attach separate + * keys to them: + */ +static struct lock_class_key base_lock_keys[NR_CPUS]; + static int __devinit init_timers_cpu(int cpu) { int j; @@ -1594,6 +1601,8 @@ static int __devinit init_timers_cpu(int cpu) } spin_lock_init(&base->lock); + lockdep_set_class(&base->lock, base_lock_keys + cpu); + for (j = 0; j < TVN_SIZE; j++) { INIT_LIST_HEAD(base->tv5.vec + j); INIT_LIST_HEAD(base->tv4.vec + j); diff --git a/kernel/wait.c b/kernel/wait.c index 5985d866531f..a1d57aeb7f75 100644 --- a/kernel/wait.c +++ b/kernel/wait.c @@ -10,6 +10,10 @@ #include <linux/wait.h> #include <linux/hash.h> +struct lock_class_key waitqueue_lock_key; + +EXPORT_SYMBOL(waitqueue_lock_key); + void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) { unsigned long flags; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 59f0b42bd89e..90d2c6001659 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -51,7 +51,7 @@ struct cpu_workqueue_struct { wait_queue_head_t work_done; struct workqueue_struct *wq; - task_t *thread; + struct task_struct *thread; int run_depth; /* Detect run_workqueue() recursion depth */ } ____cacheline_aligned; diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index e4fcbd12cf6e..e5889b1a33ff 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -48,7 +48,7 @@ config DEBUG_KERNEL config LOG_BUF_SHIFT int "Kernel log buffer size 
(16 => 64KB, 17 => 128KB)" if DEBUG_KERNEL range 12 21 - default 17 if S390 + default 17 if S390 || LOCKDEP default 16 if X86_NUMAQ || IA64 default 15 if SMP default 14 @@ -107,7 +107,7 @@ config DEBUG_SLAB_LEAK config DEBUG_PREEMPT bool "Debug preemptible kernel" - depends on DEBUG_KERNEL && PREEMPT + depends on DEBUG_KERNEL && PREEMPT && TRACE_IRQFLAGS_SUPPORT default y help If you say Y here then the kernel will use a debug variant of the @@ -115,14 +115,6 @@ config DEBUG_PREEMPT if kernel code uses it in a preemption-unsafe way. Also, the kernel will detect preemption count underflows. -config DEBUG_MUTEXES - bool "Mutex debugging, deadlock detection" - default n - depends on DEBUG_KERNEL - help - This allows mutex semantics violations and mutex related deadlocks - (lockups) to be detected and reported automatically. - config DEBUG_RT_MUTEXES bool "RT Mutex debugging, deadlock detection" depends on DEBUG_KERNEL && RT_MUTEXES @@ -142,7 +134,7 @@ config RT_MUTEX_TESTER This option enables a rt-mutex tester. config DEBUG_SPINLOCK - bool "Spinlock debugging" + bool "Spinlock and rw-lock debugging: basic checks" depends on DEBUG_KERNEL help Say Y here and build SMP to catch missing spinlock initialization @@ -150,13 +142,122 @@ config DEBUG_SPINLOCK best used in conjunction with the NMI watchdog so that spinlock deadlocks are also debuggable. +config DEBUG_MUTEXES + bool "Mutex debugging: basic checks" + depends on DEBUG_KERNEL + help + This feature allows mutex semantics violations to be detected and + reported. + +config DEBUG_RWSEMS + bool "RW-sem debugging: basic checks" + depends on DEBUG_KERNEL + help + This feature allows read-write semaphore semantics violations to + be detected and reported. 
+ +config DEBUG_LOCK_ALLOC + bool "Lock debugging: detect incorrect freeing of live locks" + depends on TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT + select DEBUG_SPINLOCK + select DEBUG_MUTEXES + select DEBUG_RWSEMS + select LOCKDEP + help + This feature will check whether any held lock (spinlock, rwlock, + mutex or rwsem) is incorrectly freed by the kernel, via any of the + memory-freeing routines (kfree(), kmem_cache_free(), free_pages(), + vfree(), etc.), whether a live lock is incorrectly reinitialized via + spin_lock_init()/mutex_init()/etc., or whether there is any lock + held during task exit. + +config PROVE_LOCKING + bool "Lock debugging: prove locking correctness" + depends on TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT + select LOCKDEP + select DEBUG_SPINLOCK + select DEBUG_MUTEXES + select DEBUG_RWSEMS + select DEBUG_LOCK_ALLOC + default n + help + This feature enables the kernel to prove that all locking + that occurs in the kernel runtime is mathematically + correct: that under no circumstance could an arbitrary (and + not yet triggered) combination of observed locking + sequences (on an arbitrary number of CPUs, running an + arbitrary number of tasks and interrupt contexts) cause a + deadlock. + + In short, this feature enables the kernel to report locking + related deadlocks before they actually occur. + + The proof does not depend on how hard and complex a + deadlock scenario would be to trigger: how many + participant CPUs, tasks and irq-contexts would be needed + for it to trigger. The proof also does not depend on + timing: if a race and a resulting deadlock is possible + theoretically (no matter how unlikely the race scenario + is), it will be proven so and will immediately be + reported by the kernel (once the event is observed that + makes the deadlock theoretically possible). + + If a deadlock is impossible (i.e. 
the locking rules, as + observed by the kernel, are mathematically correct), the + kernel reports nothing. + + NOTE: this feature can also be enabled for rwlocks, mutexes + and rwsems - in which case all dependencies between these + different locking variants are observed and mapped too, and + the proof of observed correctness is also maintained for an + arbitrary combination of these separate locking variants. + + For more details, see Documentation/lockdep-design.txt. + +config LOCKDEP + bool + depends on TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT + select STACKTRACE + select FRAME_POINTER + select KALLSYMS + select KALLSYMS_ALL + +config DEBUG_LOCKDEP + bool "Lock dependency engine debugging" + depends on LOCKDEP + help + If you say Y here, the lock dependency engine will do + additional runtime checks to debug itself, at the price + of more runtime overhead. + +config TRACE_IRQFLAGS + bool + default y + depends on TRACE_IRQFLAGS_SUPPORT + depends on PROVE_LOCKING + config DEBUG_SPINLOCK_SLEEP - bool "Sleep-inside-spinlock checking" + bool "Spinlock debugging: sleep-inside-spinlock checking" depends on DEBUG_KERNEL help If you say Y here, various routines which may sleep will become very noisy if they are called with a spinlock held. +config DEBUG_LOCKING_API_SELFTESTS + bool "Locking API boot-time self-tests" + depends on DEBUG_KERNEL + help + Say Y here if you want the kernel to run a short self-test during + bootup. The self-test checks whether common types of locking bugs + are detected by debugging mechanisms or not. (if you disable + lock debugging then those bugs won't be detected of course.) + The following locking APIs are covered: spinlocks, rwlocks, + mutexes and rwsems.
+ +config STACKTRACE + bool + depends on STACKTRACE_SUPPORT + config DEBUG_KOBJECT bool "kobject debugging" depends on DEBUG_KERNEL @@ -212,7 +313,7 @@ config DEBUG_VM config FRAME_POINTER bool "Compile the kernel with frame pointers" - depends on DEBUG_KERNEL && (X86 || CRIS || M68K || M68KNOMMU || FRV || UML) + depends on DEBUG_KERNEL && (X86 || CRIS || M68K || M68KNOMMU || FRV || UML || S390) default y if DEBUG_INFO && UML help If you say Y here the resulting kernel image will be slightly larger diff --git a/lib/Makefile b/lib/Makefile index 10c13c9d7824..be9719ae82d0 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -11,13 +11,14 @@ lib-$(CONFIG_SMP) += cpumask.o lib-y += kobject.o kref.o kobject_uevent.o klist.o -obj-y += sort.o parser.o halfmd4.o iomap_copy.o +obj-y += sort.o parser.o halfmd4.o iomap_copy.o debug_locks.o ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG CFLAGS_kobject_uevent.o += -DDEBUG endif +obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o diff --git a/lib/debug_locks.c b/lib/debug_locks.c new file mode 100644 index 000000000000..0ef01d14727c --- /dev/null +++ b/lib/debug_locks.c @@ -0,0 +1,45 @@ +/* + * lib/debug_locks.c + * + * Generic place for common debugging facilities for various locks: + * spinlocks, rwlocks, mutexes and rwsems. + * + * Started by Ingo Molnar: + * + * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + */ +#include <linux/rwsem.h> +#include <linux/mutex.h> +#include <linux/module.h> +#include <linux/spinlock.h> +#include <linux/debug_locks.h> + +/* + * We want to turn all lock-debugging facilities on/off at once, + * via a global flag. The reason is that once a single bug has been + * detected and reported, there might be a cascade of followup bugs + * that would just muddy the log.
So we report the first one and + * shut up after that. + */ +int debug_locks = 1; + +/* + * The locking-testsuite uses <debug_locks_silent> to get a + * 'silent failure': nothing is printed to the console when + * a locking bug is detected. + */ +int debug_locks_silent; + +/* + * Generic 'turn off all lock debugging' function: + */ +int debug_locks_off(void) +{ + if (xchg(&debug_locks, 0)) { + if (!debug_locks_silent) { + console_verbose(); + return 1; + } + } + return 0; +} diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c index e713e86811ae..e0fdfddb406e 100644 --- a/lib/kernel_lock.c +++ b/lib/kernel_lock.c @@ -177,7 +177,12 @@ static inline void __lock_kernel(void) static inline void __unlock_kernel(void) { - spin_unlock(&kernel_flag); + /* + * the BKL is not covered by lockdep, so we open-code the + * unlocking sequence (and thus avoid the dep-chain ops): + */ + _raw_spin_unlock(&kernel_flag); + preempt_enable(); } /* diff --git a/lib/locking-selftest-hardirq.h b/lib/locking-selftest-hardirq.h new file mode 100644 index 000000000000..10d4a150b259 --- /dev/null +++ b/lib/locking-selftest-hardirq.h @@ -0,0 +1,9 @@ +#undef IRQ_DISABLE +#undef IRQ_ENABLE +#undef IRQ_ENTER +#undef IRQ_EXIT + +#define IRQ_ENABLE HARDIRQ_ENABLE +#define IRQ_DISABLE HARDIRQ_DISABLE +#define IRQ_ENTER HARDIRQ_ENTER +#define IRQ_EXIT HARDIRQ_EXIT diff --git a/lib/locking-selftest-mutex.h b/lib/locking-selftest-mutex.h new file mode 100644 index 000000000000..68601b6f584b --- /dev/null +++ b/lib/locking-selftest-mutex.h @@ -0,0 +1,11 @@ +#undef LOCK +#define LOCK ML + +#undef UNLOCK +#define UNLOCK MU + +#undef RLOCK +#undef WLOCK + +#undef INIT +#define INIT MI diff --git a/lib/locking-selftest-rlock-hardirq.h b/lib/locking-selftest-rlock-hardirq.h new file mode 100644 index 000000000000..9f517ebcb786 --- /dev/null +++ b/lib/locking-selftest-rlock-hardirq.h @@ -0,0 +1,2 @@ +#include "locking-selftest-rlock.h" +#include "locking-selftest-hardirq.h" diff --git 
a/lib/locking-selftest-rlock-softirq.h b/lib/locking-selftest-rlock-softirq.h new file mode 100644 index 000000000000..981455db7ff0 --- /dev/null +++ b/lib/locking-selftest-rlock-softirq.h @@ -0,0 +1,2 @@ +#include "locking-selftest-rlock.h" +#include "locking-selftest-softirq.h" diff --git a/lib/locking-selftest-rlock.h b/lib/locking-selftest-rlock.h new file mode 100644 index 000000000000..6789044f4d0e --- /dev/null +++ b/lib/locking-selftest-rlock.h @@ -0,0 +1,14 @@ +#undef LOCK +#define LOCK RL + +#undef UNLOCK +#define UNLOCK RU + +#undef RLOCK +#define RLOCK RL + +#undef WLOCK +#define WLOCK WL + +#undef INIT +#define INIT RWI diff --git a/lib/locking-selftest-rsem.h b/lib/locking-selftest-rsem.h new file mode 100644 index 000000000000..62da886680c7 --- /dev/null +++ b/lib/locking-selftest-rsem.h @@ -0,0 +1,14 @@ +#undef LOCK +#define LOCK RSL + +#undef UNLOCK +#define UNLOCK RSU + +#undef RLOCK +#define RLOCK RSL + +#undef WLOCK +#define WLOCK WSL + +#undef INIT +#define INIT RWSI diff --git a/lib/locking-selftest-softirq.h b/lib/locking-selftest-softirq.h new file mode 100644 index 000000000000..a83de2a04ace --- /dev/null +++ b/lib/locking-selftest-softirq.h @@ -0,0 +1,9 @@ +#undef IRQ_DISABLE +#undef IRQ_ENABLE +#undef IRQ_ENTER +#undef IRQ_EXIT + +#define IRQ_DISABLE SOFTIRQ_DISABLE +#define IRQ_ENABLE SOFTIRQ_ENABLE +#define IRQ_ENTER SOFTIRQ_ENTER +#define IRQ_EXIT SOFTIRQ_EXIT diff --git a/lib/locking-selftest-spin-hardirq.h b/lib/locking-selftest-spin-hardirq.h new file mode 100644 index 000000000000..693198dce30a --- /dev/null +++ b/lib/locking-selftest-spin-hardirq.h @@ -0,0 +1,2 @@ +#include "locking-selftest-spin.h" +#include "locking-selftest-hardirq.h" diff --git a/lib/locking-selftest-spin-softirq.h b/lib/locking-selftest-spin-softirq.h new file mode 100644 index 000000000000..c472e2a87ffc --- /dev/null +++ b/lib/locking-selftest-spin-softirq.h @@ -0,0 +1,2 @@ +#include "locking-selftest-spin.h" +#include "locking-selftest-softirq.h" diff --git 
a/lib/locking-selftest-spin.h b/lib/locking-selftest-spin.h new file mode 100644 index 000000000000..ccd1b4b09757 --- /dev/null +++ b/lib/locking-selftest-spin.h @@ -0,0 +1,11 @@ +#undef LOCK +#define LOCK L + +#undef UNLOCK +#define UNLOCK U + +#undef RLOCK +#undef WLOCK + +#undef INIT +#define INIT SI diff --git a/lib/locking-selftest-wlock-hardirq.h b/lib/locking-selftest-wlock-hardirq.h new file mode 100644 index 000000000000..2dd2e5122caa --- /dev/null +++ b/lib/locking-selftest-wlock-hardirq.h @@ -0,0 +1,2 @@ +#include "locking-selftest-wlock.h" +#include "locking-selftest-hardirq.h" diff --git a/lib/locking-selftest-wlock-softirq.h b/lib/locking-selftest-wlock-softirq.h new file mode 100644 index 000000000000..cb80d1cb944e --- /dev/null +++ b/lib/locking-selftest-wlock-softirq.h @@ -0,0 +1,2 @@ +#include "locking-selftest-wlock.h" +#include "locking-selftest-softirq.h" diff --git a/lib/locking-selftest-wlock.h b/lib/locking-selftest-wlock.h new file mode 100644 index 000000000000..0815322d99ed --- /dev/null +++ b/lib/locking-selftest-wlock.h @@ -0,0 +1,14 @@ +#undef LOCK +#define LOCK WL + +#undef UNLOCK +#define UNLOCK WU + +#undef RLOCK +#define RLOCK RL + +#undef WLOCK +#define WLOCK WL + +#undef INIT +#define INIT RWI diff --git a/lib/locking-selftest-wsem.h b/lib/locking-selftest-wsem.h new file mode 100644 index 000000000000..b88c5f2dc5f0 --- /dev/null +++ b/lib/locking-selftest-wsem.h @@ -0,0 +1,14 @@ +#undef LOCK +#define LOCK WSL + +#undef UNLOCK +#define UNLOCK WSU + +#undef RLOCK +#define RLOCK RSL + +#undef WLOCK +#define WLOCK WSL + +#undef INIT +#define INIT RWSI diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c new file mode 100644 index 000000000000..7945787f439a --- /dev/null +++ b/lib/locking-selftest.c @@ -0,0 +1,1216 @@ +/* + * lib/locking-selftest.c + * + * Testsuite for various locking APIs: spinlocks, rwlocks, + * mutexes and rw-semaphores. + * + * It is checking both false positives and false negatives. 
+ * + * Started by Ingo Molnar: + * + * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + */ +#include <linux/rwsem.h> +#include <linux/mutex.h> +#include <linux/sched.h> +#include <linux/delay.h> +#include <linux/module.h> +#include <linux/lockdep.h> +#include <linux/spinlock.h> +#include <linux/kallsyms.h> +#include <linux/interrupt.h> +#include <linux/debug_locks.h> +#include <linux/irqflags.h> + +/* + * Change this to 1 if you want to see the failure printouts: + */ +static unsigned int debug_locks_verbose; + +static int __init setup_debug_locks_verbose(char *str) +{ + get_option(&str, &debug_locks_verbose); + + return 1; +} + +__setup("debug_locks_verbose=", setup_debug_locks_verbose); + +#define FAILURE 0 +#define SUCCESS 1 + +#define LOCKTYPE_SPIN 0x1 +#define LOCKTYPE_RWLOCK 0x2 +#define LOCKTYPE_MUTEX 0x4 +#define LOCKTYPE_RWSEM 0x8 + +/* + * Normal standalone locks, for the circular and irq-context + * dependency tests: + */ +static DEFINE_SPINLOCK(lock_A); +static DEFINE_SPINLOCK(lock_B); +static DEFINE_SPINLOCK(lock_C); +static DEFINE_SPINLOCK(lock_D); + +static DEFINE_RWLOCK(rwlock_A); +static DEFINE_RWLOCK(rwlock_B); +static DEFINE_RWLOCK(rwlock_C); +static DEFINE_RWLOCK(rwlock_D); + +static DEFINE_MUTEX(mutex_A); +static DEFINE_MUTEX(mutex_B); +static DEFINE_MUTEX(mutex_C); +static DEFINE_MUTEX(mutex_D); + +static DECLARE_RWSEM(rwsem_A); +static DECLARE_RWSEM(rwsem_B); +static DECLARE_RWSEM(rwsem_C); +static DECLARE_RWSEM(rwsem_D); + +/* + * Locks that we initialize dynamically as well so that + * e.g. X1 and X2 become two instances of the same class, + * but X* and Y* are different classes.
We do this so that + * we do not trigger a real lockup: + */ +static DEFINE_SPINLOCK(lock_X1); +static DEFINE_SPINLOCK(lock_X2); +static DEFINE_SPINLOCK(lock_Y1); +static DEFINE_SPINLOCK(lock_Y2); +static DEFINE_SPINLOCK(lock_Z1); +static DEFINE_SPINLOCK(lock_Z2); + +static DEFINE_RWLOCK(rwlock_X1); +static DEFINE_RWLOCK(rwlock_X2); +static DEFINE_RWLOCK(rwlock_Y1); +static DEFINE_RWLOCK(rwlock_Y2); +static DEFINE_RWLOCK(rwlock_Z1); +static DEFINE_RWLOCK(rwlock_Z2); + +static DEFINE_MUTEX(mutex_X1); +static DEFINE_MUTEX(mutex_X2); +static DEFINE_MUTEX(mutex_Y1); +static DEFINE_MUTEX(mutex_Y2); +static DEFINE_MUTEX(mutex_Z1); +static DEFINE_MUTEX(mutex_Z2); + +static DECLARE_RWSEM(rwsem_X1); +static DECLARE_RWSEM(rwsem_X2); +static DECLARE_RWSEM(rwsem_Y1); +static DECLARE_RWSEM(rwsem_Y2); +static DECLARE_RWSEM(rwsem_Z1); +static DECLARE_RWSEM(rwsem_Z2); + +/* + * non-inlined runtime initializers, to let separate locks share + * the same lock-class: + */ +#define INIT_CLASS_FUNC(class) \ +static noinline void \ +init_class_##class(spinlock_t *lock, rwlock_t *rwlock, struct mutex *mutex, \ + struct rw_semaphore *rwsem) \ +{ \ + spin_lock_init(lock); \ + rwlock_init(rwlock); \ + mutex_init(mutex); \ + init_rwsem(rwsem); \ +} + +INIT_CLASS_FUNC(X) +INIT_CLASS_FUNC(Y) +INIT_CLASS_FUNC(Z) + +static void init_shared_classes(void) +{ + init_class_X(&lock_X1, &rwlock_X1, &mutex_X1, &rwsem_X1); + init_class_X(&lock_X2, &rwlock_X2, &mutex_X2, &rwsem_X2); + + init_class_Y(&lock_Y1, &rwlock_Y1, &mutex_Y1, &rwsem_Y1); + init_class_Y(&lock_Y2, &rwlock_Y2, &mutex_Y2, &rwsem_Y2); + + init_class_Z(&lock_Z1, &rwlock_Z1, &mutex_Z1, &rwsem_Z1); + init_class_Z(&lock_Z2, &rwlock_Z2, &mutex_Z2, &rwsem_Z2); +} + +/* + * For spinlocks and rwlocks we also do hardirq-safe / softirq-safe tests. 
+ * The following functions use a lock from a simulated hardirq/softirq + * context, causing the locks to be marked as hardirq-safe/softirq-safe: + */ + +#define HARDIRQ_DISABLE local_irq_disable +#define HARDIRQ_ENABLE local_irq_enable + +#define HARDIRQ_ENTER() \ + local_irq_disable(); \ + irq_enter(); \ + WARN_ON(!in_irq()); + +#define HARDIRQ_EXIT() \ + __irq_exit(); \ + local_irq_enable(); + +#define SOFTIRQ_DISABLE local_bh_disable +#define SOFTIRQ_ENABLE local_bh_enable + +#define SOFTIRQ_ENTER() \ + local_bh_disable(); \ + local_irq_disable(); \ + trace_softirq_enter(); \ + WARN_ON(!in_softirq()); + +#define SOFTIRQ_EXIT() \ + trace_softirq_exit(); \ + local_irq_enable(); \ + local_bh_enable(); + +/* + * Shortcuts for lock/unlock API variants, to keep + * the testcases compact: + */ +#define L(x) spin_lock(&lock_##x) +#define U(x) spin_unlock(&lock_##x) +#define LU(x) L(x); U(x) +#define SI(x) spin_lock_init(&lock_##x) + +#define WL(x) write_lock(&rwlock_##x) +#define WU(x) write_unlock(&rwlock_##x) +#define WLU(x) WL(x); WU(x) + +#define RL(x) read_lock(&rwlock_##x) +#define RU(x) read_unlock(&rwlock_##x) +#define RLU(x) RL(x); RU(x) +#define RWI(x) rwlock_init(&rwlock_##x) + +#define ML(x) mutex_lock(&mutex_##x) +#define MU(x) mutex_unlock(&mutex_##x) +#define MI(x) mutex_init(&mutex_##x) + +#define WSL(x) down_write(&rwsem_##x) +#define WSU(x) up_write(&rwsem_##x) + +#define RSL(x) down_read(&rwsem_##x) +#define RSU(x) up_read(&rwsem_##x) +#define RWSI(x) init_rwsem(&rwsem_##x) + +#define LOCK_UNLOCK_2(x,y) LOCK(x); LOCK(y); UNLOCK(y); UNLOCK(x) + +/* + * Generate different permutations of the same testcase, using + * the same basic lock-dependency/state events: + */ + +#define GENERATE_TESTCASE(name) \ + \ +static void name(void) { E(); } + +#define GENERATE_PERMUTATIONS_2_EVENTS(name) \ + \ +static void name##_12(void) { E1(); E2(); } \ +static void name##_21(void) { E2(); E1(); } + +#define GENERATE_PERMUTATIONS_3_EVENTS(name) \ + \ +static void 
name##_123(void) { E1(); E2(); E3(); } \ +static void name##_132(void) { E1(); E3(); E2(); } \ +static void name##_213(void) { E2(); E1(); E3(); } \ +static void name##_231(void) { E2(); E3(); E1(); } \ +static void name##_312(void) { E3(); E1(); E2(); } \ +static void name##_321(void) { E3(); E2(); E1(); } + +/* + * AA deadlock: + */ + +#define E() \ + \ + LOCK(X1); \ + LOCK(X2); /* this one should fail */ + +/* + * 6 testcases: + */ +#include "locking-selftest-spin.h" +GENERATE_TESTCASE(AA_spin) +#include "locking-selftest-wlock.h" +GENERATE_TESTCASE(AA_wlock) +#include "locking-selftest-rlock.h" +GENERATE_TESTCASE(AA_rlock) +#include "locking-selftest-mutex.h" +GENERATE_TESTCASE(AA_mutex) +#include "locking-selftest-wsem.h" +GENERATE_TESTCASE(AA_wsem) +#include "locking-selftest-rsem.h" +GENERATE_TESTCASE(AA_rsem) + +#undef E + +/* + * Special-case for read-locking, they are + * allowed to recurse on the same lock class: + */ +static void rlock_AA1(void) +{ + RL(X1); + RL(X1); // this one should NOT fail +} + +static void rlock_AA1B(void) +{ + RL(X1); + RL(X2); // this one should NOT fail +} + +static void rsem_AA1(void) +{ + RSL(X1); + RSL(X1); // this one should fail +} + +static void rsem_AA1B(void) +{ + RSL(X1); + RSL(X2); // this one should fail +} +/* + * The mixing of read and write locks is not allowed: + */ +static void rlock_AA2(void) +{ + RL(X1); + WL(X2); // this one should fail +} + +static void rsem_AA2(void) +{ + RSL(X1); + WSL(X2); // this one should fail +} + +static void rlock_AA3(void) +{ + WL(X1); + RL(X2); // this one should fail +} + +static void rsem_AA3(void) +{ + WSL(X1); + RSL(X2); // this one should fail +} + +/* + * ABBA deadlock: + */ + +#define E() \ + \ + LOCK_UNLOCK_2(A, B); \ + LOCK_UNLOCK_2(B, A); /* fail */ + +/* + * 6 testcases: + */ +#include "locking-selftest-spin.h" +GENERATE_TESTCASE(ABBA_spin) +#include "locking-selftest-wlock.h" +GENERATE_TESTCASE(ABBA_wlock) +#include "locking-selftest-rlock.h" 
+GENERATE_TESTCASE(ABBA_rlock) +#include "locking-selftest-mutex.h" +GENERATE_TESTCASE(ABBA_mutex) +#include "locking-selftest-wsem.h" +GENERATE_TESTCASE(ABBA_wsem) +#include "locking-selftest-rsem.h" +GENERATE_TESTCASE(ABBA_rsem) + +#undef E + +/* + * AB BC CA deadlock: + */ + +#define E() \ + \ + LOCK_UNLOCK_2(A, B); \ + LOCK_UNLOCK_2(B, C); \ + LOCK_UNLOCK_2(C, A); /* fail */ + +/* + * 6 testcases: + */ +#include "locking-selftest-spin.h" +GENERATE_TESTCASE(ABBCCA_spin) +#include "locking-selftest-wlock.h" +GENERATE_TESTCASE(ABBCCA_wlock) +#include "locking-selftest-rlock.h" +GENERATE_TESTCASE(ABBCCA_rlock) +#include "locking-selftest-mutex.h" +GENERATE_TESTCASE(ABBCCA_mutex) +#include "locking-selftest-wsem.h" +GENERATE_TESTCASE(ABBCCA_wsem) +#include "locking-selftest-rsem.h" +GENERATE_TESTCASE(ABBCCA_rsem) + +#undef E + +/* + * AB CA BC deadlock: + */ + +#define E() \ + \ + LOCK_UNLOCK_2(A, B); \ + LOCK_UNLOCK_2(C, A); \ + LOCK_UNLOCK_2(B, C); /* fail */ + +/* + * 6 testcases: + */ +#include "locking-selftest-spin.h" +GENERATE_TESTCASE(ABCABC_spin) +#include "locking-selftest-wlock.h" +GENERATE_TESTCASE(ABCABC_wlock) +#include "locking-selftest-rlock.h" +GENERATE_TESTCASE(ABCABC_rlock) +#include "locking-selftest-mutex.h" +GENERATE_TESTCASE(ABCABC_mutex) +#include "locking-selftest-wsem.h" +GENERATE_TESTCASE(ABCABC_wsem) +#include "locking-selftest-rsem.h" +GENERATE_TESTCASE(ABCABC_rsem) + +#undef E + +/* + * AB BC CD DA deadlock: + */ + +#define E() \ + \ + LOCK_UNLOCK_2(A, B); \ + LOCK_UNLOCK_2(B, C); \ + LOCK_UNLOCK_2(C, D); \ + LOCK_UNLOCK_2(D, A); /* fail */ + +/* + * 6 testcases: + */ +#include "locking-selftest-spin.h" +GENERATE_TESTCASE(ABBCCDDA_spin) +#include "locking-selftest-wlock.h" +GENERATE_TESTCASE(ABBCCDDA_wlock) +#include "locking-selftest-rlock.h" +GENERATE_TESTCASE(ABBCCDDA_rlock) +#include "locking-selftest-mutex.h" +GENERATE_TESTCASE(ABBCCDDA_mutex) +#include "locking-selftest-wsem.h" +GENERATE_TESTCASE(ABBCCDDA_wsem) +#include 
"locking-selftest-rsem.h" +GENERATE_TESTCASE(ABBCCDDA_rsem) + +#undef E + +/* + * AB CD BD DA deadlock: + */ +#define E() \ + \ + LOCK_UNLOCK_2(A, B); \ + LOCK_UNLOCK_2(C, D); \ + LOCK_UNLOCK_2(B, D); \ + LOCK_UNLOCK_2(D, A); /* fail */ + +/* + * 6 testcases: + */ +#include "locking-selftest-spin.h" +GENERATE_TESTCASE(ABCDBDDA_spin) +#include "locking-selftest-wlock.h" +GENERATE_TESTCASE(ABCDBDDA_wlock) +#include "locking-selftest-rlock.h" +GENERATE_TESTCASE(ABCDBDDA_rlock) +#include "locking-selftest-mutex.h" +GENERATE_TESTCASE(ABCDBDDA_mutex) +#include "locking-selftest-wsem.h" +GENERATE_TESTCASE(ABCDBDDA_wsem) +#include "locking-selftest-rsem.h" +GENERATE_TESTCASE(ABCDBDDA_rsem) + +#undef E + +/* + * AB CD BC DA deadlock: + */ +#define E() \ + \ + LOCK_UNLOCK_2(A, B); \ + LOCK_UNLOCK_2(C, D); \ + LOCK_UNLOCK_2(B, C); \ + LOCK_UNLOCK_2(D, A); /* fail */ + +/* + * 6 testcases: + */ +#include "locking-selftest-spin.h" +GENERATE_TESTCASE(ABCDBCDA_spin) +#include "locking-selftest-wlock.h" +GENERATE_TESTCASE(ABCDBCDA_wlock) +#include "locking-selftest-rlock.h" +GENERATE_TESTCASE(ABCDBCDA_rlock) +#include "locking-selftest-mutex.h" +GENERATE_TESTCASE(ABCDBCDA_mutex) +#include "locking-selftest-wsem.h" +GENERATE_TESTCASE(ABCDBCDA_wsem) +#include "locking-selftest-rsem.h" +GENERATE_TESTCASE(ABCDBCDA_rsem) + +#undef E + +/* + * Double unlock: + */ +#define E() \ + \ + LOCK(A); \ + UNLOCK(A); \ + UNLOCK(A); /* fail */ + +/* + * 6 testcases: + */ +#include "locking-selftest-spin.h" +GENERATE_TESTCASE(double_unlock_spin) +#include "locking-selftest-wlock.h" +GENERATE_TESTCASE(double_unlock_wlock) +#include "locking-selftest-rlock.h" +GENERATE_TESTCASE(double_unlock_rlock) +#include "locking-selftest-mutex.h" +GENERATE_TESTCASE(double_unlock_mutex) +#include "locking-selftest-wsem.h" +GENERATE_TESTCASE(double_unlock_wsem) +#include "locking-selftest-rsem.h" +GENERATE_TESTCASE(double_unlock_rsem) + +#undef E + +/* + * Bad unlock ordering: + */ +#define E() \ + \ + LOCK(A); \ 
+ LOCK(B); \ + UNLOCK(A); /* fail */ \ + UNLOCK(B); + +/* + * 6 testcases: + */ +#include "locking-selftest-spin.h" +GENERATE_TESTCASE(bad_unlock_order_spin) +#include "locking-selftest-wlock.h" +GENERATE_TESTCASE(bad_unlock_order_wlock) +#include "locking-selftest-rlock.h" +GENERATE_TESTCASE(bad_unlock_order_rlock) +#include "locking-selftest-mutex.h" +GENERATE_TESTCASE(bad_unlock_order_mutex) +#include "locking-selftest-wsem.h" +GENERATE_TESTCASE(bad_unlock_order_wsem) +#include "locking-selftest-rsem.h" +GENERATE_TESTCASE(bad_unlock_order_rsem) + +#undef E + +/* + * initializing a held lock: + */ +#define E() \ + \ + LOCK(A); \ + INIT(A); /* fail */ + +/* + * 6 testcases: + */ +#include "locking-selftest-spin.h" +GENERATE_TESTCASE(init_held_spin) +#include "locking-selftest-wlock.h" +GENERATE_TESTCASE(init_held_wlock) +#include "locking-selftest-rlock.h" +GENERATE_TESTCASE(init_held_rlock) +#include "locking-selftest-mutex.h" +GENERATE_TESTCASE(init_held_mutex) +#include "locking-selftest-wsem.h" +GENERATE_TESTCASE(init_held_wsem) +#include "locking-selftest-rsem.h" +GENERATE_TESTCASE(init_held_rsem) + +#undef E + +/* + * locking an irq-safe lock with irqs enabled: + */ +#define E1() \ + \ + IRQ_ENTER(); \ + LOCK(A); \ + UNLOCK(A); \ + IRQ_EXIT(); + +#define E2() \ + \ + LOCK(A); \ + UNLOCK(A); + +/* + * Generate 24 testcases: + */ +#include "locking-selftest-spin-hardirq.h" +GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_spin) + +#include "locking-selftest-rlock-hardirq.h" +GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_rlock) + +#include "locking-selftest-wlock-hardirq.h" +GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_wlock) + +#include "locking-selftest-spin-softirq.h" +GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_spin) + +#include "locking-selftest-rlock-softirq.h" +GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_rlock) + +#include "locking-selftest-wlock-softirq.h" +GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_wlock) + +#undef E1 +#undef E2 + +/* + * Enabling 
hardirqs with a softirq-safe lock held: + */ +#define E1() \ + \ + SOFTIRQ_ENTER(); \ + LOCK(A); \ + UNLOCK(A); \ + SOFTIRQ_EXIT(); + +#define E2() \ + \ + HARDIRQ_DISABLE(); \ + LOCK(A); \ + HARDIRQ_ENABLE(); \ + UNLOCK(A); + +/* + * Generate 12 testcases: + */ +#include "locking-selftest-spin.h" +GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_spin) + +#include "locking-selftest-wlock.h" +GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_wlock) + +#include "locking-selftest-rlock.h" +GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_rlock) + +#undef E1 +#undef E2 + +/* + * Enabling irqs with an irq-safe lock held: + */ +#define E1() \ + \ + IRQ_ENTER(); \ + LOCK(A); \ + UNLOCK(A); \ + IRQ_EXIT(); + +#define E2() \ + \ + IRQ_DISABLE(); \ + LOCK(A); \ + IRQ_ENABLE(); \ + UNLOCK(A); + +/* + * Generate 24 testcases: + */ +#include "locking-selftest-spin-hardirq.h" +GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_spin) + +#include "locking-selftest-rlock-hardirq.h" +GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_rlock) + +#include "locking-selftest-wlock-hardirq.h" +GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_wlock) + +#include "locking-selftest-spin-softirq.h" +GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_spin) + +#include "locking-selftest-rlock-softirq.h" +GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_rlock) + +#include "locking-selftest-wlock-softirq.h" +GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock) + +#undef E1 +#undef E2 + +/* + * Acquiring an irq-unsafe lock while holding an irq-safe-lock: + */ +#define E1() \ + \ + LOCK(A); \ + LOCK(B); \ + UNLOCK(B); \ + UNLOCK(A); \ + +#define E2() \ + \ + LOCK(B); \ + UNLOCK(B); + +#define E3() \ + \ + IRQ_ENTER(); \ + LOCK(A); \ + UNLOCK(A); \ + IRQ_EXIT(); + +/* + * Generate 36 testcases: + */ +#include "locking-selftest-spin-hardirq.h" +GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_spin) + +#include "locking-selftest-rlock-hardirq.h" +GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_rlock) + +#include "locking-selftest-wlock-hardirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_wlock) + +#include "locking-selftest-spin-softirq.h" +GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_spin) + +#include "locking-selftest-rlock-softirq.h" +GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_rlock) + +#include "locking-selftest-wlock-softirq.h" +GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock) + +#undef E1 +#undef E2 +#undef E3 + +/* + * If a lock turns into softirq-safe, but earlier it took + * a softirq-unsafe lock: + */ + +#define E1() \ + IRQ_DISABLE(); \ + LOCK(A); \ + LOCK(B); \ + UNLOCK(B); \ + UNLOCK(A); \ + IRQ_ENABLE(); + +#define E2() \ + LOCK(B); \ + UNLOCK(B); + +#define E3() \ + IRQ_ENTER(); \ + LOCK(A); \ + UNLOCK(A); \ + IRQ_EXIT(); + +/* + * Generate 36 testcases: + */ +#include "locking-selftest-spin-hardirq.h" +GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_spin) + +#include "locking-selftest-rlock-hardirq.h" +GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_rlock) + +#include "locking-selftest-wlock-hardirq.h" +GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_wlock) + +#include "locking-selftest-spin-softirq.h" +GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_spin) + +#include "locking-selftest-rlock-softirq.h" +GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_rlock) + +#include "locking-selftest-wlock-softirq.h" +GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_wlock) + +#undef E1 +#undef E2 +#undef E3 + +/* + * read-lock / write-lock irq inversion. + * + * Deadlock scenario: + * + * CPU#1 is at #1, i.e. it has write-locked A, but has not + * taken B yet. + * + * CPU#2 is at #2, i.e. it has locked B. + * + * Hardirq hits CPU#2 at point #2 and is trying to read-lock A. + * + * The deadlock occurs because CPU#1 will spin on B, and CPU#2 + * will spin on A. 
+ */ + +#define E1() \ + \ + IRQ_DISABLE(); \ + WL(A); \ + LOCK(B); \ + UNLOCK(B); \ + WU(A); \ + IRQ_ENABLE(); + +#define E2() \ + \ + LOCK(B); \ + UNLOCK(B); + +#define E3() \ + \ + IRQ_ENTER(); \ + RL(A); \ + RU(A); \ + IRQ_EXIT(); + +/* + * Generate 36 testcases: + */ +#include "locking-selftest-spin-hardirq.h" +GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_hard_spin) + +#include "locking-selftest-rlock-hardirq.h" +GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_hard_rlock) + +#include "locking-selftest-wlock-hardirq.h" +GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_hard_wlock) + +#include "locking-selftest-spin-softirq.h" +GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_spin) + +#include "locking-selftest-rlock-softirq.h" +GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_rlock) + +#include "locking-selftest-wlock-softirq.h" +GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_wlock) + +#undef E1 +#undef E2 +#undef E3 + +/* + * read-lock / write-lock recursion that is actually safe. + */ + +#define E1() \ + \ + IRQ_DISABLE(); \ + WL(A); \ + WU(A); \ + IRQ_ENABLE(); + +#define E2() \ + \ + RL(A); \ + RU(A); \ + +#define E3() \ + \ + IRQ_ENTER(); \ + RL(A); \ + L(B); \ + U(B); \ + RU(A); \ + IRQ_EXIT(); + +/* + * Generate 12 testcases: + */ +#include "locking-selftest-hardirq.h" +GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_hard) + +#include "locking-selftest-softirq.h" +GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft) + +#undef E1 +#undef E2 +#undef E3 + +/* + * read-lock / write-lock recursion that is unsafe. 
+ */ + +#define E1() \ + \ + IRQ_DISABLE(); \ + L(B); \ + WL(A); \ + WU(A); \ + U(B); \ + IRQ_ENABLE(); + +#define E2() \ + \ + RL(A); \ + RU(A); \ + +#define E3() \ + \ + IRQ_ENTER(); \ + L(B); \ + U(B); \ + IRQ_EXIT(); + +/* + * Generate 12 testcases: + */ +#include "locking-selftest-hardirq.h" +// GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_hard) + +#include "locking-selftest-softirq.h" +// GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_soft) + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define I_SPINLOCK(x) lockdep_reset_lock(&lock_##x.dep_map) +# define I_RWLOCK(x) lockdep_reset_lock(&rwlock_##x.dep_map) +# define I_MUTEX(x) lockdep_reset_lock(&mutex_##x.dep_map) +# define I_RWSEM(x) lockdep_reset_lock(&rwsem_##x.dep_map) +#else +# define I_SPINLOCK(x) +# define I_RWLOCK(x) +# define I_MUTEX(x) +# define I_RWSEM(x) +#endif + +#define I1(x) \ + do { \ + I_SPINLOCK(x); \ + I_RWLOCK(x); \ + I_MUTEX(x); \ + I_RWSEM(x); \ + } while (0) + +#define I2(x) \ + do { \ + spin_lock_init(&lock_##x); \ + rwlock_init(&rwlock_##x); \ + mutex_init(&mutex_##x); \ + init_rwsem(&rwsem_##x); \ + } while (0) + +static void reset_locks(void) +{ + local_irq_disable(); + I1(A); I1(B); I1(C); I1(D); + I1(X1); I1(X2); I1(Y1); I1(Y2); I1(Z1); I1(Z2); + lockdep_reset(); + I2(A); I2(B); I2(C); I2(D); + init_shared_classes(); + local_irq_enable(); +} + +#undef I + +static int testcase_total; +static int testcase_successes; +static int expected_testcase_failures; +static int unexpected_testcase_failures; + +static void dotest(void (*testcase_fn)(void), int expected, int lockclass_mask) +{ + unsigned long saved_preempt_count = preempt_count(); + int expected_failure = 0; + + WARN_ON(irqs_disabled()); + + testcase_fn(); + /* + * Filter out expected failures: + */ +#ifndef CONFIG_PROVE_LOCKING + if ((lockclass_mask & LOCKTYPE_SPIN) && debug_locks != expected) + expected_failure = 1; + if ((lockclass_mask & LOCKTYPE_RWLOCK) && debug_locks != expected) + expected_failure = 1; + if 
((lockclass_mask & LOCKTYPE_MUTEX) && debug_locks != expected) + expected_failure = 1; + if ((lockclass_mask & LOCKTYPE_RWSEM) && debug_locks != expected) + expected_failure = 1; +#endif + if (debug_locks != expected) { + if (expected_failure) { + expected_testcase_failures++; + printk("failed|"); + } else { + unexpected_testcase_failures++; + printk("FAILED|"); + } + } else { + testcase_successes++; + printk(" ok |"); + } + testcase_total++; + + if (debug_locks_verbose) + printk(" lockclass mask: %x, debug_locks: %d, expected: %d\n", + lockclass_mask, debug_locks, expected); + /* + * Some tests (e.g. double-unlock) might corrupt the preemption + * count, so restore it: + */ + preempt_count() = saved_preempt_count; +#ifdef CONFIG_TRACE_IRQFLAGS + if (softirq_count()) + current->softirqs_enabled = 0; + else + current->softirqs_enabled = 1; +#endif + + reset_locks(); +} + +static inline void print_testname(const char *testname) +{ + printk("%33s:", testname); +} + +#define DO_TESTCASE_1(desc, name, nr) \ + print_testname(desc"/"#nr); \ + dotest(name##_##nr, SUCCESS, LOCKTYPE_RWLOCK); \ + printk("\n"); + +#define DO_TESTCASE_1B(desc, name, nr) \ + print_testname(desc"/"#nr); \ + dotest(name##_##nr, FAILURE, LOCKTYPE_RWLOCK); \ + printk("\n"); + +#define DO_TESTCASE_3(desc, name, nr) \ + print_testname(desc"/"#nr); \ + dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN); \ + dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK); \ + dotest(name##_rlock_##nr, SUCCESS, LOCKTYPE_RWLOCK); \ + printk("\n"); + +#define DO_TESTCASE_3RW(desc, name, nr) \ + print_testname(desc"/"#nr); \ + dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN|LOCKTYPE_RWLOCK);\ + dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK); \ + dotest(name##_rlock_##nr, SUCCESS, LOCKTYPE_RWLOCK); \ + printk("\n"); + +#define DO_TESTCASE_6(desc, name) \ + print_testname(desc); \ + dotest(name##_spin, FAILURE, LOCKTYPE_SPIN); \ + dotest(name##_wlock, FAILURE, LOCKTYPE_RWLOCK); \ + dotest(name##_rlock, FAILURE, 
LOCKTYPE_RWLOCK); \ + dotest(name##_mutex, FAILURE, LOCKTYPE_MUTEX); \ + dotest(name##_wsem, FAILURE, LOCKTYPE_RWSEM); \ + dotest(name##_rsem, FAILURE, LOCKTYPE_RWSEM); \ + printk("\n"); + +#define DO_TESTCASE_6_SUCCESS(desc, name) \ + print_testname(desc); \ + dotest(name##_spin, SUCCESS, LOCKTYPE_SPIN); \ + dotest(name##_wlock, SUCCESS, LOCKTYPE_RWLOCK); \ + dotest(name##_rlock, SUCCESS, LOCKTYPE_RWLOCK); \ + dotest(name##_mutex, SUCCESS, LOCKTYPE_MUTEX); \ + dotest(name##_wsem, SUCCESS, LOCKTYPE_RWSEM); \ + dotest(name##_rsem, SUCCESS, LOCKTYPE_RWSEM); \ + printk("\n"); + +/* + * 'read' variant: rlocks must not trigger. + */ +#define DO_TESTCASE_6R(desc, name) \ + print_testname(desc); \ + dotest(name##_spin, FAILURE, LOCKTYPE_SPIN); \ + dotest(name##_wlock, FAILURE, LOCKTYPE_RWLOCK); \ + dotest(name##_rlock, SUCCESS, LOCKTYPE_RWLOCK); \ + dotest(name##_mutex, FAILURE, LOCKTYPE_MUTEX); \ + dotest(name##_wsem, FAILURE, LOCKTYPE_RWSEM); \ + dotest(name##_rsem, FAILURE, LOCKTYPE_RWSEM); \ + printk("\n"); + +#define DO_TESTCASE_2I(desc, name, nr) \ + DO_TESTCASE_1("hard-"desc, name##_hard, nr); \ + DO_TESTCASE_1("soft-"desc, name##_soft, nr); + +#define DO_TESTCASE_2IB(desc, name, nr) \ + DO_TESTCASE_1B("hard-"desc, name##_hard, nr); \ + DO_TESTCASE_1B("soft-"desc, name##_soft, nr); + +#define DO_TESTCASE_6I(desc, name, nr) \ + DO_TESTCASE_3("hard-"desc, name##_hard, nr); \ + DO_TESTCASE_3("soft-"desc, name##_soft, nr); + +#define DO_TESTCASE_6IRW(desc, name, nr) \ + DO_TESTCASE_3RW("hard-"desc, name##_hard, nr); \ + DO_TESTCASE_3RW("soft-"desc, name##_soft, nr); + +#define DO_TESTCASE_2x3(desc, name) \ + DO_TESTCASE_3(desc, name, 12); \ + DO_TESTCASE_3(desc, name, 21); + +#define DO_TESTCASE_2x6(desc, name) \ + DO_TESTCASE_6I(desc, name, 12); \ + DO_TESTCASE_6I(desc, name, 21); + +#define DO_TESTCASE_6x2(desc, name) \ + DO_TESTCASE_2I(desc, name, 123); \ + DO_TESTCASE_2I(desc, name, 132); \ + DO_TESTCASE_2I(desc, name, 213); \ + DO_TESTCASE_2I(desc, name, 231); \ + 
DO_TESTCASE_2I(desc, name, 312); \ + DO_TESTCASE_2I(desc, name, 321); + +#define DO_TESTCASE_6x2B(desc, name) \ + DO_TESTCASE_2IB(desc, name, 123); \ + DO_TESTCASE_2IB(desc, name, 132); \ + DO_TESTCASE_2IB(desc, name, 213); \ + DO_TESTCASE_2IB(desc, name, 231); \ + DO_TESTCASE_2IB(desc, name, 312); \ + DO_TESTCASE_2IB(desc, name, 321); + +#define DO_TESTCASE_6x6(desc, name) \ + DO_TESTCASE_6I(desc, name, 123); \ + DO_TESTCASE_6I(desc, name, 132); \ + DO_TESTCASE_6I(desc, name, 213); \ + DO_TESTCASE_6I(desc, name, 231); \ + DO_TESTCASE_6I(desc, name, 312); \ + DO_TESTCASE_6I(desc, name, 321); + +#define DO_TESTCASE_6x6RW(desc, name) \ + DO_TESTCASE_6IRW(desc, name, 123); \ + DO_TESTCASE_6IRW(desc, name, 132); \ + DO_TESTCASE_6IRW(desc, name, 213); \ + DO_TESTCASE_6IRW(desc, name, 231); \ + DO_TESTCASE_6IRW(desc, name, 312); \ + DO_TESTCASE_6IRW(desc, name, 321); + + +void locking_selftest(void) +{ + /* + * Got a locking failure before the selftest ran? + */ + if (!debug_locks) { + printk("----------------------------------\n"); + printk("| Locking API testsuite disabled |\n"); + printk("----------------------------------\n"); + return; + } + + /* + * Run the testsuite: + */ + printk("------------------------\n"); + printk("| Locking API testsuite:\n"); + printk("----------------------------------------------------------------------------\n"); + printk(" | spin |wlock |rlock |mutex | wsem | rsem |\n"); + printk(" --------------------------------------------------------------------------\n"); + + init_shared_classes(); + debug_locks_silent = !debug_locks_verbose; + + DO_TESTCASE_6R("A-A deadlock", AA); + DO_TESTCASE_6R("A-B-B-A deadlock", ABBA); + DO_TESTCASE_6R("A-B-B-C-C-A deadlock", ABBCCA); + DO_TESTCASE_6R("A-B-C-A-B-C deadlock", ABCABC); + DO_TESTCASE_6R("A-B-B-C-C-D-D-A deadlock", ABBCCDDA); + DO_TESTCASE_6R("A-B-C-D-B-D-D-A deadlock", ABCDBDDA); + DO_TESTCASE_6R("A-B-C-D-B-C-D-A deadlock", ABCDBCDA); + DO_TESTCASE_6("double unlock", double_unlock); + 
DO_TESTCASE_6("initialize held", init_held); + DO_TESTCASE_6_SUCCESS("bad unlock order", bad_unlock_order); + + printk(" --------------------------------------------------------------------------\n"); + print_testname("recursive read-lock"); + printk(" |"); + dotest(rlock_AA1, SUCCESS, LOCKTYPE_RWLOCK); + printk(" |"); + dotest(rsem_AA1, FAILURE, LOCKTYPE_RWSEM); + printk("\n"); + + print_testname("recursive read-lock #2"); + printk(" |"); + dotest(rlock_AA1B, SUCCESS, LOCKTYPE_RWLOCK); + printk(" |"); + dotest(rsem_AA1B, FAILURE, LOCKTYPE_RWSEM); + printk("\n"); + + print_testname("mixed read-write-lock"); + printk(" |"); + dotest(rlock_AA2, FAILURE, LOCKTYPE_RWLOCK); + printk(" |"); + dotest(rsem_AA2, FAILURE, LOCKTYPE_RWSEM); + printk("\n"); + + print_testname("mixed write-read-lock"); + printk(" |"); + dotest(rlock_AA3, FAILURE, LOCKTYPE_RWLOCK); + printk(" |"); + dotest(rsem_AA3, FAILURE, LOCKTYPE_RWSEM); + printk("\n"); + + printk(" --------------------------------------------------------------------------\n"); + + /* + * irq-context testcases: + */ + DO_TESTCASE_2x6("irqs-on + irq-safe-A", irqsafe1); + DO_TESTCASE_2x3("sirq-safe-A => hirqs-on", irqsafe2A); + DO_TESTCASE_2x6("safe-A + irqs-on", irqsafe2B); + DO_TESTCASE_6x6("safe-A + unsafe-B #1", irqsafe3); + DO_TESTCASE_6x6("safe-A + unsafe-B #2", irqsafe4); + DO_TESTCASE_6x6RW("irq lock-inversion", irq_inversion); + + DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion); +// DO_TESTCASE_6x2B("irq read-recursion #2", irq_read_recursion2); + + if (unexpected_testcase_failures) { + printk("-----------------------------------------------------------------\n"); + debug_locks = 0; + printk("BUG: %3d unexpected failures (out of %3d) - debugging disabled! 
|\n", + unexpected_testcase_failures, testcase_total); + printk("-----------------------------------------------------------------\n"); + } else if (expected_testcase_failures && testcase_successes) { + printk("--------------------------------------------------------\n"); + printk("%3d out of %3d testcases failed, as expected. |\n", + expected_testcase_failures, testcase_total); + printk("----------------------------------------------------\n"); + debug_locks = 1; + } else if (expected_testcase_failures && !testcase_successes) { + printk("--------------------------------------------------------\n"); + printk("All %3d testcases failed, as expected. |\n", + expected_testcase_failures); + printk("----------------------------------------\n"); + debug_locks = 1; + } else { + printk("-------------------------------------------------------\n"); + printk("Good, all %3d testcases passed! |\n", + testcase_successes); + printk("---------------------------------\n"); + debug_locks = 1; + } + debug_locks_silent = 0; +} diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c index 40ffde940a86..db4fed74b940 100644 --- a/lib/rwsem-spinlock.c +++ b/lib/rwsem-spinlock.c @@ -17,27 +17,22 @@ struct rwsem_waiter { #define RWSEM_WAITING_FOR_WRITE 0x00000002 }; -#if RWSEM_DEBUG -void rwsemtrace(struct rw_semaphore *sem, const char *str) -{ - if (sem->debug) - printk("[%d] %s({%d,%d})\n", - current->pid, str, sem->activity, - list_empty(&sem->wait_list) ? 
0 : 1); -} -#endif - /* * initialise the semaphore */ -void fastcall init_rwsem(struct rw_semaphore *sem) +void __init_rwsem(struct rw_semaphore *sem, const char *name, + struct lock_class_key *key) { +#ifdef CONFIG_DEBUG_LOCK_ALLOC + /* + * Make sure we are not reinitializing a held semaphore: + */ + debug_check_no_locks_freed((void *)sem, sizeof(*sem)); + lockdep_init_map(&sem->dep_map, name, key); +#endif sem->activity = 0; spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->wait_list); -#if RWSEM_DEBUG - sem->debug = 0; -#endif } /* @@ -56,8 +51,6 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) struct task_struct *tsk; int woken; - rwsemtrace(sem, "Entering __rwsem_do_wake"); - waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); if (!wakewrite) { @@ -104,7 +97,6 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) sem->activity += woken; out: - rwsemtrace(sem, "Leaving __rwsem_do_wake"); return sem; } @@ -138,8 +130,6 @@ void fastcall __sched __down_read(struct rw_semaphore *sem) struct rwsem_waiter waiter; struct task_struct *tsk; - rwsemtrace(sem, "Entering __down_read"); - spin_lock_irq(&sem->wait_lock); if (sem->activity >= 0 && list_empty(&sem->wait_list)) { @@ -171,9 +161,8 @@ void fastcall __sched __down_read(struct rw_semaphore *sem) } tsk->state = TASK_RUNNING; - out: - rwsemtrace(sem, "Leaving __down_read"); + ; } /* @@ -184,7 +173,6 @@ int fastcall __down_read_trylock(struct rw_semaphore *sem) unsigned long flags; int ret = 0; - rwsemtrace(sem, "Entering __down_read_trylock"); spin_lock_irqsave(&sem->wait_lock, flags); @@ -196,7 +184,6 @@ int fastcall __down_read_trylock(struct rw_semaphore *sem) spin_unlock_irqrestore(&sem->wait_lock, flags); - rwsemtrace(sem, "Leaving __down_read_trylock"); return ret; } @@ -204,13 +191,11 @@ int fastcall __down_read_trylock(struct rw_semaphore *sem) * get a write lock on the semaphore * - we increment the waiting count anyway to indicate an exclusive lock */ -void fastcall 
__sched __down_write(struct rw_semaphore *sem) +void fastcall __sched __down_write_nested(struct rw_semaphore *sem, int subclass) { struct rwsem_waiter waiter; struct task_struct *tsk; - rwsemtrace(sem, "Entering __down_write"); - spin_lock_irq(&sem->wait_lock); if (sem->activity == 0 && list_empty(&sem->wait_list)) { @@ -242,9 +227,13 @@ void fastcall __sched __down_write(struct rw_semaphore *sem) } tsk->state = TASK_RUNNING; - out: - rwsemtrace(sem, "Leaving __down_write"); + ; +} + +void fastcall __sched __down_write(struct rw_semaphore *sem) +{ + __down_write_nested(sem, 0); } /* @@ -255,8 +244,6 @@ int fastcall __down_write_trylock(struct rw_semaphore *sem) unsigned long flags; int ret = 0; - rwsemtrace(sem, "Entering __down_write_trylock"); - spin_lock_irqsave(&sem->wait_lock, flags); if (sem->activity == 0 && list_empty(&sem->wait_list)) { @@ -267,7 +254,6 @@ int fastcall __down_write_trylock(struct rw_semaphore *sem) spin_unlock_irqrestore(&sem->wait_lock, flags); - rwsemtrace(sem, "Leaving __down_write_trylock"); return ret; } @@ -278,16 +264,12 @@ void fastcall __up_read(struct rw_semaphore *sem) { unsigned long flags; - rwsemtrace(sem, "Entering __up_read"); - spin_lock_irqsave(&sem->wait_lock, flags); if (--sem->activity == 0 && !list_empty(&sem->wait_list)) sem = __rwsem_wake_one_writer(sem); spin_unlock_irqrestore(&sem->wait_lock, flags); - - rwsemtrace(sem, "Leaving __up_read"); } /* @@ -297,8 +279,6 @@ void fastcall __up_write(struct rw_semaphore *sem) { unsigned long flags; - rwsemtrace(sem, "Entering __up_write"); - spin_lock_irqsave(&sem->wait_lock, flags); sem->activity = 0; @@ -306,8 +286,6 @@ void fastcall __up_write(struct rw_semaphore *sem) sem = __rwsem_do_wake(sem, 1); spin_unlock_irqrestore(&sem->wait_lock, flags); - - rwsemtrace(sem, "Leaving __up_write"); } /* @@ -318,8 +296,6 @@ void fastcall __downgrade_write(struct rw_semaphore *sem) { unsigned long flags; - rwsemtrace(sem, "Entering __downgrade_write"); - 
spin_lock_irqsave(&sem->wait_lock, flags); sem->activity = 1; @@ -327,18 +303,14 @@ void fastcall __downgrade_write(struct rw_semaphore *sem) sem = __rwsem_do_wake(sem, 0); spin_unlock_irqrestore(&sem->wait_lock, flags); - - rwsemtrace(sem, "Leaving __downgrade_write"); } -EXPORT_SYMBOL(init_rwsem); +EXPORT_SYMBOL(__init_rwsem); EXPORT_SYMBOL(__down_read); EXPORT_SYMBOL(__down_read_trylock); +EXPORT_SYMBOL(__down_write_nested); EXPORT_SYMBOL(__down_write); EXPORT_SYMBOL(__down_write_trylock); EXPORT_SYMBOL(__up_read); EXPORT_SYMBOL(__up_write); EXPORT_SYMBOL(__downgrade_write); -#if RWSEM_DEBUG -EXPORT_SYMBOL(rwsemtrace); -#endif diff --git a/lib/rwsem.c b/lib/rwsem.c index 62fa4eba9ffe..b322421c2969 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -8,6 +8,26 @@ #include <linux/init.h> #include <linux/module.h> +/* + * Initialize an rwsem: + */ +void __init_rwsem(struct rw_semaphore *sem, const char *name, + struct lock_class_key *key) +{ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + /* + * Make sure we are not reinitializing a held semaphore: + */ + debug_check_no_locks_freed((void *)sem, sizeof(*sem)); + lockdep_init_map(&sem->dep_map, name, key); +#endif + sem->count = RWSEM_UNLOCKED_VALUE; + spin_lock_init(&sem->wait_lock); + INIT_LIST_HEAD(&sem->wait_list); +} + +EXPORT_SYMBOL(__init_rwsem); + struct rwsem_waiter { struct list_head list; struct task_struct *task; @@ -16,17 +36,6 @@ struct rwsem_waiter { #define RWSEM_WAITING_FOR_WRITE 0x00000002 }; -#if RWSEM_DEBUG -#undef rwsemtrace -void rwsemtrace(struct rw_semaphore *sem, const char *str) -{ - printk("sem=%p\n", sem); - printk("(sem)=%08lx\n", sem->count); - if (sem->debug) - printk("[%d] %s({%08lx})\n", current->pid, str, sem->count); -} -#endif - /* * handle the lock release when processes blocked on it that can now run * - if we come here from up_xxxx(), then: @@ -45,8 +54,6 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading) struct list_head *next; signed long oldcount, woken, loop; - rwsemtrace(sem, 
"Entering __rwsem_do_wake"); - if (downgrading) goto dont_wake_writers; @@ -127,7 +134,6 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading) next->prev = &sem->wait_list; out: - rwsemtrace(sem, "Leaving __rwsem_do_wake"); return sem; /* undo the change to count, but check for a transition 1->0 */ @@ -186,13 +192,9 @@ rwsem_down_read_failed(struct rw_semaphore *sem) { struct rwsem_waiter waiter; - rwsemtrace(sem, "Entering rwsem_down_read_failed"); - waiter.flags = RWSEM_WAITING_FOR_READ; rwsem_down_failed_common(sem, &waiter, RWSEM_WAITING_BIAS - RWSEM_ACTIVE_BIAS); - - rwsemtrace(sem, "Leaving rwsem_down_read_failed"); return sem; } @@ -204,12 +206,9 @@ rwsem_down_write_failed(struct rw_semaphore *sem) { struct rwsem_waiter waiter; - rwsemtrace(sem, "Entering rwsem_down_write_failed"); - waiter.flags = RWSEM_WAITING_FOR_WRITE; rwsem_down_failed_common(sem, &waiter, -RWSEM_ACTIVE_BIAS); - rwsemtrace(sem, "Leaving rwsem_down_write_failed"); return sem; } @@ -221,8 +220,6 @@ struct rw_semaphore fastcall *rwsem_wake(struct rw_semaphore *sem) { unsigned long flags; - rwsemtrace(sem, "Entering rwsem_wake"); - spin_lock_irqsave(&sem->wait_lock, flags); /* do nothing if list empty */ @@ -231,8 +228,6 @@ struct rw_semaphore fastcall *rwsem_wake(struct rw_semaphore *sem) spin_unlock_irqrestore(&sem->wait_lock, flags); - rwsemtrace(sem, "Leaving rwsem_wake"); - return sem; } @@ -245,8 +240,6 @@ struct rw_semaphore fastcall *rwsem_downgrade_wake(struct rw_semaphore *sem) { unsigned long flags; - rwsemtrace(sem, "Entering rwsem_downgrade_wake"); - spin_lock_irqsave(&sem->wait_lock, flags); /* do nothing if list empty */ @@ -255,7 +248,6 @@ struct rw_semaphore fastcall *rwsem_downgrade_wake(struct rw_semaphore *sem) spin_unlock_irqrestore(&sem->wait_lock, flags); - rwsemtrace(sem, "Leaving rwsem_downgrade_wake"); return sem; } @@ -263,6 +255,3 @@ EXPORT_SYMBOL(rwsem_down_read_failed); EXPORT_SYMBOL(rwsem_down_write_failed); EXPORT_SYMBOL(rwsem_wake); 
EXPORT_SYMBOL(rwsem_downgrade_wake); -#if RWSEM_DEBUG -EXPORT_SYMBOL(rwsemtrace); -#endif diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c index 93c15ee3f8ea..3d9c4dc965ed 100644 --- a/lib/spinlock_debug.c +++ b/lib/spinlock_debug.c @@ -8,38 +8,71 @@ #include <linux/spinlock.h> #include <linux/interrupt.h> +#include <linux/debug_locks.h> #include <linux/delay.h> +#include <linux/module.h> + +void __spin_lock_init(spinlock_t *lock, const char *name, + struct lock_class_key *key) +{ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + /* + * Make sure we are not reinitializing a held lock: + */ + debug_check_no_locks_freed((void *)lock, sizeof(*lock)); + lockdep_init_map(&lock->dep_map, name, key); +#endif + lock->raw_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + lock->magic = SPINLOCK_MAGIC; + lock->owner = SPINLOCK_OWNER_INIT; + lock->owner_cpu = -1; +} + +EXPORT_SYMBOL(__spin_lock_init); + +void __rwlock_init(rwlock_t *lock, const char *name, + struct lock_class_key *key) +{ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + /* + * Make sure we are not reinitializing a held lock: + */ + debug_check_no_locks_freed((void *)lock, sizeof(*lock)); + lockdep_init_map(&lock->dep_map, name, key); +#endif + lock->raw_lock = (raw_rwlock_t) __RAW_RW_LOCK_UNLOCKED; + lock->magic = RWLOCK_MAGIC; + lock->owner = SPINLOCK_OWNER_INIT; + lock->owner_cpu = -1; +} + +EXPORT_SYMBOL(__rwlock_init); static void spin_bug(spinlock_t *lock, const char *msg) { - static long print_once = 1; struct task_struct *owner = NULL; - if (xchg(&print_once, 0)) { - if (lock->owner && lock->owner != SPINLOCK_OWNER_INIT) - owner = lock->owner; - printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n", - msg, raw_smp_processor_id(), - current->comm, current->pid); - printk(KERN_EMERG " lock: %p, .magic: %08x, .owner: %s/%d, " - ".owner_cpu: %d\n", - lock, lock->magic, - owner ? owner->comm : "<none>", - owner ? 
owner->pid : -1, - lock->owner_cpu); - dump_stack(); -#ifdef CONFIG_SMP - /* - * We cannot continue on SMP: - */ -// panic("bad locking"); -#endif - } + if (!debug_locks_off()) + return; + + if (lock->owner && lock->owner != SPINLOCK_OWNER_INIT) + owner = lock->owner; + printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n", + msg, raw_smp_processor_id(), + current->comm, current->pid); + printk(KERN_EMERG " lock: %p, .magic: %08x, .owner: %s/%d, " + ".owner_cpu: %d\n", + lock, lock->magic, + owner ? owner->comm : "<none>", + owner ? owner->pid : -1, + lock->owner_cpu); + dump_stack(); } #define SPIN_BUG_ON(cond, lock, msg) if (unlikely(cond)) spin_bug(lock, msg) -static inline void debug_spin_lock_before(spinlock_t *lock) +static inline void +debug_spin_lock_before(spinlock_t *lock) { SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic"); SPIN_BUG_ON(lock->owner == current, lock, "recursion"); @@ -118,20 +151,13 @@ void _raw_spin_unlock(spinlock_t *lock) static void rwlock_bug(rwlock_t *lock, const char *msg) { - static long print_once = 1; - - if (xchg(&print_once, 0)) { - printk(KERN_EMERG "BUG: rwlock %s on CPU#%d, %s/%d, %p\n", - msg, raw_smp_processor_id(), current->comm, - current->pid, lock); - dump_stack(); -#ifdef CONFIG_SMP - /* - * We cannot continue on SMP: - */ - panic("bad locking"); -#endif - } + if (!debug_locks_off()) + return; + + printk(KERN_EMERG "BUG: rwlock %s on CPU#%d, %s/%d, %p\n", + msg, raw_smp_processor_id(), current->comm, + current->pid, lock); + dump_stack(); } #define RWLOCK_BUG_ON(cond, lock, msg) if (unlikely(cond)) rwlock_bug(lock, msg) diff --git a/mm/memory.c b/mm/memory.c index 7e2a4b1580e3..c1e14c9e67e4 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -503,7 +503,7 @@ again: return -ENOMEM; src_pte = pte_offset_map_nested(src_pmd, addr); src_ptl = pte_lockptr(src_mm, src_pmd); - spin_lock(src_ptl); + spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); do { /* diff --git a/mm/mremap.c b/mm/mremap.c index 
1903bdf65e42..7c15cf3373ad 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -97,7 +97,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, new_pte = pte_offset_map_nested(new_pmd, new_addr); new_ptl = pte_lockptr(mm, new_pmd); if (new_ptl != old_ptl) - spin_lock(new_ptl); + spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE, new_pte++, new_addr += PAGE_SIZE) { diff --git a/mm/oom_kill.c b/mm/oom_kill.c index d46ed0f1dc06..b9af136e5cfa 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -225,7 +225,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints) * CAP_SYS_RAW_IO set, send SIGTERM instead (but it's unlikely that * we select a process with CAP_SYS_RAW_IO set). */ -static void __oom_kill_task(task_t *p, const char *message) +static void __oom_kill_task(struct task_struct *p, const char *message) { if (p->pid == 1) { WARN_ON(1); @@ -255,10 +255,10 @@ static void __oom_kill_task(task_t *p, const char *message) force_sig(SIGKILL, p); } -static int oom_kill_task(task_t *p, const char *message) +static int oom_kill_task(struct task_struct *p, const char *message) { struct mm_struct *mm; - task_t * g, * q; + struct task_struct *g, *q; mm = p->mm; @@ -316,7 +316,7 @@ static int oom_kill_process(struct task_struct *p, unsigned long points, */ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) { - task_t *p; + struct task_struct *p; unsigned long points = 0; if (printk_ratelimit()) { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3e792a583f3b..54a4f5375bba 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2005,6 +2005,10 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat, zone->spanned_pages = size; zone->present_pages = realsize; +#ifdef CONFIG_NUMA + zone->min_unmapped_ratio = (realsize*sysctl_min_unmapped_ratio) + / 100; +#endif zone->name = zone_names[j]; spin_lock_init(&zone->lock); spin_lock_init(&zone->lru_lock); @@ 
-2298,6 +2302,24 @@ int min_free_kbytes_sysctl_handler(ctl_table *table, int write, return 0; } +#ifdef CONFIG_NUMA +int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write, + struct file *file, void __user *buffer, size_t *length, loff_t *ppos) +{ + struct zone *zone; + int rc; + + rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos); + if (rc) + return rc; + + for_each_zone(zone) + zone->min_unmapped_ratio = (zone->present_pages * + sysctl_min_unmapped_ratio) / 100; + return 0; +} +#endif + /* * lowmem_reserve_ratio_sysctl_handler - just a wrapper around * proc_dointvec() so that we can call setup_per_zone_lowmem_reserve() diff --git a/mm/slab.c b/mm/slab.c index 3936af344542..85c2e03098a7 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1021,7 +1021,8 @@ static void drain_alien_cache(struct kmem_cache *cachep, } } -static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) +static inline int cache_free_alien(struct kmem_cache *cachep, void *objp, + int nesting) { struct slab *slabp = virt_to_slab(objp); int nodeid = slabp->nodeid; @@ -1039,7 +1040,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) STATS_INC_NODEFREES(cachep); if (l3->alien && l3->alien[nodeid]) { alien = l3->alien[nodeid]; - spin_lock(&alien->lock); + spin_lock_nested(&alien->lock, nesting); if (unlikely(alien->avail == alien->limit)) { STATS_INC_ACOVERFLOW(cachep); __drain_alien_cache(cachep, alien, nodeid); @@ -1068,7 +1069,8 @@ static inline void free_alien_cache(struct array_cache **ac_ptr) { } -static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) +static inline int cache_free_alien(struct kmem_cache *cachep, void *objp, + int nesting) { return 0; } @@ -1272,6 +1274,11 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, local_irq_disable(); memcpy(ptr, list, sizeof(struct kmem_list3)); + /* + * Do not assume that spinlocks can be initialized via memcpy: + */ + 
spin_lock_init(&ptr->list_lock); + MAKE_ALL_LISTS(cachep, ptr, nodeid); cachep->nodelists[nodeid] = ptr; local_irq_enable(); @@ -1398,7 +1405,7 @@ void __init kmem_cache_init(void) } /* 4) Replace the bootstrap head arrays */ { - void *ptr; + struct array_cache *ptr; ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); @@ -1406,6 +1413,11 @@ void __init kmem_cache_init(void) BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache); memcpy(ptr, cpu_cache_get(&cache_cache), sizeof(struct arraycache_init)); + /* + * Do not assume that spinlocks can be initialized via memcpy: + */ + spin_lock_init(&ptr->lock); + cache_cache.array[smp_processor_id()] = ptr; local_irq_enable(); @@ -1416,6 +1428,11 @@ void __init kmem_cache_init(void) != &initarray_generic.cache); memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep), sizeof(struct arraycache_init)); + /* + * Do not assume that spinlocks can be initialized via memcpy: + */ + spin_lock_init(&ptr->lock); + malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] = ptr; local_irq_enable(); @@ -1743,6 +1760,8 @@ static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) } #endif +static void __cache_free(struct kmem_cache *cachep, void *objp, int nesting); + /** * slab_destroy - destroy and release all objects in a slab * @cachep: cache pointer being destroyed @@ -1766,8 +1785,17 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) call_rcu(&slab_rcu->head, kmem_rcu_free); } else { kmem_freepages(cachep, addr); - if (OFF_SLAB(cachep)) - kmem_cache_free(cachep->slabp_cache, slabp); + if (OFF_SLAB(cachep)) { + unsigned long flags; + + /* + * lockdep: we may nest inside an already held + * ac->lock, so pass in a nesting flag: + */ + local_irq_save(flags); + __cache_free(cachep->slabp_cache, slabp, 1); + local_irq_restore(flags); + } } } @@ -3072,7 +3100,16 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, if (slabp->inuse == 0) { if 
(l3->free_objects > l3->free_limit) { l3->free_objects -= cachep->num; + /* + * It is safe to drop the lock. The slab is + * no longer linked to the cache. cachep + * cannot disappear - we are using it and + * all destruction of caches must be + * serialized properly by the user. + */ + spin_unlock(&l3->list_lock); slab_destroy(cachep, slabp); + spin_lock(&l3->list_lock); } else { list_add(&slabp->list, &l3->slabs_free); } @@ -3098,7 +3135,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) #endif check_irq_off(); l3 = cachep->nodelists[node]; - spin_lock(&l3->list_lock); + spin_lock_nested(&l3->list_lock, SINGLE_DEPTH_NESTING); if (l3->shared) { struct array_cache *shared_array = l3->shared; int max = shared_array->limit - shared_array->avail; @@ -3141,14 +3178,14 @@ free_done: * Release an obj back to its cache. If the obj has a constructed state, it must * be in this state _before_ it is released. Called with disabled ints. */ -static inline void __cache_free(struct kmem_cache *cachep, void *objp) +static void __cache_free(struct kmem_cache *cachep, void *objp, int nesting) { struct array_cache *ac = cpu_cache_get(cachep); check_irq_off(); objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0)); - if (cache_free_alien(cachep, objp)) + if (cache_free_alien(cachep, objp, nesting)) return; if (likely(ac->avail < ac->limit)) { @@ -3387,7 +3424,7 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp) BUG_ON(virt_to_cache(objp) != cachep); local_irq_save(flags); - __cache_free(cachep, objp); + __cache_free(cachep, objp, 0); local_irq_restore(flags); } EXPORT_SYMBOL(kmem_cache_free); @@ -3412,7 +3449,7 @@ void kfree(const void *objp) kfree_debugcheck(objp); c = virt_to_cache(objp); debug_check_no_locks_freed(objp, obj_size(c)); - __cache_free(c, (void *)objp); + __cache_free(c, (void *)objp, 0); local_irq_restore(flags); } EXPORT_SYMBOL(kfree); diff --git a/mm/swap_state.c b/mm/swap_state.c index 
fccbd9bba77b..5f7cf2a4cb55 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -38,7 +38,7 @@ static struct backing_dev_info swap_backing_dev_info = { struct address_space swapper_space = { .page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN), - .tree_lock = RW_LOCK_UNLOCKED, + .tree_lock = __RW_LOCK_UNLOCKED(swapper_space.tree_lock), .a_ops = &swap_aops, .i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear), .backing_dev_info = &swap_backing_dev_info, diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 35f8553f893a..7b450798b458 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -330,6 +330,8 @@ void __vunmap(void *addr, int deallocate_pages) return; } + debug_check_no_locks_freed(addr, area->size); + if (deallocate_pages) { int i; diff --git a/mm/vmscan.c b/mm/vmscan.c index ff2ebe9458a3..5d4c4d02254d 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1503,10 +1503,6 @@ module_init(kswapd_init) * * If non-zero call zone_reclaim when the number of free pages falls below * the watermarks. - * - * In the future we may add flags to the mode. However, the page allocator - * should only have to check that zone_reclaim_mode != 0 before calling - * zone_reclaim(). */ int zone_reclaim_mode __read_mostly; @@ -1524,6 +1520,12 @@ int zone_reclaim_mode __read_mostly; #define ZONE_RECLAIM_PRIORITY 4 /* + * Percentage of pages in a zone that must be unmapped for zone_reclaim to + * occur. + */ +int sysctl_min_unmapped_ratio = 1; + +/* * Try to free up some pages from this zone through reclaim. */ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) @@ -1590,18 +1592,17 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) int node_id; /* - * Do not reclaim if there are not enough reclaimable pages in this - * zone that would satify this allocations. + * Zone reclaim reclaims unmapped file backed pages. * - * All unmapped pagecache pages are reclaimable. 
- * - * Both counters may be temporarily off a bit so we use - * SWAP_CLUSTER_MAX as the boundary. It may also be good to - * leave a few frequently used unmapped pagecache pages around. + * A small portion of unmapped file backed pages is needed for + * file I/O otherwise pages read by file I/O will be immediately + * thrown out if the zone is overallocated. So we do not reclaim + * if less than a specified percentage of the zone is used by + * unmapped file backed pages. */ if (zone_page_state(zone, NR_FILE_PAGES) - - zone_page_state(zone, NR_FILE_MAPPED) < SWAP_CLUSTER_MAX) - return 0; + zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_ratio) + return 0; /* * Avoid concurrent zone reclaims, do not reclaim in a zone that does diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 3948949a609a..458031bfff55 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -364,6 +364,14 @@ static void vlan_transfer_operstate(const struct net_device *dev, struct net_dev } } +/* + * vlan network devices have devices nesting below it, and are a special + * "super class" of normal network devices; split their locks off into a + * separate class since they always nest. + */ +static struct lock_class_key vlan_netdev_xmit_lock_key; + + /* Attach a VLAN device to a mac address (ie Ethernet Card). * Returns the device that was created, or NULL if there was * an error of some kind. 
@@ -460,6 +468,7 @@ static struct net_device *register_vlan_device(const char *eth_IF_name, new_dev = alloc_netdev(sizeof(struct vlan_dev_info), name, vlan_setup); + if (new_dev == NULL) goto out_unlock; @@ -518,6 +527,8 @@ static struct net_device *register_vlan_device(const char *eth_IF_name, if (register_netdevice(new_dev)) goto out_free_newdev; + lockdep_set_class(&new_dev->_xmit_lock, &vlan_netdev_xmit_lock_key); + new_dev->iflink = real_dev->ifindex; vlan_transfer_operstate(real_dev, new_dev); linkwatch_fire_event(new_dev); /* _MUST_ call rfc2863_policy() */ diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 7cfbdb215ba2..44f6a181a754 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -71,6 +71,13 @@ static kmem_cache_t *skbuff_head_cache __read_mostly; static kmem_cache_t *skbuff_fclone_cache __read_mostly; /* + * lockdep: lock class key used by skb_queue_head_init(): + */ +struct lock_class_key skb_queue_lock_key; + +EXPORT_SYMBOL(skb_queue_lock_key); + +/* * Keep out-of-line to prevent kernel bloat. * __builtin_return_address is not used because it is not always * reliable. diff --git a/net/core/sock.c b/net/core/sock.c index 533b9317144b..51fcfbc041a7 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -129,6 +129,53 @@ #include <net/tcp.h> #endif +/* + * Each address family might have different locking rules, so we have + * one slock key per address family: + */ +static struct lock_class_key af_family_keys[AF_MAX]; +static struct lock_class_key af_family_slock_keys[AF_MAX]; + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +/* + * Make lock validator output more readable. 
(we pre-construct these + * strings build-time, so that runtime initialization of socket + * locks is fast): + */ +static const char *af_family_key_strings[AF_MAX+1] = { + "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX" , "sk_lock-AF_INET" , + "sk_lock-AF_AX25" , "sk_lock-AF_IPX" , "sk_lock-AF_APPLETALK", + "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE" , "sk_lock-AF_ATMPVC" , + "sk_lock-AF_X25" , "sk_lock-AF_INET6" , "sk_lock-AF_ROSE" , + "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI" , "sk_lock-AF_SECURITY" , + "sk_lock-AF_KEY" , "sk_lock-AF_NETLINK" , "sk_lock-AF_PACKET" , + "sk_lock-AF_ASH" , "sk_lock-AF_ECONET" , "sk_lock-AF_ATMSVC" , + "sk_lock-21" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" , + "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" , + "sk_lock-27" , "sk_lock-28" , "sk_lock-29" , + "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-AF_MAX" +}; +static const char *af_family_slock_key_strings[AF_MAX+1] = { + "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" , + "slock-AF_AX25" , "slock-AF_IPX" , "slock-AF_APPLETALK", + "slock-AF_NETROM", "slock-AF_BRIDGE" , "slock-AF_ATMPVC" , + "slock-AF_X25" , "slock-AF_INET6" , "slock-AF_ROSE" , + "slock-AF_DECnet", "slock-AF_NETBEUI" , "slock-AF_SECURITY" , + "slock-AF_KEY" , "slock-AF_NETLINK" , "slock-AF_PACKET" , + "slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" , + "slock-21" , "slock-AF_SNA" , "slock-AF_IRDA" , + "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" , + "slock-27" , "slock-28" , "slock-29" , + "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_MAX" +}; +#endif + +/* + * sk_callback_lock locking rules are per-address-family, + * so split the lock classes by using a per-AF key: + */ +static struct lock_class_key af_callback_keys[AF_MAX]; + /* Take into consideration the size of the struct sk_buff overhead in the * determination of these values, since that is non-constant across * platforms. 
This makes socket queueing behavior and performance @@ -237,9 +284,16 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb) skb->dev = NULL; bh_lock_sock(sk); - if (!sock_owned_by_user(sk)) + if (!sock_owned_by_user(sk)) { + /* + * trylock + unlock semantics: + */ + mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_); + rc = sk->sk_backlog_rcv(sk, skb); - else + + mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); + } else sk_add_backlog(sk, skb); bh_unlock_sock(sk); out: @@ -749,6 +803,33 @@ lenout: return 0; } +/* + * Initialize an sk_lock. + * + * (We also register the sk_lock with the lock validator.) + */ +static void inline sock_lock_init(struct sock *sk) +{ + spin_lock_init(&sk->sk_lock.slock); + sk->sk_lock.owner = NULL; + init_waitqueue_head(&sk->sk_lock.wq); + /* + * Make sure we are not reinitializing a held lock: + */ + debug_check_no_locks_freed((void *)&sk->sk_lock, sizeof(sk->sk_lock)); + + /* + * Mark both the sk_lock and the sk_lock.slock as a + * per-address-family lock class: + */ + lockdep_set_class_and_name(&sk->sk_lock.slock, + af_family_slock_keys + sk->sk_family, + af_family_slock_key_strings[sk->sk_family]); + lockdep_init_map(&sk->sk_lock.dep_map, + af_family_key_strings[sk->sk_family], + af_family_keys + sk->sk_family); +} + /** * sk_alloc - All socket objects are allocated here * @family: protocol family @@ -848,6 +929,8 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) rwlock_init(&newsk->sk_dst_lock); rwlock_init(&newsk->sk_callback_lock); + lockdep_set_class(&newsk->sk_callback_lock, + af_callback_keys + newsk->sk_family); newsk->sk_dst_cache = NULL; newsk->sk_wmem_queued = 0; @@ -1422,6 +1505,8 @@ void sock_init_data(struct socket *sock, struct sock *sk) rwlock_init(&sk->sk_dst_lock); rwlock_init(&sk->sk_callback_lock); + lockdep_set_class(&sk->sk_callback_lock, + af_callback_keys + sk->sk_family); sk->sk_state_change = sock_def_wakeup; sk->sk_data_ready = sock_def_readable; @@ -1449,24 +1534,34 @@ void 
sock_init_data(struct socket *sock, struct sock *sk) void fastcall lock_sock(struct sock *sk) { might_sleep(); - spin_lock_bh(&(sk->sk_lock.slock)); + spin_lock_bh(&sk->sk_lock.slock); if (sk->sk_lock.owner) __lock_sock(sk); sk->sk_lock.owner = (void *)1; - spin_unlock_bh(&(sk->sk_lock.slock)); + spin_unlock(&sk->sk_lock.slock); + /* + * The sk_lock has mutex_lock() semantics here: + */ + mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_); + local_bh_enable(); } EXPORT_SYMBOL(lock_sock); void fastcall release_sock(struct sock *sk) { - spin_lock_bh(&(sk->sk_lock.slock)); + /* + * The sk_lock has mutex_unlock() semantics: + */ + mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); + + spin_lock_bh(&sk->sk_lock.slock); if (sk->sk_backlog.tail) __release_sock(sk); sk->sk_lock.owner = NULL; - if (waitqueue_active(&(sk->sk_lock.wq))) - wake_up(&(sk->sk_lock.wq)); - spin_unlock_bh(&(sk->sk_lock.slock)); + if (waitqueue_active(&sk->sk_lock.wq)) + wake_up(&sk->sk_lock.wq); + spin_unlock_bh(&sk->sk_lock.slock); } EXPORT_SYMBOL(release_sock); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index da44fabf4dc5..2dc6dbb28467 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -205,21 +205,27 @@ __u8 ip_tos2prio[16] = { struct rt_hash_bucket { struct rtable *chain; }; -#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \ + defined(CONFIG_PROVE_LOCKING) /* * Instead of using one spinlock for each rt_hash_bucket, we use a table of spinlocks * The size of this table is a power of two and depends on the number of CPUS. 
+ * (on lockdep we have a quite big spinlock_t, so keep the size down there) */ -#if NR_CPUS >= 32 -#define RT_HASH_LOCK_SZ 4096 -#elif NR_CPUS >= 16 -#define RT_HASH_LOCK_SZ 2048 -#elif NR_CPUS >= 8 -#define RT_HASH_LOCK_SZ 1024 -#elif NR_CPUS >= 4 -#define RT_HASH_LOCK_SZ 512 +#ifdef CONFIG_LOCKDEP +# define RT_HASH_LOCK_SZ 256 #else -#define RT_HASH_LOCK_SZ 256 +# if NR_CPUS >= 32 +# define RT_HASH_LOCK_SZ 4096 +# elif NR_CPUS >= 16 +# define RT_HASH_LOCK_SZ 2048 +# elif NR_CPUS >= 8 +# define RT_HASH_LOCK_SZ 1024 +# elif NR_CPUS >= 4 +# define RT_HASH_LOCK_SZ 512 +# else +# define RT_HASH_LOCK_SZ 256 +# endif #endif static spinlock_t *rt_hash_locks; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 8355b729fa95..5a886e6efbbe 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -90,7 +90,7 @@ static struct socket *tcp_socket; void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb); struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { - .lhash_lock = RW_LOCK_UNLOCKED, + .lhash_lock = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock), .lhash_users = ATOMIC_INIT(0), .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait), }; @@ -1090,7 +1090,7 @@ process: skb->dev = NULL; - bh_lock_sock(sk); + bh_lock_sock_nested(sk); ret = 0; if (!sock_owned_by_user(sk)) { #ifdef CONFIG_NET_DMA diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index e0851697ad5e..0ccb7cb22b15 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -40,7 +40,7 @@ int sysctl_tcp_abort_on_overflow; struct inet_timewait_death_row tcp_death_row = { .sysctl_max_tw_buckets = NR_FILE * 2, .period = TCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS, - .death_lock = SPIN_LOCK_UNLOCKED, + .death_lock = __SPIN_LOCK_UNLOCKED(tcp_death_row.death_lock), .hashinfo = &tcp_hashinfo, .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0, (unsigned long)&tcp_death_row), diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 
70cee82a98bf..55c0adc8f115 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -156,7 +156,7 @@ static void netlink_sock_destruct(struct sock *sk) static void netlink_table_grab(void) { - write_lock_bh(&nl_table_lock); + write_lock_irq(&nl_table_lock); if (atomic_read(&nl_table_users)) { DECLARE_WAITQUEUE(wait, current); @@ -166,9 +166,9 @@ static void netlink_table_grab(void) set_current_state(TASK_UNINTERRUPTIBLE); if (atomic_read(&nl_table_users) == 0) break; - write_unlock_bh(&nl_table_lock); + write_unlock_irq(&nl_table_lock); schedule(); - write_lock_bh(&nl_table_lock); + write_lock_irq(&nl_table_lock); } __set_current_state(TASK_RUNNING); @@ -178,7 +178,7 @@ static void netlink_table_grab(void) static __inline__ void netlink_table_ungrab(void) { - write_unlock_bh(&nl_table_lock); + write_unlock_irq(&nl_table_lock); wake_up(&nl_table_wait); } diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 6db6006616c6..dc6cb93c8830 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -515,7 +515,7 @@ rpc_depopulate(struct dentry *parent) struct dentry *dentry, *dvec[10]; int n = 0; - mutex_lock(&dir->i_mutex); + mutex_lock_nested(&dir->i_mutex, I_MUTEX_CHILD); repeat: spin_lock(&dcache_lock); list_for_each_safe(pos, next, &parent->d_subdirs) { @@ -631,7 +631,7 @@ rpc_lookup_negative(char *path, struct nameidata *nd) if ((error = rpc_lookup_parent(path, nd)) != 0) return ERR_PTR(error); dir = nd->dentry->d_inode; - mutex_lock(&dir->i_mutex); + mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); dentry = lookup_one_len(nd->last.name, nd->dentry, nd->last.len); if (IS_ERR(dentry)) goto out_err; @@ -693,7 +693,7 @@ rpc_rmdir(char *path) if ((error = rpc_lookup_parent(path, &nd)) != 0) return error; dir = nd.dentry->d_inode; - mutex_lock(&dir->i_mutex); + mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); dentry = lookup_one_len(nd.last.name, nd.dentry, nd.last.len); if (IS_ERR(dentry)) { error = PTR_ERR(dentry); @@ -754,7 +754,7 @@ 
rpc_unlink(char *path) if ((error = rpc_lookup_parent(path, &nd)) != 0) return error; dir = nd.dentry->d_inode; - mutex_lock(&dir->i_mutex); + mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); dentry = lookup_one_len(nd.last.name, nd.dentry, nd.last.len); if (IS_ERR(dentry)) { error = PTR_ERR(dentry); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index aca650109425..e9a287bc3142 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -565,6 +565,14 @@ static struct proto unix_proto = { .obj_size = sizeof(struct unix_sock), }; +/* + * AF_UNIX sockets do not interact with hardware, hence they + * dont trigger interrupts - so it's safe for them to have + * bh-unsafe locking for their sk_receive_queue.lock. Split off + * this special lock-class by reinitializing the spinlock key: + */ +static struct lock_class_key af_unix_sk_receive_queue_lock_key; + static struct sock * unix_create1(struct socket *sock) { struct sock *sk = NULL; @@ -580,6 +588,8 @@ static struct sock * unix_create1(struct socket *sock) atomic_inc(&unix_nr_socks); sock_init_data(sock,sk); + lockdep_set_class(&sk->sk_receive_queue.lock, + &af_unix_sk_receive_queue_lock_key); sk->sk_write_space = unix_write_space; sk->sk_max_ack_backlog = sysctl_unix_max_dgram_qlen; @@ -1045,7 +1055,7 @@ restart: goto out_unlock; } - unix_state_wlock(sk); + unix_state_wlock_nested(sk); if (sk->sk_state != st) { unix_state_wunlock(sk); diff --git a/sound/aoa/core/snd-aoa-gpio-feature.c b/sound/aoa/core/snd-aoa-gpio-feature.c index bab97547a052..7ae0c0bdfad8 100644 --- a/sound/aoa/core/snd-aoa-gpio-feature.c +++ b/sound/aoa/core/snd-aoa-gpio-feature.c @@ -112,12 +112,7 @@ static struct device_node *get_gpio(char *name, static void get_irq(struct device_node * np, int *irqptr) { - *irqptr = -1; - if (!np) - return; - if (np->n_intrs != 1) - return; - *irqptr = np->intrs[0].line; + *irqptr = irq_of_parse_and_map(np, 0); } /* 0x4 is outenable, 0x1 is out, thus 4 or 5 */ diff --git 
a/sound/aoa/soundbus/i2sbus/i2sbus-core.c b/sound/aoa/soundbus/i2sbus/i2sbus-core.c index f268dacdaa00..01c0724335a3 100644 --- a/sound/aoa/soundbus/i2sbus/i2sbus-core.c +++ b/sound/aoa/soundbus/i2sbus/i2sbus-core.c @@ -129,7 +129,7 @@ static int i2sbus_add_dev(struct macio_dev *macio, if (strncmp(np->name, "i2s-", 4)) return 0; - if (np->n_intrs != 3) + if (macio_irq_count(macio) != 3) return 0; dev = kzalloc(sizeof(struct i2sbus_dev), GFP_KERNEL); @@ -183,9 +183,10 @@ static int i2sbus_add_dev(struct macio_dev *macio, snprintf(dev->rnames[i], sizeof(dev->rnames[i]), rnames[i], np->name); } for (i=0;i<3;i++) { - if (request_irq(np->intrs[i].line, ints[i], 0, dev->rnames[i], dev)) + if (request_irq(macio_irq(macio, i), ints[i], 0, + dev->rnames[i], dev)) goto err; - dev->interrupts[i] = np->intrs[i].line; + dev->interrupts[i] = macio_irq(macio, i); } for (i=0;i<3;i++) { diff --git a/sound/core/seq/seq_device.c b/sound/core/seq/seq_device.c index d812dc886360..4260de90f36f 100644 --- a/sound/core/seq/seq_device.c +++ b/sound/core/seq/seq_device.c @@ -380,6 +380,12 @@ static struct ops_list * create_driver(char *id) /* set up driver entry */ strlcpy(ops->id, id, sizeof(ops->id)); mutex_init(&ops->reg_mutex); + /* + * The ->reg_mutex locking rules are per-driver, so we create + * separate per-driver lock classes: + */ + lockdep_set_class(&ops->reg_mutex, (struct lock_class_key *)id); + ops->driver = DRIVER_EMPTY; INIT_LIST_HEAD(&ops->dev_list); /* lock this instance */ diff --git a/sound/core/seq/seq_ports.c b/sound/core/seq/seq_ports.c index d467b4f0ff2b..8c64b58ff77b 100644 --- a/sound/core/seq/seq_ports.c +++ b/sound/core/seq/seq_ports.c @@ -514,7 +514,7 @@ int snd_seq_port_connect(struct snd_seq_client *connector, atomic_set(&subs->ref_count, 2); down_write(&src->list_mutex); - down_write(&dest->list_mutex); + down_write_nested(&dest->list_mutex, SINGLE_DEPTH_NESTING); exclusive = info->flags & SNDRV_SEQ_PORT_SUBS_EXCLUSIVE ? 
1 : 0; err = -EBUSY; @@ -587,7 +587,7 @@ int snd_seq_port_disconnect(struct snd_seq_client *connector, unsigned long flags; down_write(&src->list_mutex); - down_write(&dest->list_mutex); + down_write_nested(&dest->list_mutex, SINGLE_DEPTH_NESTING); /* look for the connection */ list_for_each(p, &src->list_head) { diff --git a/sound/oss/dmasound/dmasound_awacs.c b/sound/oss/dmasound/dmasound_awacs.c index de454ca39226..4359903f4376 100644 --- a/sound/oss/dmasound/dmasound_awacs.c +++ b/sound/oss/dmasound/dmasound_awacs.c @@ -374,10 +374,7 @@ setup_audio_gpio(const char *name, const char* compatible, int *gpio_addr, int* *gpio_pol = *pp; else *gpio_pol = 1; - if (np->n_intrs > 0) - return np->intrs[0].line; - - return 0; + return irq_of_parse_and_map(np, 0); } static inline void @@ -2864,14 +2861,13 @@ printk("dmasound_pmac: couldn't find a Codec we can handle\n"); * other info if necessary (early AWACS we want to read chip ids) */ - if (of_get_address(io, 2, NULL, NULL) == NULL || io->n_intrs < 3) { + if (of_get_address(io, 2, NULL, NULL) == NULL) { /* OK - maybe we need to use the 'awacs' node (on earlier * machines). 
*/ if (awacs_node) { io = awacs_node ; - if (of_get_address(io, 2, NULL, NULL) == NULL || - io->n_intrs < 3) { + if (of_get_address(io, 2, NULL, NULL) == NULL) { printk("dmasound_pmac: can't use %s\n", io->full_name); return -ENODEV; @@ -2940,9 +2936,9 @@ printk("dmasound_pmac: couldn't find a Codec we can handle\n"); if (awacs_revision == AWACS_SCREAMER && awacs) awacs_recalibrate(); - awacs_irq = io->intrs[0].line; - awacs_tx_irq = io->intrs[1].line; - awacs_rx_irq = io->intrs[2].line; + awacs_irq = irq_of_parse_and_map(io, 0); + awacs_tx_irq = irq_of_parse_and_map(io, 1); + awacs_rx_irq = irq_of_parse_and_map(io, 2); /* Hack for legacy crap that will be killed someday */ awacs_node = io; diff --git a/sound/ppc/pmac.c b/sound/ppc/pmac.c index 90db9a1d1e0a..641430631505 100644 --- a/sound/ppc/pmac.c +++ b/sound/ppc/pmac.c @@ -1120,6 +1120,7 @@ int __init snd_pmac_new(struct snd_card *card, struct snd_pmac **chip_return) struct snd_pmac *chip; struct device_node *np; int i, err; + unsigned int irq; unsigned long ctrl_addr, txdma_addr, rxdma_addr; static struct snd_device_ops ops = { .dev_free = snd_pmac_dev_free, @@ -1153,10 +1154,6 @@ int __init snd_pmac_new(struct snd_card *card, struct snd_pmac **chip_return) if (chip->is_k2) { static char *rnames[] = { "Sound Control", "Sound DMA" }; - if (np->n_intrs < 3) { - err = -ENODEV; - goto __error; - } for (i = 0; i < 2; i ++) { if (of_address_to_resource(np->parent, i, &chip->rsrc[i])) { @@ -1185,10 +1182,6 @@ int __init snd_pmac_new(struct snd_card *card, struct snd_pmac **chip_return) } else { static char *rnames[] = { "Sound Control", "Sound Tx DMA", "Sound Rx DMA" }; - if (np->n_intrs < 3) { - err = -ENODEV; - goto __error; - } for (i = 0; i < 3; i ++) { if (of_address_to_resource(np, i, &chip->rsrc[i])) { @@ -1220,28 +1213,30 @@ int __init snd_pmac_new(struct snd_card *card, struct snd_pmac **chip_return) chip->playback.dma = ioremap(txdma_addr, 0x100); chip->capture.dma = ioremap(rxdma_addr, 0x100); if 
(chip->model <= PMAC_BURGUNDY) { - if (request_irq(np->intrs[0].line, snd_pmac_ctrl_intr, 0, + irq = irq_of_parse_and_map(np, 0); + if (request_irq(irq, snd_pmac_ctrl_intr, 0, "PMac", (void*)chip)) { - snd_printk(KERN_ERR "pmac: unable to grab IRQ %d\n", np->intrs[0].line); + snd_printk(KERN_ERR "pmac: unable to grab IRQ %d\n", + irq); err = -EBUSY; goto __error; } - chip->irq = np->intrs[0].line; + chip->irq = irq; } - if (request_irq(np->intrs[1].line, snd_pmac_tx_intr, 0, - "PMac Output", (void*)chip)) { - snd_printk(KERN_ERR "pmac: unable to grab IRQ %d\n", np->intrs[1].line); + irq = irq_of_parse_and_map(np, 1); + if (request_irq(irq, snd_pmac_tx_intr, 0, "PMac Output", (void*)chip)){ + snd_printk(KERN_ERR "pmac: unable to grab IRQ %d\n", irq); err = -EBUSY; goto __error; } - chip->tx_irq = np->intrs[1].line; - if (request_irq(np->intrs[2].line, snd_pmac_rx_intr, 0, - "PMac Input", (void*)chip)) { - snd_printk(KERN_ERR "pmac: unable to grab IRQ %d\n", np->intrs[2].line); + chip->tx_irq = irq; + irq = irq_of_parse_and_map(np, 2); + if (request_irq(irq, snd_pmac_rx_intr, 0, "PMac Input", (void*)chip)) { + snd_printk(KERN_ERR "pmac: unable to grab IRQ %d\n", irq); err = -EBUSY; goto __error; } - chip->rx_irq = np->intrs[2].line; + chip->rx_irq = irq; snd_pmac_sound_feature(chip, 1); diff --git a/sound/ppc/tumbler.c b/sound/ppc/tumbler.c index 70e4ebc70260..692c61177678 100644 --- a/sound/ppc/tumbler.c +++ b/sound/ppc/tumbler.c @@ -1121,7 +1121,7 @@ static long tumbler_find_device(const char *device, const char *platform, DBG("(I) GPIO device %s found, offset: %x, active state: %d !\n", device, gp->addr, gp->active_state); - return (node->n_intrs > 0) ? 
node->intrs[0].line : 0; + return irq_of_parse_and_map(node, 0); } /* reset audio */ @@ -1264,16 +1264,16 @@ static int __init tumbler_init(struct snd_pmac *chip) &mix->line_mute, 1); irq = tumbler_find_device("headphone-detect", NULL, &mix->hp_detect, 0); - if (irq < 0) + if (irq <= NO_IRQ) irq = tumbler_find_device("headphone-detect", NULL, &mix->hp_detect, 1); - if (irq < 0) + if (irq <= NO_IRQ) irq = tumbler_find_device("keywest-gpio15", NULL, &mix->hp_detect, 1); mix->headphone_irq = irq; irq = tumbler_find_device("line-output-detect", NULL, &mix->line_detect, 0); - if (irq < 0) + if (irq <= NO_IRQ) irq = tumbler_find_device("line-output-detect", NULL, &mix->line_detect, 1); mix->lineout_irq = irq; diff --git a/sound/sparc/amd7930.c b/sound/sparc/amd7930.c index db3e22efd02e..2bd8e40b8541 100644 --- a/sound/sparc/amd7930.c +++ b/sound/sparc/amd7930.c @@ -1033,10 +1033,10 @@ static int __init amd7930_attach_common(struct resource *rp, int irq) strcpy(card->driver, "AMD7930"); strcpy(card->shortname, "Sun AMD7930"); - sprintf(card->longname, "%s at 0x%02lx:0x%08lx, irq %d", + sprintf(card->longname, "%s at 0x%02lx:0x%08Lx, irq %d", card->shortname, rp->flags & 0xffL, - rp->start, + (unsigned long long)rp->start, irq); if ((err = snd_amd7930_create(card, rp, diff --git a/sound/sparc/cs4231.c b/sound/sparc/cs4231.c index 5018fcf41df5..9a06c3bd6944 100644 --- a/sound/sparc/cs4231.c +++ b/sound/sparc/cs4231.c @@ -2036,7 +2036,7 @@ static int __init cs4231_sbus_attach(struct sbus_dev *sdev) if (err) return err; - sprintf(card->longname, "%s at 0x%02lx:0x%016lx, irq %d", + sprintf(card->longname, "%s at 0x%02lx:0x%016Lx, irq %d", card->shortname, rp->flags & 0xffL, (unsigned long long)rp->start, diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c index 59a02a0d9afc..f3ae6e23610e 100644 --- a/sound/sparc/dbri.c +++ b/sound/sparc/dbri.c @@ -2645,7 +2645,7 @@ static int __init dbri_attach(int prom_node, struct sbus_dev *sdev) strcpy(card->driver, "DBRI"); 
strcpy(card->shortname, "Sun DBRI"); rp = &sdev->resource[0]; - sprintf(card->longname, "%s at 0x%02lx:0x%016lx, irq %d", + sprintf(card->longname, "%s at 0x%02lx:0x%016Lx, irq %d", card->shortname, rp->flags & 0xffL, (unsigned long long)rp->start, irq.pri); |