From b332828c39326b1dca617f387dd15d12e81cd5f0 Mon Sep 17 00:00:00 2001
From: "K.Prasad" <prasad@linux.vnet.ibm.com>
Date: Mon, 1 Jun 2009 23:43:10 +0530
Subject: hw-breakpoints: prepare the code for Hardware Breakpoint interfaces

The generic hardware breakpoint interface provides an abstraction of
hardware breakpoints in front of specific arch implementations for both kernel
and user side breakpoints.
This includes execution breakpoints and read/write breakpoints, also known as
"watchpoints".

This patch introduces header files containing constants, structure definitions
and declaration of functions used by the hardware breakpoint core and x86
specific code.
It also introduces an array based storage for the debug-register values in
'struct thread_struct', while modifying all users of debugreg<n> member in the
structure.

[ Impact: add headers for new hardware breakpoint interface ]

Original-patch-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
Reviewed-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/asm-generic/hw_breakpoint.h | 139 ++++++++++++++++++++++++++++++++++++
 1 file changed, 139 insertions(+)
 create mode 100644 include/asm-generic/hw_breakpoint.h

(limited to 'include')
diff --git a/include/asm-generic/hw_breakpoint.h b/include/asm-generic/hw_breakpoint.h
new file mode 100644
index 000000000000..9bf2d12eb74a
--- /dev/null
+++ b/include/asm-generic/hw_breakpoint.h
@@ -0,0 +1,139 @@
+#ifndef	_ASM_GENERIC_HW_BREAKPOINT_H
+#define	_ASM_GENERIC_HW_BREAKPOINT_H
+
+#ifndef __ARCH_HW_BREAKPOINT_H
+#error "Please don't include this file directly"
+#endif
+
+#ifdef	__KERNEL__
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/kallsyms.h>
+
+/**
+ * struct hw_breakpoint - unified kernel/user-space hardware breakpoint
+ * @triggered: callback invoked after target address access
+ * @info: arch-specific breakpoint info (address, length, and type)
+ *
+ * %hw_breakpoint structures are the kernel's way of representing
+ * hardware breakpoints.  These are data breakpoints
+ * (also known as "watchpoints", triggered on data access), and the breakpoint's
+ * target address can be located in either kernel space or user space.
+ *
+ * The breakpoint's address, length, and type are highly
+ * architecture-specific.  The values are encoded in the @info field; you
+ * specify them when registering the breakpoint.  To examine the encoded
+ * values use hw_breakpoint_get_{kaddress,uaddress,len,type}(), declared
+ * below.
+ *
+ * The address is specified as a regular kernel pointer (for kernel-space
+ * breakponts) or as an %__user pointer (for user-space breakpoints).
+ * With register_user_hw_breakpoint(), the address must refer to a
+ * location in user space.  The breakpoint will be active only while the
+ * requested task is running.  Conversely with
+ * register_kernel_hw_breakpoint(), the address must refer to a location
+ * in kernel space, and the breakpoint will be active on all CPUs
+ * regardless of the current task.
+ *
+ * The length is the breakpoint's extent in bytes, which is subject to
+ * certain limitations.  include/asm/hw_breakpoint.h contains macros
+ * defining the available lengths for a specific architecture.  Note that
+ * the address's alignment must match the length.  The breakpoint will
+ * catch accesses to any byte in the range from address to address +
+ * (length - 1).
+ *
+ * The breakpoint's type indicates the sort of access that will cause it
+ * to trigger.  Possible values may include:
+ *
+ * 	%HW_BREAKPOINT_RW (triggered on read or write access),
+ * 	%HW_BREAKPOINT_WRITE (triggered on write access), and
+ * 	%HW_BREAKPOINT_READ (triggered on read access).
+ *
+ * Appropriate macros are defined in include/asm/hw_breakpoint.h; not all
+ * possibilities are available on all architectures.  Execute breakpoints
+ * must have length equal to the special value %HW_BREAKPOINT_LEN_EXECUTE.
+ *
+ * When a breakpoint gets hit, the @triggered callback is
+ * invoked in_interrupt with a pointer to the %hw_breakpoint structure and the
+ * processor registers.
+ * Data breakpoints occur after the memory access has taken place.
+ * Breakpoints are disabled during execution @triggered, to avoid
+ * recursive traps and allow unhindered access to breakpointed memory.
+ *
+ * This sample code sets a breakpoint on pid_max and registers a callback
+ * function for writes to that variable.  Note that it is not portable
+ * as written, because not all architectures support HW_BREAKPOINT_LEN_4.
+ *
+ * ----------------------------------------------------------------------
+ *
+ * #include <asm/hw_breakpoint.h>
+ *
+ * struct hw_breakpoint my_bp;
+ *
+ * static void my_triggered(struct hw_breakpoint *bp, struct pt_regs *regs)
+ * {
+ * 	printk(KERN_DEBUG "Inside triggered routine of breakpoint exception\n");
+ * 	dump_stack();
+ *  	.......<more debugging output>........
+ * }
+ *
+ * static struct hw_breakpoint my_bp;
+ *
+ * static int init_module(void)
+ * {
+ *	..........<do anything>............
+ *	my_bp.info.type = HW_BREAKPOINT_WRITE;
+ *	my_bp.info.len = HW_BREAKPOINT_LEN_4;
+ *
+ *	my_bp.installed = (void *)my_bp_installed;
+ *
+ *	rc = register_kernel_hw_breakpoint(&my_bp);
+ *	..........<do anything>............
+ * }
+ *
+ * static void cleanup_module(void)
+ * {
+ *	..........<do anything>............
+ *	unregister_kernel_hw_breakpoint(&my_bp);
+ *	..........<do anything>............
+ * }
+ *
+ * ----------------------------------------------------------------------
+ */
+struct hw_breakpoint {
+	void (*triggered)(struct hw_breakpoint *, struct pt_regs *);
+	struct arch_hw_breakpoint info;
+};
+
+/*
+ * len and type values are defined in include/asm/hw_breakpoint.h.
+ * Available values vary according to the architecture.  On i386 the
+ * possibilities are:
+ *
+ *	HW_BREAKPOINT_LEN_1
+ *	HW_BREAKPOINT_LEN_2
+ *	HW_BREAKPOINT_LEN_4
+ *	HW_BREAKPOINT_RW
+ *	HW_BREAKPOINT_READ
+ *
+ * On other architectures HW_BREAKPOINT_LEN_8 may be available, and the
+ * 1-, 2-, and 4-byte lengths may be unavailable.  There also may be
+ * HW_BREAKPOINT_WRITE.  You can use #ifdef to check at compile time.
+ */
+
+extern int register_user_hw_breakpoint(struct task_struct *tsk,
+					struct hw_breakpoint *bp);
+extern int modify_user_hw_breakpoint(struct task_struct *tsk,
+					struct hw_breakpoint *bp);
+extern void unregister_user_hw_breakpoint(struct task_struct *tsk,
+						struct hw_breakpoint *bp);
+/*
+ * Kernel breakpoints are not associated with any particular thread.
+ */
+extern int register_kernel_hw_breakpoint(struct hw_breakpoint *bp);
+extern void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp);
+
+extern unsigned int hbp_kernel_pos;
+
+#endif	/* __KERNEL__ */
+#endif	/* _ASM_GENERIC_HW_BREAKPOINT_H */
-- 
cgit v1.2.3-70-g09d2


From bd1a5c849bdcc5c89e4a6a18216cd2b9a7a8a78f Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Thu, 13 Aug 2009 16:34:53 -0400
Subject: tracing: Ftrace dynamic ftrace_event_call support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add dynamic ftrace_event_call support to ftrace. Trace engines can add
new ftrace_event_call to ftrace on the fly. Each operator function of
the call takes an ftrace_event_call data structure as an argument,
because these functions may be shared among several ftrace_event_calls.

Changes from v13:
 - Define remove_subsystem_dir() always (revirt a2ca5e03), because
   trace_remove_event_call() uses it.
 - Modify syscall tracer because of ftrace_event_call change.

[fweisbec@gmail.com: Fixed conflict against latest tracing/core]

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Frank Ch. Eigler <fche@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Jim Keniston <jkenisto@us.ibm.com>
Cc: K.Prasad <prasad@linux.vnet.ibm.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Przemysław Pawełczyk <przemyslaw@pawelczyk.it>
Cc: Roland McGrath <roland@redhat.com>
Cc: Sam Ravnborg <sam@ravnborg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Vegard Nossum <vegard.nossum@gmail.com>
LKML-Reference: <20090813203453.31965.71901.stgit@localhost.localdomain>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/linux/ftrace_event.h  |  19 +++----
 include/linux/syscalls.h      |   4 +-
 include/trace/ftrace.h        |  16 +++---
 include/trace/syscall.h       |  11 ++--
 kernel/trace/trace_events.c   | 121 +++++++++++++++++++++++++++++-------------
 kernel/trace/trace_export.c   |  18 +++----
 kernel/trace/trace_syscalls.c |  20 +++----
 7 files changed, 130 insertions(+), 79 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index ace2da9e0a0d..1ab3089b5c59 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -112,12 +112,12 @@ struct ftrace_event_call {
 	struct dentry		*dir;
 	struct trace_event	*event;
 	int			enabled;
-	int			(*regfunc)(void *);
-	void			(*unregfunc)(void *);
+	int			(*regfunc)(struct ftrace_event_call *);
+	void			(*unregfunc)(struct ftrace_event_call *);
 	int			id;
-	int			(*raw_init)(void);
-	int			(*show_format)(struct ftrace_event_call *call,
-					       struct trace_seq *s);
+	int			(*raw_init)(struct ftrace_event_call *);
+	int			(*show_format)(struct ftrace_event_call *,
+					       struct trace_seq *);
 	int			(*define_fields)(struct ftrace_event_call *);
 	struct list_head	fields;
 	int			filter_active;
@@ -147,11 +147,12 @@ enum {
 	FILTER_PTR_STRING,
 };
 
-extern int trace_define_field(struct ftrace_event_call *call,
-			      const char *type, const char *name,
-			      int offset, int size, int is_signed,
-			      int filter_type);
 extern int trace_define_common_fields(struct ftrace_event_call *call);
+extern int trace_define_field(struct ftrace_event_call *call, char *type,
+			      char *name, int offset, int size, int is_signed,
+			      int filter_type);
+extern int trace_add_event_call(struct ftrace_event_call *call);
+extern void trace_remove_event_call(struct ftrace_event_call *call);
 
 #define is_signed_type(type)	(((type)(-1)) < 0)
 
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index f124c8995555..646102eeff92 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -165,7 +165,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \
 	struct trace_event enter_syscall_print_##sname = {		\
 		.trace                  = print_syscall_enter,		\
 	};								\
-	static int init_enter_##sname(void)				\
+	static int init_enter_##sname(struct ftrace_event_call *call)	\
 	{								\
 		int num, id;						\
 		num = syscall_name_to_nr("sys"#sname);			\
@@ -202,7 +202,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \
 	struct trace_event exit_syscall_print_##sname = {		\
 		.trace                  = print_syscall_exit,		\
 	};								\
-	static int init_exit_##sname(void)				\
+	static int init_exit_##sname(struct ftrace_event_call *call)	\
 	{								\
 		int num, id;						\
 		num = syscall_name_to_nr("sys"#sname);			\
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 360a77ad79e1..f2bd7a8f8e8b 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -434,7 +434,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
  *	event_trace_printk(_RET_IP_, "<call>: " <fmt>);
  * }
  *
- * static int ftrace_reg_event_<call>(void)
+ * static int ftrace_reg_event_<call>(struct ftrace_event_call *unused)
  * {
  *	int ret;
  *
@@ -445,7 +445,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
  *	return ret;
  * }
  *
- * static void ftrace_unreg_event_<call>(void)
+ * static void ftrace_unreg_event_<call>(struct ftrace_event_call *unused)
  * {
  *	unregister_trace_<call>(ftrace_event_<call>);
  * }
@@ -478,7 +478,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
  *	trace_current_buffer_unlock_commit(event, irq_flags, pc);
  * }
  *
- * static int ftrace_raw_reg_event_<call>(void)
+ * static int ftrace_raw_reg_event_<call>(struct ftrace_event_call *unused)
  * {
  *	int ret;
  *
@@ -489,7 +489,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
  *	return ret;
  * }
  *
- * static void ftrace_unreg_event_<call>(void)
+ * static void ftrace_unreg_event_<call>(struct ftrace_event_call *unused)
  * {
  *	unregister_trace_<call>(ftrace_raw_event_<call>);
  * }
@@ -498,7 +498,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
  *	.trace			= ftrace_raw_output_<call>, <-- stage 2
  * };
  *
- * static int ftrace_raw_init_event_<call>(void)
+ * static int ftrace_raw_init_event_<call>(struct ftrace_event_call *unused)
  * {
  *	int id;
  *
@@ -592,7 +592,7 @@ static void ftrace_raw_event_##call(proto)				\
 		trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \
 }									\
 									\
-static int ftrace_raw_reg_event_##call(void *ptr)			\
+static int ftrace_raw_reg_event_##call(struct ftrace_event_call *unused)\
 {									\
 	int ret;							\
 									\
@@ -603,7 +603,7 @@ static int ftrace_raw_reg_event_##call(void *ptr)			\
 	return ret;							\
 }									\
 									\
-static void ftrace_raw_unreg_event_##call(void *ptr)			\
+static void ftrace_raw_unreg_event_##call(struct ftrace_event_call *unused)\
 {									\
 	unregister_trace_##call(ftrace_raw_event_##call);		\
 }									\
@@ -612,7 +612,7 @@ static struct trace_event ftrace_event_type_##call = {			\
 	.trace			= ftrace_raw_output_##call,		\
 };									\
 									\
-static int ftrace_raw_init_event_##call(void)				\
+static int ftrace_raw_init_event_##call(struct ftrace_event_call *unused)\
 {									\
 	int id;								\
 									\
diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index 5dc283ba5ae0..e290b86f6167 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -39,16 +39,19 @@ void set_syscall_enter_id(int num, int id);
 void set_syscall_exit_id(int num, int id);
 extern struct trace_event event_syscall_enter;
 extern struct trace_event event_syscall_exit;
-extern int reg_event_syscall_enter(void *ptr);
-extern void unreg_event_syscall_enter(void *ptr);
-extern int reg_event_syscall_exit(void *ptr);
-extern void unreg_event_syscall_exit(void *ptr);
+
 extern int syscall_enter_format(struct ftrace_event_call *call,
 				struct trace_seq *s);
 extern int syscall_exit_format(struct ftrace_event_call *call,
 				struct trace_seq *s);
 extern int syscall_enter_define_fields(struct ftrace_event_call *call);
 extern int syscall_exit_define_fields(struct ftrace_event_call *call);
+extern int reg_event_syscall_enter(struct ftrace_event_call *call);
+extern void unreg_event_syscall_enter(struct ftrace_event_call *call);
+extern int reg_event_syscall_exit(struct ftrace_event_call *call);
+extern void unreg_event_syscall_exit(struct ftrace_event_call *call);
+extern int
+ftrace_format_syscall(struct ftrace_event_call *call, struct trace_seq *s);
 enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags);
 enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags);
 #endif
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index d33bcdeffe69..8079bb511c43 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -27,8 +27,8 @@ DEFINE_MUTEX(event_mutex);
 
 LIST_HEAD(ftrace_events);
 
-int trace_define_field(struct ftrace_event_call *call, const char *type,
-		       const char *name, int offset, int size, int is_signed,
+int trace_define_field(struct ftrace_event_call *call, char *type,
+		       char *name, int offset, int size, int is_signed,
 		       int filter_type)
 {
 	struct ftrace_event_field *field;
@@ -92,9 +92,7 @@ int trace_define_common_fields(struct ftrace_event_call *call)
 }
 EXPORT_SYMBOL_GPL(trace_define_common_fields);
 
-#ifdef CONFIG_MODULES
-
-static void trace_destroy_fields(struct ftrace_event_call *call)
+void trace_destroy_fields(struct ftrace_event_call *call)
 {
 	struct ftrace_event_field *field, *next;
 
@@ -106,8 +104,6 @@ static void trace_destroy_fields(struct ftrace_event_call *call)
 	}
 }
 
-#endif /* CONFIG_MODULES */
-
 static void ftrace_event_enable_disable(struct ftrace_event_call *call,
 					int enable)
 {
@@ -116,14 +112,14 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call,
 		if (call->enabled) {
 			call->enabled = 0;
 			tracing_stop_cmdline_record();
-			call->unregfunc(call->data);
+			call->unregfunc(call);
 		}
 		break;
 	case 1:
 		if (!call->enabled) {
 			call->enabled = 1;
 			tracing_start_cmdline_record();
-			call->regfunc(call->data);
+			call->regfunc(call);
 		}
 		break;
 	}
@@ -991,27 +987,43 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
 	return 0;
 }
 
-#define for_each_event(event, start, end)			\
-	for (event = start;					\
-	     (unsigned long)event < (unsigned long)end;		\
-	     event++)
+static int __trace_add_event_call(struct ftrace_event_call *call)
+{
+	struct dentry *d_events;
+	int ret;
 
-#ifdef CONFIG_MODULES
+	if (!call->name)
+		return -EINVAL;
 
-static LIST_HEAD(ftrace_module_file_list);
+	if (call->raw_init) {
+		ret = call->raw_init(call);
+		if (ret < 0) {
+			if (ret != -ENOSYS)
+				pr_warning("Could not initialize trace "
+				"events/%s\n", call->name);
+			return ret;
+		}
+	}
 
-/*
- * Modules must own their file_operations to keep up with
- * reference counting.
- */
-struct ftrace_module_file_ops {
-	struct list_head		list;
-	struct module			*mod;
-	struct file_operations		id;
-	struct file_operations		enable;
-	struct file_operations		format;
-	struct file_operations		filter;
-};
+	d_events = event_trace_events_dir();
+	if (!d_events)
+		return -ENOENT;
+
+	list_add(&call->list, &ftrace_events);
+	return event_create_dir(call, d_events, &ftrace_event_id_fops,
+				&ftrace_enable_fops, &ftrace_event_filter_fops,
+				&ftrace_event_format_fops);
+}
+
+/* Add an additional event_call dynamically */
+int trace_add_event_call(struct ftrace_event_call *call)
+{
+	int ret;
+	mutex_lock(&event_mutex);
+	ret = __trace_add_event_call(call);
+	mutex_unlock(&event_mutex);
+	return ret;
+}
 
 static void remove_subsystem_dir(const char *name)
 {
@@ -1039,6 +1051,48 @@ static void remove_subsystem_dir(const char *name)
 	}
 }
 
+static void __trace_remove_event_call(struct ftrace_event_call *call)
+{
+	ftrace_event_enable_disable(call, 0);
+	if (call->event)
+		__unregister_ftrace_event(call->event);
+	debugfs_remove_recursive(call->dir);
+	list_del(&call->list);
+	trace_destroy_fields(call);
+	destroy_preds(call);
+	remove_subsystem_dir(call->system);
+}
+
+/* Remove an event_call */
+void trace_remove_event_call(struct ftrace_event_call *call)
+{
+	mutex_lock(&event_mutex);
+	__trace_remove_event_call(call);
+	mutex_unlock(&event_mutex);
+}
+
+#define for_each_event(event, start, end)			\
+	for (event = start;					\
+	     (unsigned long)event < (unsigned long)end;		\
+	     event++)
+
+#ifdef CONFIG_MODULES
+
+static LIST_HEAD(ftrace_module_file_list);
+
+/*
+ * Modules must own their file_operations to keep up with
+ * reference counting.
+ */
+struct ftrace_module_file_ops {
+	struct list_head		list;
+	struct module			*mod;
+	struct file_operations		id;
+	struct file_operations		enable;
+	struct file_operations		format;
+	struct file_operations		filter;
+};
+
 static struct ftrace_module_file_ops *
 trace_create_file_ops(struct module *mod)
 {
@@ -1096,7 +1150,7 @@ static void trace_module_add_events(struct module *mod)
 		if (!call->name)
 			continue;
 		if (call->raw_init) {
-			ret = call->raw_init();
+			ret = call->raw_init(call);
 			if (ret < 0) {
 				if (ret != -ENOSYS)
 					pr_warning("Could not initialize trace "
@@ -1131,14 +1185,7 @@ static void trace_module_remove_events(struct module *mod)
 	list_for_each_entry_safe(call, p, &ftrace_events, list) {
 		if (call->mod == mod) {
 			found = true;
-			ftrace_event_enable_disable(call, 0);
-			if (call->event)
-				__unregister_ftrace_event(call->event);
-			debugfs_remove_recursive(call->dir);
-			list_del(&call->list);
-			trace_destroy_fields(call);
-			destroy_preds(call);
-			remove_subsystem_dir(call->system);
+			__trace_remove_event_call(call);
 		}
 	}
 
@@ -1256,7 +1303,7 @@ static __init int event_trace_init(void)
 		if (!call->name)
 			continue;
 		if (call->raw_init) {
-			ret = call->raw_init();
+			ret = call->raw_init(call);
 			if (ret < 0) {
 				if (ret != -ENOSYS)
 					pr_warning("Could not initialize trace "
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 029a91f42287..9cbe7f1930ea 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -117,10 +117,16 @@ ftrace_format_##call(struct ftrace_event_call *unused,			\
 #define TRACE_FIELD_SPECIAL(type_item, item, len, cmd)	\
 	cmd;
 
+static int ftrace_raw_init_event(struct ftrace_event_call *event_call)
+{
+	INIT_LIST_HEAD(&event_call->fields);
+	init_preds(event_call);
+	return 0;
+}
+
 #undef TRACE_EVENT_FORMAT
 #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt)	\
 int ftrace_define_fields_##call(struct ftrace_event_call *event_call);	\
-static int ftrace_raw_init_event_##call(void);				\
 									\
 struct ftrace_event_call __used						\
 __attribute__((__aligned__(4)))						\
@@ -128,16 +134,10 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 	.name			= #call,				\
 	.id			= proto,				\
 	.system			= __stringify(TRACE_SYSTEM),		\
-	.raw_init		= ftrace_raw_init_event_##call,		\
+	.raw_init		= ftrace_raw_init_event,		\
 	.show_format		= ftrace_format_##call,			\
 	.define_fields		= ftrace_define_fields_##call,		\
-};									\
-static int ftrace_raw_init_event_##call(void)				\
-{									\
-	INIT_LIST_HEAD(&event_##call.fields);				\
-	init_preds(&event_##call);					\
-	return 0;							\
-}									\
+};
 
 #undef TRACE_EVENT_FORMAT_NOFILTER
 #define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct,	\
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 85291c4de406..5931933587e9 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -193,8 +193,8 @@ int syscall_enter_define_fields(struct ftrace_event_call *call)
 		return ret;
 
 	for (i = 0; i < meta->nb_args; i++) {
-		ret = trace_define_field(call, meta->types[i],
-					 meta->args[i], offset,
+		ret = trace_define_field(call, (char *)meta->types[i],
+					 (char *)meta->args[i], offset,
 					 sizeof(unsigned long), 0,
 					 FILTER_OTHER);
 		offset += sizeof(unsigned long);
@@ -277,13 +277,13 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret)
 		trace_current_buffer_unlock_commit(event, 0, 0);
 }
 
-int reg_event_syscall_enter(void *ptr)
+int reg_event_syscall_enter(struct ftrace_event_call *call)
 {
 	int ret = 0;
 	int num;
 	char *name;
 
-	name = (char *)ptr;
+	name = (char *)call->data;
 	num = syscall_name_to_nr(name);
 	if (num < 0 || num >= FTRACE_SYSCALL_MAX)
 		return -ENOSYS;
@@ -301,12 +301,12 @@ int reg_event_syscall_enter(void *ptr)
 	return ret;
 }
 
-void unreg_event_syscall_enter(void *ptr)
+void unreg_event_syscall_enter(struct ftrace_event_call *call)
 {
 	int num;
 	char *name;
 
-	name = (char *)ptr;
+	name = (char *)call->data;
 	num = syscall_name_to_nr(name);
 	if (num < 0 || num >= FTRACE_SYSCALL_MAX)
 		return;
@@ -318,13 +318,13 @@ void unreg_event_syscall_enter(void *ptr)
 	mutex_unlock(&syscall_trace_lock);
 }
 
-int reg_event_syscall_exit(void *ptr)
+int reg_event_syscall_exit(struct ftrace_event_call *call)
 {
 	int ret = 0;
 	int num;
 	char *name;
 
-	name = (char *)ptr;
+	name = (char *)call->data;
 	num = syscall_name_to_nr(name);
 	if (num < 0 || num >= FTRACE_SYSCALL_MAX)
 		return -ENOSYS;
@@ -342,12 +342,12 @@ int reg_event_syscall_exit(void *ptr)
 	return ret;
 }
 
-void unreg_event_syscall_exit(void *ptr)
+void unreg_event_syscall_exit(struct ftrace_event_call *call)
 {
 	int num;
 	char *name;
 
-	name = (char *)ptr;
+	name = (char *)call->data;
 	num = syscall_name_to_nr(name);
 	if (num < 0 || num >= FTRACE_SYSCALL_MAX)
 		return;
-- 
cgit v1.2.3-70-g09d2


From 24851d2447830e6cba4c4b641cb73e713f312373 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 26 Aug 2009 23:38:30 +0200
Subject: tracing/kprobes: Dump the culprit kprobe in case of kprobe recursion

Kprobes can enter into a probing recursion, ie: a kprobe that does an
endless loop because one of its core mechanism function used during
probing is also probed itself.

This patch helps pinpointing the kprobe that raised such recursion
by dumping it and raising a BUG instead of a warning (we also disarm
the kprobe to try avoiding recursion in BUG itself). Having a BUG
instead of a warning stops the stacktrace in the right place and
doesn't pollute the logs with hundreds of traces that eventually end
up in a stack overflow.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
---
 arch/x86/kernel/kprobes.c | 8 ++++++--
 include/linux/kprobes.h   | 2 ++
 kernel/kprobes.c          | 7 +++++++
 3 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 16ae9610f6ff..ecee3d23fef8 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -490,9 +490,13 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
 			/* A probe has been hit in the codepath leading up
 			 * to, or just after, single-stepping of a probed
 			 * instruction. This entire codepath should strictly
-			 * reside in .kprobes.text section. Raise a warning
-			 * to highlight this peculiar case.
+			 * reside in .kprobes.text section.
+			 * Raise a BUG or we'll continue in an endless
+			 * reentering loop and eventually a stack overflow.
 			 */
+			arch_disarm_kprobe(p);
+			dump_kprobe(p);
+			BUG();
 		}
 	default:
 		/* impossible cases */
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index bcd9c07848be..87eb79c9dd60 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -296,6 +296,8 @@ void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head);
 int disable_kprobe(struct kprobe *kp);
 int enable_kprobe(struct kprobe *kp);
 
+void dump_kprobe(struct kprobe *kp);
+
 #else /* !CONFIG_KPROBES: */
 
 static inline int kprobes_built_in(void)
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index ef177d653b2c..f72e96c25a38 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1141,6 +1141,13 @@ static void __kprobes kill_kprobe(struct kprobe *p)
 	arch_remove_kprobe(p);
 }
 
+void __kprobes dump_kprobe(struct kprobe *kp)
+{
+	printk(KERN_WARNING "Dumping kprobe:\n");
+	printk(KERN_WARNING "Name: %s\nAddress: %p\nOffset: %x\n",
+	       kp->symbol_name, kp->addr, kp->offset);
+}
+
 /* Module notifier call back, checking kprobes on the module */
 static int __kprobes kprobes_module_callback(struct notifier_block *nb,
 					     unsigned long val, void *data)
-- 
cgit v1.2.3-70-g09d2


From aeaeae1187d7520f1c5559623f0a149da6a1c96e Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 27 Aug 2009 05:09:51 +0200
Subject: tracing: Restore the const qualifier for field names and types
 definition

Restore the const qualifier in field's name and type parameters of
trace_define_field that was lost while solving a conflict.

Fields names and types are defined as builtin constant strings in
static TRACE_EVENTs. But kprobes allocates these dynamically.

That said, we still want to always pass these strings as const char *
in trace_define_fields() to avoid any further accidental writes on
the pointed strings.

Reported-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h  | 6 +++---
 kernel/trace/trace_events.c   | 4 ++--
 kernel/trace/trace_syscalls.c | 4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 1ab3089b5c59..73edf5a52e31 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -148,9 +148,9 @@ enum {
 };
 
 extern int trace_define_common_fields(struct ftrace_event_call *call);
-extern int trace_define_field(struct ftrace_event_call *call, char *type,
-			      char *name, int offset, int size, int is_signed,
-			      int filter_type);
+extern int trace_define_field(struct ftrace_event_call *call, const char *type,
+			      const char *name, int offset, int size,
+			      int is_signed, int filter_type);
 extern int trace_add_event_call(struct ftrace_event_call *call);
 extern void trace_remove_event_call(struct ftrace_event_call *call);
 
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 8079bb511c43..197cdaa96c43 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -27,8 +27,8 @@ DEFINE_MUTEX(event_mutex);
 
 LIST_HEAD(ftrace_events);
 
-int trace_define_field(struct ftrace_event_call *call, char *type,
-		       char *name, int offset, int size, int is_signed,
+int trace_define_field(struct ftrace_event_call *call, const char *type,
+		       const char *name, int offset, int size, int is_signed,
 		       int filter_type)
 {
 	struct ftrace_event_field *field;
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 5931933587e9..a928dd004535 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -193,8 +193,8 @@ int syscall_enter_define_fields(struct ftrace_event_call *call)
 		return ret;
 
 	for (i = 0; i < meta->nb_args; i++) {
-		ret = trace_define_field(call, (char *)meta->types[i],
-					 (char *)meta->args[i], offset,
+		ret = trace_define_field(call, meta->types[i],
+					 meta->args[i], offset,
 					 sizeof(unsigned long), 0,
 					 FILTER_OTHER);
 		offset += sizeof(unsigned long);
-- 
cgit v1.2.3-70-g09d2


From 979f693def9084a452846365dfde5dcb28366333 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 22 Sep 2009 14:44:11 +0200
Subject: ratelimit: Use per ratelimit context locking

I'd like to use printk_ratelimit() in atomic context, but that's
not possible right now due to the spinlock usage this commit
introduced more than a year ago:

  717115e: printk ratelimiting rewrite

As a first step push the lock into the ratelimit state structure.
This allows us to deal with locking failures to be considered as an
event related to that state being too busy.

Also clean up the code a bit (without changing functionality):

 - tidy up the definitions

 - clean up the code flow

This also shrinks the code a tiny bit:

   text	   data	    bss	    dec	    hex	filename
    264	      0	      4	    268	    10c	ratelimit.o.before
    255	      0	      0	    255	     ff	ratelimit.o.after

( Whole-kernel data size got a bit larger, because we have
  two ratelimit-state data structures right now. )

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: David S. Miller <davem@davemloft.net>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ratelimit.h | 30 ++++++++++++++++++++----------
 lib/ratelimit.c           | 29 +++++++++++++----------------
 2 files changed, 33 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h
index 00044b856453..187bc16c1f15 100644
--- a/include/linux/ratelimit.h
+++ b/include/linux/ratelimit.h
@@ -1,20 +1,30 @@
 #ifndef _LINUX_RATELIMIT_H
 #define _LINUX_RATELIMIT_H
+
 #include <linux/param.h>
+#include <linux/spinlock_types.h>
 
-#define DEFAULT_RATELIMIT_INTERVAL (5 * HZ)
-#define DEFAULT_RATELIMIT_BURST 10
+#define DEFAULT_RATELIMIT_INTERVAL	(5 * HZ)
+#define DEFAULT_RATELIMIT_BURST		10
 
 struct ratelimit_state {
-	int interval;
-	int burst;
-	int printed;
-	int missed;
-	unsigned long begin;
+	spinlock_t	lock;		/* protect the state */
+
+	int		interval;
+	int		burst;
+	int		printed;
+	int		missed;
+	unsigned long	begin;
 };
 
-#define DEFINE_RATELIMIT_STATE(name, interval, burst)		\
-		struct ratelimit_state name = {interval, burst,}
+#define DEFINE_RATELIMIT_STATE(name, interval_init, burst_init)		\
+									\
+	struct ratelimit_state name = {					\
+		.lock		= __SPIN_LOCK_UNLOCKED(name.lock),	\
+		.interval	= interval_init,			\
+		.burst		= burst_init,				\
+	}
 
 extern int __ratelimit(struct ratelimit_state *rs);
-#endif
+
+#endif /* _LINUX_RATELIMIT_H */
diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index 26187edcc7ea..0e2c28e8a0ca 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -7,15 +7,12 @@
  * parameter. Now every user can use their own standalone ratelimit_state.
  *
  * This file is released under the GPLv2.
- *
  */
 
 #include <linux/kernel.h>
 #include <linux/jiffies.h>
 #include <linux/module.h>
 
-static DEFINE_SPINLOCK(ratelimit_lock);
-
 /*
  * __ratelimit - rate limiting
  * @rs: ratelimit_state data
@@ -26,11 +23,12 @@ static DEFINE_SPINLOCK(ratelimit_lock);
 int __ratelimit(struct ratelimit_state *rs)
 {
 	unsigned long flags;
+	int ret;
 
 	if (!rs->interval)
 		return 1;
 
-	spin_lock_irqsave(&ratelimit_lock, flags);
+	spin_lock_irqsave(&rs->lock, flags);
 	if (!rs->begin)
 		rs->begin = jiffies;
 
@@ -38,20 +36,19 @@ int __ratelimit(struct ratelimit_state *rs)
 		if (rs->missed)
 			printk(KERN_WARNING "%s: %d callbacks suppressed\n",
 				__func__, rs->missed);
-		rs->begin = 0;
+		rs->begin   = 0;
 		rs->printed = 0;
-		rs->missed = 0;
+		rs->missed  = 0;
 	}
-	if (rs->burst && rs->burst > rs->printed)
-		goto print;
-
-	rs->missed++;
-	spin_unlock_irqrestore(&ratelimit_lock, flags);
-	return 0;
+	if (rs->burst && rs->burst > rs->printed) {
+		rs->printed++;
+		ret = 1;
+	} else {
+		rs->missed++;
+		ret = 0;
+	}
+	spin_unlock_irqrestore(&rs->lock, flags);
 
-print:
-	rs->printed++;
-	spin_unlock_irqrestore(&ratelimit_lock, flags);
-	return 1;
+	return ret;
 }
 EXPORT_SYMBOL(__ratelimit);
-- 
cgit v1.2.3-70-g09d2


From 3fff4c42bd0a89869a0eb1e7874cc06ffa4aa0f5 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 22 Sep 2009 16:18:09 +0200
Subject: printk: Remove ratelimit.h from kernel.h

Decouple kernel.h from ratelimit.h: the global declaration of
printk's ratelimit_state is not needed, and it leads to messy
circular dependencies due to ratelimit.h's (new) adding of a
spinlock_types.h include.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: David S. Miller <davem@davemloft.net>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/kernel.h     | 2 --
 include/linux/net.h        | 1 +
 kernel/printk.c            | 1 +
 kernel/sysctl.c            | 3 +++
 lib/ratelimit.c            | 2 +-
 net/core/sysctl_net_core.c | 2 ++
 net/core/utils.c           | 2 ++
 7 files changed, 10 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 2b5b1e0899a8..3305f33201be 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -15,7 +15,6 @@
 #include <linux/bitops.h>
 #include <linux/log2.h>
 #include <linux/typecheck.h>
-#include <linux/ratelimit.h>
 #include <linux/dynamic_debug.h>
 #include <asm/byteorder.h>
 #include <asm/bug.h>
@@ -241,7 +240,6 @@ asmlinkage int vprintk(const char *fmt, va_list args)
 asmlinkage int printk(const char * fmt, ...)
 	__attribute__ ((format (printf, 1, 2))) __cold;
 
-extern struct ratelimit_state printk_ratelimit_state;
 extern int printk_ratelimit(void);
 extern bool printk_timed_ratelimit(unsigned long *caller_jiffies,
 				   unsigned int interval_msec);
diff --git a/include/linux/net.h b/include/linux/net.h
index 9040a10584f7..df20f680f455 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -358,6 +358,7 @@ static const struct proto_ops name##_ops = {			\
 
 #ifdef CONFIG_SYSCTL
 #include <linux/sysctl.h>
+#include <linux/ratelimit.h>
 extern struct ratelimit_state net_ratelimit_state;
 #endif
 
diff --git a/kernel/printk.c b/kernel/printk.c
index 602033acd6c7..b997c893cdcf 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -33,6 +33,7 @@
 #include <linux/bootmem.h>
 #include <linux/syscalls.h>
 #include <linux/kexec.h>
+#include <linux/ratelimit.h>
 
 #include <asm/uaccess.h>
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 1a631ba684a4..6c37048b9db9 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -37,6 +37,7 @@
 #include <linux/sysrq.h>
 #include <linux/highuid.h>
 #include <linux/writeback.h>
+#include <linux/ratelimit.h>
 #include <linux/hugetlb.h>
 #include <linux/initrd.h>
 #include <linux/key.h>
@@ -155,6 +156,8 @@ extern int no_unaligned_warning;
 extern int unaligned_dump_stack;
 #endif
 
+extern struct ratelimit_state printk_ratelimit_state;
+
 #ifdef CONFIG_RT_MUTEXES
 extern int max_lock_depth;
 #endif
diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index 69bfcacda16d..5551731ae1d4 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -9,7 +9,7 @@
  * This file is released under the GPLv2.
  */
 
-#include <linux/kernel.h>
+#include <linux/ratelimit.h>
 #include <linux/jiffies.h>
 #include <linux/module.h>
 
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 7db1de0497c6..887c03c4e3c6 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -10,7 +10,9 @@
 #include <linux/module.h>
 #include <linux/socket.h>
 #include <linux/netdevice.h>
+#include <linux/ratelimit.h>
 #include <linux/init.h>
+
 #include <net/ip.h>
 #include <net/sock.h>
 
diff --git a/net/core/utils.c b/net/core/utils.c
index 83221aee7084..838250241d26 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -24,6 +24,8 @@
 #include <linux/types.h>
 #include <linux/percpu.h>
 #include <linux/init.h>
+#include <linux/ratelimit.h>
+
 #include <net/sock.h>
 
 #include <asm/byteorder.h>
-- 
cgit v1.2.3-70-g09d2


From 96a2c464de07d7c72988db851c029b204fc59108 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 1 Aug 2009 01:34:24 +0200
Subject: tracing/bkl: Add bkl ftrace events

Add two events lock_kernel and unlock_kernel() to trace the bkl uses.
This opens the door for userspace tools to perform statistics about
the callsites that use it, dependencies with other locks (by pairing
the trace with lock events), use with recursivity and so on...

The {__reacquire,release}_kernel_lock() events are not traced because
these are called from schedule, thus the sched events are sufficient
to trace them.

Example of a trace:

hald-addon-stor-4152  [000]   165.875501: unlock_kernel: depth: 0, fs/block_dev.c:1358 __blkdev_put()
hald-addon-stor-4152  [000]   167.832974: lock_kernel: depth: 0, fs/block_dev.c:1167 __blkdev_get()

How to get the callsites that acquire it recursively:

cd /debug/tracing/events/bkl
echo "lock_depth > 0" > filter

firefox-4951  [001]   206.276967: unlock_kernel: depth: 1, fs/reiserfs/super.c:575 reiserfs_dirty_inode()

You can also filter by file and/or line.

v2: Use of FILTER_PTR_STRING attribute for files and lines fields to
    make them traceable.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
---
 include/linux/smp_lock.h   | 19 ++++++++++++---
 include/trace/events/bkl.h | 61 ++++++++++++++++++++++++++++++++++++++++++++++
 lib/kernel_lock.c          | 11 +++++----
 3 files changed, 82 insertions(+), 9 deletions(-)
 create mode 100644 include/trace/events/bkl.h

(limited to 'include')

diff --git a/include/linux/smp_lock.h b/include/linux/smp_lock.h
index 813be59bf345..d48cc77ba70d 100644
--- a/include/linux/smp_lock.h
+++ b/include/linux/smp_lock.h
@@ -3,6 +3,7 @@
 
 #ifdef CONFIG_LOCK_KERNEL
 #include <linux/sched.h>
+#include <trace/events/bkl.h>
 
 #define kernel_locked()		(current->lock_depth >= 0)
 
@@ -24,8 +25,18 @@ static inline int reacquire_kernel_lock(struct task_struct *task)
 	return 0;
 }
 
-extern void __lockfunc lock_kernel(void)	__acquires(kernel_lock);
-extern void __lockfunc unlock_kernel(void)	__releases(kernel_lock);
+extern void __lockfunc _lock_kernel(void)	__acquires(kernel_lock);
+extern void __lockfunc _unlock_kernel(void)	__releases(kernel_lock);
+
+#define lock_kernel()	{					\
+	trace_lock_kernel(__func__, __FILE__, __LINE__);	\
+	_lock_kernel();						\
+}
+
+#define unlock_kernel()	{					\
+	trace_unlock_kernel(__func__, __FILE__, __LINE__);	\
+	_unlock_kernel();					\
+}
 
 /*
  * Various legacy drivers don't really need the BKL in a specific
@@ -41,8 +52,8 @@ static inline void cycle_kernel_lock(void)
 
 #else
 
-#define lock_kernel()				do { } while(0)
-#define unlock_kernel()				do { } while(0)
+#define lock_kernel()	   trace_lock_kernel(__func__, __FILE__, __LINE__);
+#define unlock_kernel()    trace_unlock_kernel(__func__, __FILE__, __LINE__);
 #define release_kernel_lock(task)		do { } while(0)
 #define cycle_kernel_lock()			do { } while(0)
 #define reacquire_kernel_lock(task)		0
diff --git a/include/trace/events/bkl.h b/include/trace/events/bkl.h
new file mode 100644
index 000000000000..8abd620a490e
--- /dev/null
+++ b/include/trace/events/bkl.h
@@ -0,0 +1,61 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM bkl
+
+#if !defined(_TRACE_BKL_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_BKL_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(lock_kernel,
+
+	TP_PROTO(const char *func, const char *file, int line),
+
+	TP_ARGS(func, file, line),
+
+	TP_STRUCT__entry(
+		__field(	int,		lock_depth		)
+		__field_ext(	const char *,	func, FILTER_PTR_STRING	)
+		__field_ext(	const char *,	file, FILTER_PTR_STRING	)
+		__field(	int,		line			)
+	),
+
+	TP_fast_assign(
+		/* We want to record the lock_depth after lock is acquired */
+		__entry->lock_depth = current->lock_depth + 1;
+		__entry->func = func;
+		__entry->file = file;
+		__entry->line = line;
+	),
+
+	TP_printk("depth: %d, %s:%d %s()", __entry->lock_depth,
+		  __entry->file, __entry->line, __entry->func)
+);
+
+TRACE_EVENT(unlock_kernel,
+
+	TP_PROTO(const char *func, const char *file, int line),
+
+	TP_ARGS(func, file, line),
+
+	TP_STRUCT__entry(
+		__field(int,		lock_depth)
+		__field(const char *,	func)
+		__field(const char *,	file)
+		__field(int,		line)
+	),
+
+	TP_fast_assign(
+		__entry->lock_depth = current->lock_depth;
+		__entry->func = func;
+		__entry->file = file;
+		__entry->line = line;
+	),
+
+	TP_printk("depth: %d, %s:%d %s()", __entry->lock_depth,
+		  __entry->file, __entry->line, __entry->func)
+);
+
+#endif /* _TRACE_BKL_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c
index 39f1029e3525..5c10b2e1fd08 100644
--- a/lib/kernel_lock.c
+++ b/lib/kernel_lock.c
@@ -5,10 +5,11 @@
  * relegated to obsolescence, but used by various less
  * important (or lazy) subsystems.
  */
-#include <linux/smp_lock.h>
 #include <linux/module.h>
 #include <linux/kallsyms.h>
 #include <linux/semaphore.h>
+#define CREATE_TRACE_POINTS
+#include <linux/smp_lock.h>
 
 /*
  * The 'big kernel lock'
@@ -113,7 +114,7 @@ static inline void __unlock_kernel(void)
  * This cannot happen asynchronously, so we only need to
  * worry about other CPU's.
  */
-void __lockfunc lock_kernel(void)
+void __lockfunc _lock_kernel(void)
 {
 	int depth = current->lock_depth+1;
 	if (likely(!depth))
@@ -121,13 +122,13 @@ void __lockfunc lock_kernel(void)
 	current->lock_depth = depth;
 }
 
-void __lockfunc unlock_kernel(void)
+void __lockfunc _unlock_kernel(void)
 {
 	BUG_ON(current->lock_depth < 0);
 	if (likely(--current->lock_depth < 0))
 		__unlock_kernel();
 }
 
-EXPORT_SYMBOL(lock_kernel);
-EXPORT_SYMBOL(unlock_kernel);
+EXPORT_SYMBOL(_lock_kernel);
+EXPORT_SYMBOL(_unlock_kernel);
 
-- 
cgit v1.2.3-70-g09d2


From 9f0cf4adb6aa0bfccf675c938124e68f7f06349d Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Sat, 26 Sep 2009 14:33:01 +0200
Subject: x86: Use __builtin_object_size() to validate the buffer size for
 copy_from_user()

gcc (4.x) supports the __builtin_object_size() builtin, which
reports the size of an object that a pointer point to, when known
at compile time. If the buffer size is not known at compile time, a
constant -1 is returned.

This patch uses this feature to add a sanity check to
copy_from_user(); if the target buffer is known to be smaller than
the copy size, the copy is aborted and a WARNing is emitted in
memory debug mode.

These extra checks compile away when the object size is not known,
or if both the buffer size and the copy length are constants.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
LKML-Reference: <20090926143301.2c396b94@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/uaccess_32.h | 19 ++++++++++++++++++-
 arch/x86/include/asm/uaccess_64.h | 19 ++++++++++++++++++-
 arch/x86/kernel/x8664_ksyms_64.c  |  2 +-
 arch/x86/lib/copy_user_64.S       |  4 ++--
 arch/x86/lib/usercopy_32.c        |  4 ++--
 include/linux/compiler-gcc4.h     |  2 ++
 include/linux/compiler.h          |  4 ++++
 7 files changed, 47 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 632fb44b4cb5..582d6aef7417 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -187,9 +187,26 @@ __copy_from_user_inatomic_nocache(void *to, const void __user *from,
 
 unsigned long __must_check copy_to_user(void __user *to,
 					const void *from, unsigned long n);
-unsigned long __must_check copy_from_user(void *to,
+unsigned long __must_check _copy_from_user(void *to,
 					  const void __user *from,
 					  unsigned long n);
+
+static inline unsigned long __must_check copy_from_user(void *to,
+					  const void __user *from,
+					  unsigned long n)
+{
+	int sz = __compiletime_object_size(to);
+	int ret = -EFAULT;
+
+	if (likely(sz == -1 || sz >= n))
+		ret = _copy_from_user(to, from, n);
+#ifdef CONFIG_DEBUG_VM
+	else
+		WARN(1, "Buffer overflow detected!\n");
+#endif
+	return ret;
+}
+
 long __must_check strncpy_from_user(char *dst, const char __user *src,
 				    long count);
 long __must_check __strncpy_from_user(char *dst,
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index db24b215fc50..ce6fec7ce38d 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -21,10 +21,27 @@ copy_user_generic(void *to, const void *from, unsigned len);
 __must_check unsigned long
 copy_to_user(void __user *to, const void *from, unsigned len);
 __must_check unsigned long
-copy_from_user(void *to, const void __user *from, unsigned len);
+_copy_from_user(void *to, const void __user *from, unsigned len);
 __must_check unsigned long
 copy_in_user(void __user *to, const void __user *from, unsigned len);
 
+static inline unsigned long __must_check copy_from_user(void *to,
+					  const void __user *from,
+					  unsigned long n)
+{
+	int sz = __compiletime_object_size(to);
+	int ret = -EFAULT;
+
+	if (likely(sz == -1 || sz >= n))
+		ret = _copy_from_user(to, from, n);
+#ifdef CONFIG_DEBUG_VM
+	else
+		WARN(1, "Buffer overflow detected!\n");
+#endif
+	return ret;
+}
+
+
 static __always_inline __must_check
 int __copy_from_user(void *dst, const void __user *src, unsigned size)
 {
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 3909e3ba5ce3..a0cdd8cc1d67 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -30,7 +30,7 @@ EXPORT_SYMBOL(__put_user_8);
 
 EXPORT_SYMBOL(copy_user_generic);
 EXPORT_SYMBOL(__copy_user_nocache);
-EXPORT_SYMBOL(copy_from_user);
+EXPORT_SYMBOL(_copy_from_user);
 EXPORT_SYMBOL(copy_to_user);
 EXPORT_SYMBOL(__copy_from_user_inatomic);
 
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 6ba0f7bb85ea..4be3c415b3e9 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -78,7 +78,7 @@ ENTRY(copy_to_user)
 ENDPROC(copy_to_user)
 
 /* Standard copy_from_user with segment limit checking */
-ENTRY(copy_from_user)
+ENTRY(_copy_from_user)
 	CFI_STARTPROC
 	GET_THREAD_INFO(%rax)
 	movq %rsi,%rcx
@@ -88,7 +88,7 @@ ENTRY(copy_from_user)
 	jae bad_from_user
 	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
 	CFI_ENDPROC
-ENDPROC(copy_from_user)
+ENDPROC(_copy_from_user)
 
 ENTRY(copy_user_generic)
 	CFI_STARTPROC
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 1f118d462acc..8498684e45b0 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -874,7 +874,7 @@ EXPORT_SYMBOL(copy_to_user);
  * data to the requested size using zero bytes.
  */
 unsigned long
-copy_from_user(void *to, const void __user *from, unsigned long n)
+_copy_from_user(void *to, const void __user *from, unsigned long n)
 {
 	if (access_ok(VERIFY_READ, from, n))
 		n = __copy_from_user(to, from, n);
@@ -882,4 +882,4 @@ copy_from_user(void *to, const void __user *from, unsigned long n)
 		memset(to, 0, n);
 	return n;
 }
-EXPORT_SYMBOL(copy_from_user);
+EXPORT_SYMBOL(_copy_from_user);
diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index 450fa597c94d..a3aef5d55dba 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -37,3 +37,5 @@
 #define __cold			__attribute__((__cold__))
 
 #endif
+
+#define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 04fb5135b4e1..8e54108688f9 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -266,6 +266,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 # define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
 #endif
 
+/* Compile time object size, -1 for unknown */
+#ifndef __compiletime_object_size
+# define __compiletime_object_size(obj) -1
+#endif
 /*
  * Prevent the compiler from merging or refetching accesses.  The compiler
  * is also forbidden from reordering successive instances of ACCESS_ONCE(),
-- 
cgit v1.2.3-70-g09d2


From 925936ebf35a95c290e010b784c962164e6728f3 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Mon, 28 Sep 2009 17:12:49 +0200
Subject: tracing: Pushdown the bkl tracepoints calls

Currently we are calling the bkl tracepoint callbacks just before the
bkl lock/unlock operations, ie the tracepoint call is not inside a
lock_kernel() function but inside a lock_kernel() macro. Hence the
bkl trace event header must be included from smp_lock.h. This raises
some nasty circular header dependencies:

linux/smp_lock.h -> trace/events/bkl.h -> trace/define_trace.h
-> trace/ftrace.h -> linux/ftrace_event.h -> linux/hardirq.h
-> linux/smp_lock.h

This results in incomplete event declarations, spurious event
definitions and other kind of funny behaviours.

This is hardly fixable without ugly workarounds. So instead, we push
the file name, line number and function name as lock_kernel()
parameters, so that we only deal with the trace event header from
lib/kernel_lock.c

This adds two parameters to lock_kernel() and unlock_kernel() but
it should be fine wrt to performances because this pair dos not seem
to be called in fast paths.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Li Zefan <lizf@cn.fujitsu.com>
---
 include/linux/smp_lock.h | 28 +++++++++++++++-------------
 lib/kernel_lock.c        | 15 +++++++++++----
 2 files changed, 26 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/smp_lock.h b/include/linux/smp_lock.h
index d48cc77ba70d..2ea1dd1ba21c 100644
--- a/include/linux/smp_lock.h
+++ b/include/linux/smp_lock.h
@@ -3,7 +3,6 @@
 
 #ifdef CONFIG_LOCK_KERNEL
 #include <linux/sched.h>
-#include <trace/events/bkl.h>
 
 #define kernel_locked()		(current->lock_depth >= 0)
 
@@ -25,18 +24,21 @@ static inline int reacquire_kernel_lock(struct task_struct *task)
 	return 0;
 }
 
-extern void __lockfunc _lock_kernel(void)	__acquires(kernel_lock);
-extern void __lockfunc _unlock_kernel(void)	__releases(kernel_lock);
+extern void __lockfunc
+_lock_kernel(const char *func, const char *file, int line)
+__acquires(kernel_lock);
 
-#define lock_kernel()	{					\
-	trace_lock_kernel(__func__, __FILE__, __LINE__);	\
-	_lock_kernel();						\
-}
+extern void __lockfunc
+_unlock_kernel(const char *func, const char *file, int line)
+__releases(kernel_lock);
 
-#define unlock_kernel()	{					\
-	trace_unlock_kernel(__func__, __FILE__, __LINE__);	\
-	_unlock_kernel();					\
-}
+#define lock_kernel() do {					\
+	_lock_kernel(__func__, __FILE__, __LINE__);		\
+} while (0)
+
+#define unlock_kernel()	do {					\
+	_unlock_kernel(__func__, __FILE__, __LINE__);		\
+} while (0)
 
 /*
  * Various legacy drivers don't really need the BKL in a specific
@@ -52,8 +54,8 @@ static inline void cycle_kernel_lock(void)
 
 #else
 
-#define lock_kernel()	   trace_lock_kernel(__func__, __FILE__, __LINE__);
-#define unlock_kernel()    trace_unlock_kernel(__func__, __FILE__, __LINE__);
+#define lock_kernel()
+#define unlock_kernel()
 #define release_kernel_lock(task)		do { } while(0)
 #define cycle_kernel_lock()			do { } while(0)
 #define reacquire_kernel_lock(task)		0
diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c
index 5c10b2e1fd08..4ebfa5a164d7 100644
--- a/lib/kernel_lock.c
+++ b/lib/kernel_lock.c
@@ -8,9 +8,11 @@
 #include <linux/module.h>
 #include <linux/kallsyms.h>
 #include <linux/semaphore.h>
-#define CREATE_TRACE_POINTS
 #include <linux/smp_lock.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/bkl.h>
+
 /*
  * The 'big kernel lock'
  *
@@ -114,19 +116,24 @@ static inline void __unlock_kernel(void)
  * This cannot happen asynchronously, so we only need to
  * worry about other CPU's.
  */
-void __lockfunc _lock_kernel(void)
+void __lockfunc _lock_kernel(const char *func, const char *file, int line)
 {
-	int depth = current->lock_depth+1;
+	int depth = current->lock_depth + 1;
+
+	trace_lock_kernel(func, file, line);
+
 	if (likely(!depth))
 		__lock_kernel();
 	current->lock_depth = depth;
 }
 
-void __lockfunc _unlock_kernel(void)
+void __lockfunc _unlock_kernel(const char *func, const char *file, int line)
 {
 	BUG_ON(current->lock_depth < 0);
 	if (likely(--current->lock_depth < 0))
 		__unlock_kernel();
+
+	trace_unlock_kernel(func, file, line);
 }
 
 EXPORT_SYMBOL(_lock_kernel);
-- 
cgit v1.2.3-70-g09d2


From 4a3127693001c61a21d1ce680db6340623f52e93 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Wed, 30 Sep 2009 13:05:23 +0200
Subject: x86: Turn the copy_from_user check into an (optional) compile time
 warning

A previous patch added the buffer size check to copy_from_user().

One of the things learned from analyzing the result of the previous
patch is that in general, gcc is really good at proving that the
code contains sufficient security checks to not need to do a
runtime check. But that for those cases where gcc could not prove
this, there was a relatively high percentage of real security
issues.

This patch turns the case of "gcc cannot prove" into a compile time
warning, as long as a sufficiently new gcc is in use that supports
this. The objective is that these warnings will trigger developers
checking new cases out before a security hole enters a linux kernel
release.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: James Morris <jmorris@namei.org>
Cc: Jan Beulich <jbeulich@novell.com>
LKML-Reference: <20090930130523.348ae6c4@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/uaccess_32.h | 12 +++++++++---
 arch/x86/lib/usercopy_32.c        |  6 ++++++
 include/linux/compiler-gcc4.h     |  3 +++
 include/linux/compiler.h          |  4 ++++
 4 files changed, 22 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 582d6aef7417..952f9e793c3e 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -191,6 +191,13 @@ unsigned long __must_check _copy_from_user(void *to,
 					  const void __user *from,
 					  unsigned long n);
 
+
+extern void copy_from_user_overflow(void)
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+	__compiletime_warning("copy_from_user() buffer size is not provably correct")
+#endif
+;
+
 static inline unsigned long __must_check copy_from_user(void *to,
 					  const void __user *from,
 					  unsigned long n)
@@ -200,10 +207,9 @@ static inline unsigned long __must_check copy_from_user(void *to,
 
 	if (likely(sz == -1 || sz >= n))
 		ret = _copy_from_user(to, from, n);
-#ifdef CONFIG_DEBUG_VM
 	else
-		WARN(1, "Buffer overflow detected!\n");
-#endif
+		copy_from_user_overflow();
+
 	return ret;
 }
 
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 8498684e45b0..e218d5df85ff 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -883,3 +883,9 @@ _copy_from_user(void *to, const void __user *from, unsigned long n)
 	return n;
 }
 EXPORT_SYMBOL(_copy_from_user);
+
+void copy_from_user_overflow(void)
+{
+	WARN(1, "Buffer overflow detected!\n");
+}
+EXPORT_SYMBOL(copy_from_user_overflow);
diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index a3aef5d55dba..f1709c1f9eae 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -39,3 +39,6 @@
 #endif
 
 #define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
+#if __GNUC_MINOR__ >= 4
+#define __compiletime_warning(message) __attribute__((warning(message)))
+#endif
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 8e54108688f9..950356311f12 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -270,6 +270,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 #ifndef __compiletime_object_size
 # define __compiletime_object_size(obj) -1
 #endif
+#ifndef __compiletime_warning
+# define __compiletime_warning(message)
+#endif
+
 /*
  * Prevent the compiler from merging or refetching accesses.  The compiler
  * is also forbidden from reordering successive instances of ACCESS_ONCE(),
-- 
cgit v1.2.3-70-g09d2


From 63312b6a6faae3f2e5577f2b001e3b504f10a2aa Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Fri, 2 Oct 2009 07:50:50 -0700
Subject: x86: Add a Kconfig option to turn the copy_from_user warnings into
 errors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For automated testing it is useful to have the option to turn
the warnings on copy_from_user() etc checks into errors:

 In function ‘copy_from_user’,
     inlined from ‘fd_copyin’ at drivers/block/floppy.c:3080,
     inlined from ‘fd_ioctl’ at drivers/block/floppy.c:3503:
   linux/arch/x86/include/asm/uaccess_32.h:213:
  error: call to ‘copy_from_user_overflow’ declared with attribute error:
  copy_from_user buffer size is not provably correct

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
LKML-Reference: <20091002075050.4e9f7641@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig.debug            | 14 ++++++++++++++
 arch/x86/include/asm/uaccess_32.h |  4 +++-
 include/linux/compiler-gcc4.h     |  1 +
 include/linux/compiler.h          |  3 +++
 4 files changed, 21 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index d105f29bb6bb..1bd2e36f1538 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -287,4 +287,18 @@ config OPTIMIZE_INLINING
 
 	  If unsure, say N.
 
+config DEBUG_STRICT_USER_COPY_CHECKS
+	bool "Strict copy size checks"
+	depends on DEBUG_KERNEL
+	---help---
+	  Enabling this option turns a certain set of sanity checks for user
+	  copy operations into compile time failures.
+
+	  The copy_from_user() etc checks are there to help test if there
+	  are sufficient security checks on the length argument of
+	  the copy operation, by having gcc prove that the argument is
+	  within bounds.
+
+	  If unsure, or if you run an older (pre 4.4) gcc, say N.
+
 endmenu
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 952f9e793c3e..0c9825e97f36 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -193,7 +193,9 @@ unsigned long __must_check _copy_from_user(void *to,
 
 
 extern void copy_from_user_overflow(void)
-#ifdef CONFIG_DEBUG_STACKOVERFLOW
+#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS
+	__compiletime_error("copy_from_user() buffer size is not provably correct")
+#else
 	__compiletime_warning("copy_from_user() buffer size is not provably correct")
 #endif
 ;
diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index f1709c1f9eae..77542c57e20a 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -41,4 +41,5 @@
 #define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
 #if __GNUC_MINOR__ >= 4
 #define __compiletime_warning(message) __attribute__((warning(message)))
+#define __compiletime_error(message) __attribute__((error(message)))
 #endif
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 950356311f12..88fd4b673cb4 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -273,6 +273,9 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 #ifndef __compiletime_warning
 # define __compiletime_warning(message)
 #endif
+#ifndef __compiletime_error
+# define __compiletime_error(message)
+#endif
 
 /*
  * Prevent the compiler from merging or refetching accesses.  The compiler
-- 
cgit v1.2.3-70-g09d2


From 26a50744b21fff65bd754874072857bee8967f4d Mon Sep 17 00:00:00 2001
From: Tom Zanussi <tzanussi@gmail.com>
Date: Tue, 6 Oct 2009 01:09:50 -0500
Subject: tracing/events: Add 'signed' field to format files

The sign info used for filters in the kernel is also useful to
applications that process the trace stream.  Add it to the format
files and make it available to userspace.

Signed-off-by: Tom Zanussi <tzanussi@gmail.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: rostedt@goodmis.org
Cc: lizf@cn.fujitsu.com
Cc: hch@infradead.org
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <1254809398-8078-2-git-send-email-tzanussi@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/trace/ftrace.h              | 15 +++++++++------
 kernel/trace/ring_buffer.c          | 15 +++++++++------
 kernel/trace/trace_events.c         | 24 ++++++++++++------------
 kernel/trace/trace_export.c         | 25 ++++++++++++++-----------
 kernel/trace/trace_syscalls.c       | 20 +++++++++++++-------
 tools/perf/util/trace-event-parse.c | 24 ++++++++++++++++++++++++
 tools/perf/util/trace-event.h       |  1 +
 7 files changed, 82 insertions(+), 42 deletions(-)

(limited to 'include')

diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index cc0d9667e182..c9bbcab95fbe 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -120,9 +120,10 @@
 #undef __field
 #define __field(type, item)					\
 	ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t"	\
-			       "offset:%u;\tsize:%u;\n",		\
+			       "offset:%u;\tsize:%u;\tsigned:%u;\n",	\
 			       (unsigned int)offsetof(typeof(field), item), \
-			       (unsigned int)sizeof(field.item));	\
+			       (unsigned int)sizeof(field.item),	\
+			       (unsigned int)is_signed_type(type));	\
 	if (!ret)							\
 		return 0;
 
@@ -132,19 +133,21 @@
 #undef __array
 #define __array(type, item, len)						\
 	ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t"	\
-			       "offset:%u;\tsize:%u;\n",		\
+			       "offset:%u;\tsize:%u;\tsigned:%u;\n",	\
 			       (unsigned int)offsetof(typeof(field), item), \
-			       (unsigned int)sizeof(field.item));	\
+			       (unsigned int)sizeof(field.item),	\
+			       (unsigned int)is_signed_type(type));	\
 	if (!ret)							\
 		return 0;
 
 #undef __dynamic_array
 #define __dynamic_array(type, item, len)				       \
 	ret = trace_seq_printf(s, "\tfield:__data_loc " #type "[] " #item ";\t"\
-			       "offset:%u;\tsize:%u;\n",		       \
+			       "offset:%u;\tsize:%u;\tsigned:%u;\n",	       \
 			       (unsigned int)offsetof(typeof(field),	       \
 					__data_loc_##item),		       \
-			       (unsigned int)sizeof(field.__data_loc_##item)); \
+			       (unsigned int)sizeof(field.__data_loc_##item), \
+			       (unsigned int)is_signed_type(type));	\
 	if (!ret)							       \
 		return 0;
 
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index d4ff01970547..e43c928356ee 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -397,18 +397,21 @@ int ring_buffer_print_page_header(struct trace_seq *s)
 	int ret;
 
 	ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t"
-			       "offset:0;\tsize:%u;\n",
-			       (unsigned int)sizeof(field.time_stamp));
+			       "offset:0;\tsize:%u;\tsigned:%u;\n",
+			       (unsigned int)sizeof(field.time_stamp),
+			       (unsigned int)is_signed_type(u64));
 
 	ret = trace_seq_printf(s, "\tfield: local_t commit;\t"
-			       "offset:%u;\tsize:%u;\n",
+			       "offset:%u;\tsize:%u;\tsigned:%u;\n",
 			       (unsigned int)offsetof(typeof(field), commit),
-			       (unsigned int)sizeof(field.commit));
+			       (unsigned int)sizeof(field.commit),
+			       (unsigned int)is_signed_type(long));
 
 	ret = trace_seq_printf(s, "\tfield: char data;\t"
-			       "offset:%u;\tsize:%u;\n",
+			       "offset:%u;\tsize:%u;\tsigned:%u;\n",
 			       (unsigned int)offsetof(typeof(field), data),
-			       (unsigned int)BUF_PAGE_SIZE);
+			       (unsigned int)BUF_PAGE_SIZE,
+			       (unsigned int)is_signed_type(char));
 
 	return ret;
 }
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index d128f65778e6..cf3cabf6ce14 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -507,7 +507,7 @@ extern char *__bad_type_size(void);
 #define FIELD(type, name)						\
 	sizeof(type) != sizeof(field.name) ? __bad_type_size() :	\
 	#type, "common_" #name, offsetof(typeof(field), name),		\
-		sizeof(field.name)
+		sizeof(field.name), is_signed_type(type)
 
 static int trace_write_header(struct trace_seq *s)
 {
@@ -515,17 +515,17 @@ static int trace_write_header(struct trace_seq *s)
 
 	/* struct trace_entry */
 	return trace_seq_printf(s,
-				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
-				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
-				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
-				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
-				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
-				"\n",
-				FIELD(unsigned short, type),
-				FIELD(unsigned char, flags),
-				FIELD(unsigned char, preempt_count),
-				FIELD(int, pid),
-				FIELD(int, lock_depth));
+			"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
+			"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
+			"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
+			"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
+			"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
+			"\n",
+			FIELD(unsigned short, type),
+			FIELD(unsigned char, flags),
+			FIELD(unsigned char, preempt_count),
+			FIELD(int, pid),
+			FIELD(int, lock_depth));
 }
 
 static ssize_t
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 9753fcc61bc5..31da218ee10f 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -66,44 +66,47 @@ static void __used ____ftrace_check_##name(void)		\
 #undef __field
 #define __field(type, item)						\
 	ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t"	\
-			       "offset:%zu;\tsize:%zu;\n",		\
+			       "offset:%zu;\tsize:%zu;\tsigned:%u;\n",	\
 			       offsetof(typeof(field), item),		\
-			       sizeof(field.item));			\
+			       sizeof(field.item), is_signed_type(type)); \
 	if (!ret)							\
 		return 0;
 
 #undef __field_desc
 #define __field_desc(type, container, item)				\
 	ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t"	\
-			       "offset:%zu;\tsize:%zu;\n",		\
+			       "offset:%zu;\tsize:%zu;\tsigned:%u;\n",	\
 			       offsetof(typeof(field), container.item),	\
-			       sizeof(field.container.item));		\
+			       sizeof(field.container.item),		\
+			       is_signed_type(type));			\
 	if (!ret)							\
 		return 0;
 
 #undef __array
 #define __array(type, item, len)					\
 	ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
-			       "offset:%zu;\tsize:%zu;\n",		\
-			       offsetof(typeof(field), item),	\
-			       sizeof(field.item));		\
+			       "offset:%zu;\tsize:%zu;\tsigned:%u;\n",	\
+			       offsetof(typeof(field), item),		\
+			       sizeof(field.item), is_signed_type(type)); \
 	if (!ret)							\
 		return 0;
 
 #undef __array_desc
 #define __array_desc(type, container, item, len)			\
 	ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
-			       "offset:%zu;\tsize:%zu;\n",		\
+			       "offset:%zu;\tsize:%zu;\tsigned:%u;\n",	\
 			       offsetof(typeof(field), container.item),	\
-			       sizeof(field.container.item));		\
+			       sizeof(field.container.item),		\
+			       is_signed_type(type));			\
 	if (!ret)							\
 		return 0;
 
 #undef __dynamic_array
 #define __dynamic_array(type, item)					\
 	ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t"	\
-			       "offset:%zu;\tsize:0;\n",		\
-			       offsetof(typeof(field), item));		\
+			       "offset:%zu;\tsize:0;\tsigned:%u;\n",	\
+			       offsetof(typeof(field), item),		\
+			       is_signed_type(type));			\
 	if (!ret)							\
 		return 0;
 
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 527e17eae575..d99abc427c39 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -103,7 +103,8 @@ extern char *__bad_type_size(void);
 #define SYSCALL_FIELD(type, name)					\
 	sizeof(type) != sizeof(trace.name) ?				\
 		__bad_type_size() :					\
-		#type, #name, offsetof(typeof(trace), name), sizeof(trace.name)
+		#type, #name, offsetof(typeof(trace), name),		\
+		sizeof(trace.name), is_signed_type(type)
 
 int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
 {
@@ -120,7 +121,8 @@ int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
 	if (!entry)
 		return 0;
 
-	ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n",
+	ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
+			       "\tsigned:%u;\n",
 			       SYSCALL_FIELD(int, nr));
 	if (!ret)
 		return 0;
@@ -130,8 +132,10 @@ int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
 				        entry->args[i]);
 		if (!ret)
 			return 0;
-		ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;\n", offset,
-				       sizeof(unsigned long));
+		ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;"
+				       "\tsigned:%u;\n", offset,
+				       sizeof(unsigned long),
+				       is_signed_type(unsigned long));
 		if (!ret)
 			return 0;
 		offset += sizeof(unsigned long);
@@ -163,8 +167,10 @@ int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
 	struct syscall_trace_exit trace;
 
 	ret = trace_seq_printf(s,
-			       "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
-			       "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n",
+			       "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
+			       "\tsigned:%u;\n"
+			       "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
+			       "\tsigned:%u;\n",
 			       SYSCALL_FIELD(int, nr),
 			       SYSCALL_FIELD(long, ret));
 	if (!ret)
@@ -212,7 +218,7 @@ int syscall_exit_define_fields(struct ftrace_event_call *call)
 	if (ret)
 		return ret;
 
-	ret = trace_define_field(call, SYSCALL_FIELD(long, ret), 0,
+	ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
 				 FILTER_OTHER);
 
 	return ret;
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 55b41b9e3834..be8412d699a1 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -894,6 +894,21 @@ static int event_read_fields(struct event *event, struct format_field **fields)
 		field->size = strtoul(token, NULL, 0);
 		free_token(token);
 
+		if (read_expected(EVENT_OP, (char *)";") < 0)
+			goto fail_expect;
+
+		if (read_expected(EVENT_ITEM, (char *)"signed") < 0)
+			goto fail_expect;
+
+		if (read_expected(EVENT_OP, (char *)":") < 0)
+			goto fail_expect;
+
+		if (read_expect_type(EVENT_ITEM, &token))
+			goto fail;
+		if (strtoul(token, NULL, 0))
+			field->flags |= FIELD_IS_SIGNED;
+		free_token(token);
+
 		if (read_expected(EVENT_OP, (char *)";") < 0)
 			goto fail_expect;
 
@@ -2843,6 +2858,15 @@ static void parse_header_field(char *type,
 		return;
 	*size = atoi(token);
 	free_token(token);
+	if (read_expected(EVENT_OP, (char *)";") < 0)
+		return;
+	if (read_expected(EVENT_ITEM, (char *)"signed") < 0)
+		return;
+	if (read_expected(EVENT_OP, (char *)":") < 0)
+		return;
+	if (read_expect_type(EVENT_ITEM, &token) < 0)
+		return;
+	free_token(token);
 	if (read_expected(EVENT_OP, (char *)";") < 0)
 		return;
 	if (read_expect_type(EVENT_NEWLINE, &token) < 0)
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index 162c3e6deb93..00b440df66d8 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -26,6 +26,7 @@ enum {
 enum format_flags {
 	FIELD_IS_ARRAY		= 1,
 	FIELD_IS_POINTER	= 2,
+	FIELD_IS_SIGNED		= 4,
 };
 
 struct format_field {
-- 
cgit v1.2.3-70-g09d2


From 89eda06837094ce9f34fae269b8773fcfd70f046 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Sun, 4 Oct 2009 21:49:47 +0900
Subject: LSM: Add security_path_chmod() and security_path_chown().

This patch allows pathname based LSM modules to check chmod()/chown()
operations. Since notify_change() does not receive "struct vfsmount *",
we add security_path_chmod() and security_path_chown() to the caller of
notify_change().

These hooks are used by TOMOYO.

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: James Morris <jmorris@namei.org>
---
 fs/open.c                | 24 ++++++++++++++++++++----
 include/linux/security.h | 30 ++++++++++++++++++++++++++++++
 security/capability.c    | 13 +++++++++++++
 security/security.c      | 15 +++++++++++++++
 4 files changed, 78 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/fs/open.c b/fs/open.c
index 4f01e06227c6..b5c294d35bd1 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -616,6 +616,9 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode)
 	err = mnt_want_write_file(file);
 	if (err)
 		goto out_putf;
+	err = security_path_chmod(dentry, file->f_vfsmnt, mode);
+	if (err)
+		goto out_drop_write;
 	mutex_lock(&inode->i_mutex);
 	if (mode == (mode_t) -1)
 		mode = inode->i_mode;
@@ -623,6 +626,7 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode)
 	newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
 	err = notify_change(dentry, &newattrs);
 	mutex_unlock(&inode->i_mutex);
+out_drop_write:
 	mnt_drop_write(file->f_path.mnt);
 out_putf:
 	fput(file);
@@ -645,6 +649,9 @@ SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, mode_t, mode)
 	error = mnt_want_write(path.mnt);
 	if (error)
 		goto dput_and_out;
+	error = security_path_chmod(path.dentry, path.mnt, mode);
+	if (error)
+		goto out_drop_write;
 	mutex_lock(&inode->i_mutex);
 	if (mode == (mode_t) -1)
 		mode = inode->i_mode;
@@ -652,6 +659,7 @@ SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, mode_t, mode)
 	newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
 	error = notify_change(path.dentry, &newattrs);
 	mutex_unlock(&inode->i_mutex);
+out_drop_write:
 	mnt_drop_write(path.mnt);
 dput_and_out:
 	path_put(&path);
@@ -700,7 +708,9 @@ SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group)
 	error = mnt_want_write(path.mnt);
 	if (error)
 		goto out_release;
-	error = chown_common(path.dentry, user, group);
+	error = security_path_chown(&path, user, group);
+	if (!error)
+		error = chown_common(path.dentry, user, group);
 	mnt_drop_write(path.mnt);
 out_release:
 	path_put(&path);
@@ -725,7 +735,9 @@ SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
 	error = mnt_want_write(path.mnt);
 	if (error)
 		goto out_release;
-	error = chown_common(path.dentry, user, group);
+	error = security_path_chown(&path, user, group);
+	if (!error)
+		error = chown_common(path.dentry, user, group);
 	mnt_drop_write(path.mnt);
 out_release:
 	path_put(&path);
@@ -744,7 +756,9 @@ SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group
 	error = mnt_want_write(path.mnt);
 	if (error)
 		goto out_release;
-	error = chown_common(path.dentry, user, group);
+	error = security_path_chown(&path, user, group);
+	if (!error)
+		error = chown_common(path.dentry, user, group);
 	mnt_drop_write(path.mnt);
 out_release:
 	path_put(&path);
@@ -767,7 +781,9 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
 		goto out_fput;
 	dentry = file->f_path.dentry;
 	audit_inode(NULL, dentry);
-	error = chown_common(dentry, user, group);
+	error = security_path_chown(&file->f_path, user, group);
+	if (!error)
+		error = chown_common(dentry, user, group);
 	mnt_drop_write(file->f_path.mnt);
 out_fput:
 	fput(file);
diff --git a/include/linux/security.h b/include/linux/security.h
index 239e40d0450b..c8a584c26f7b 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -447,6 +447,18 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@new_dir contains the path structure for parent of the new link.
  *	@new_dentry contains the dentry structure of the new link.
  *	Return 0 if permission is granted.
+ * @path_chmod:
+ *	Check for permission to change DAC's permission of a file or directory.
+ *	@dentry contains the dentry structure.
+ *	@mnt contains the vfsmnt structure.
+ *	@mode contains DAC's mode.
+ *	Return 0 if permission is granted.
+ * @path_chown:
+ *	Check for permission to change owner/group of a file or directory.
+ *	@path contains the path structure.
+ *	@uid contains new owner's ID.
+ *	@gid contains new group's ID.
+ *	Return 0 if permission is granted.
  * @inode_readlink:
  *	Check the permission to read the symbolic link.
  *	@dentry contains the dentry structure for the file link.
@@ -1488,6 +1500,9 @@ struct security_operations {
 			  struct dentry *new_dentry);
 	int (*path_rename) (struct path *old_dir, struct dentry *old_dentry,
 			    struct path *new_dir, struct dentry *new_dentry);
+	int (*path_chmod) (struct dentry *dentry, struct vfsmount *mnt,
+			   mode_t mode);
+	int (*path_chown) (struct path *path, uid_t uid, gid_t gid);
 #endif
 
 	int (*inode_alloc_security) (struct inode *inode);
@@ -2952,6 +2967,9 @@ int security_path_link(struct dentry *old_dentry, struct path *new_dir,
 		       struct dentry *new_dentry);
 int security_path_rename(struct path *old_dir, struct dentry *old_dentry,
 			 struct path *new_dir, struct dentry *new_dentry);
+int security_path_chmod(struct dentry *dentry, struct vfsmount *mnt,
+			mode_t mode);
+int security_path_chown(struct path *path, uid_t uid, gid_t gid);
 #else	/* CONFIG_SECURITY_PATH */
 static inline int security_path_unlink(struct path *dir, struct dentry *dentry)
 {
@@ -3001,6 +3019,18 @@ static inline int security_path_rename(struct path *old_dir,
 {
 	return 0;
 }
+
+static inline int security_path_chmod(struct dentry *dentry,
+				      struct vfsmount *mnt,
+				      mode_t mode)
+{
+	return 0;
+}
+
+static inline int security_path_chown(struct path *path, uid_t uid, gid_t gid)
+{
+	return 0;
+}
 #endif	/* CONFIG_SECURITY_PATH */
 
 #ifdef CONFIG_KEYS
diff --git a/security/capability.c b/security/capability.c
index fce07a7bc825..09279a8d4a14 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -308,6 +308,17 @@ static int cap_path_truncate(struct path *path, loff_t length,
 {
 	return 0;
 }
+
+static int cap_path_chmod(struct dentry *dentry, struct vfsmount *mnt,
+			  mode_t mode)
+{
+	return 0;
+}
+
+static int cap_path_chown(struct path *path, uid_t uid, gid_t gid)
+{
+	return 0;
+}
 #endif
 
 static int cap_file_permission(struct file *file, int mask)
@@ -977,6 +988,8 @@ void security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, path_link);
 	set_to_cap_if_null(ops, path_rename);
 	set_to_cap_if_null(ops, path_truncate);
+	set_to_cap_if_null(ops, path_chmod);
+	set_to_cap_if_null(ops, path_chown);
 #endif
 	set_to_cap_if_null(ops, file_permission);
 	set_to_cap_if_null(ops, file_alloc_security);
diff --git a/security/security.c b/security/security.c
index c4c673240c1c..5259270e558f 100644
--- a/security/security.c
+++ b/security/security.c
@@ -434,6 +434,21 @@ int security_path_truncate(struct path *path, loff_t length,
 		return 0;
 	return security_ops->path_truncate(path, length, time_attrs);
 }
+
+int security_path_chmod(struct dentry *dentry, struct vfsmount *mnt,
+			mode_t mode)
+{
+	if (unlikely(IS_PRIVATE(dentry->d_inode)))
+		return 0;
+	return security_ops->path_chmod(dentry, mnt, mode);
+}
+
+int security_path_chown(struct path *path, uid_t uid, gid_t gid)
+{
+	if (unlikely(IS_PRIVATE(path->dentry->d_inode)))
+		return 0;
+	return security_ops->path_chown(path, uid, gid);
+}
 #endif
 
 int security_inode_create(struct inode *dir, struct dentry *dentry, int mode)
-- 
cgit v1.2.3-70-g09d2


From 8b8efb44033c7e86b3dc76f825c693ec92ae30e9 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Sun, 4 Oct 2009 21:49:48 +0900
Subject: LSM: Add security_path_chroot().

This patch allows pathname based LSM modules to check chroot() operations.

This hook is used by TOMOYO.

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: James Morris <jmorris@namei.org>
---
 fs/open.c                |  3 +++
 include/linux/security.h | 11 +++++++++++
 security/capability.c    |  6 ++++++
 security/security.c      |  5 +++++
 4 files changed, 25 insertions(+)

(limited to 'include')

diff --git a/fs/open.c b/fs/open.c
index b5c294d35bd1..201041dfca57 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -587,6 +587,9 @@ SYSCALL_DEFINE1(chroot, const char __user *, filename)
 	error = -EPERM;
 	if (!capable(CAP_SYS_CHROOT))
 		goto dput_and_out;
+	error = security_path_chroot(&path);
+	if (error)
+		goto dput_and_out;
 
 	set_fs_root(current->fs, &path);
 	error = 0;
diff --git a/include/linux/security.h b/include/linux/security.h
index c8a584c26f7b..ed0faea60b82 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -459,6 +459,10 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@uid contains new owner's ID.
  *	@gid contains new group's ID.
  *	Return 0 if permission is granted.
+ * @path_chroot:
+ *	Check for permission to change root directory.
+ *	@path contains the path structure.
+ *	Return 0 if permission is granted.
  * @inode_readlink:
  *	Check the permission to read the symbolic link.
  *	@dentry contains the dentry structure for the file link.
@@ -1503,6 +1507,7 @@ struct security_operations {
 	int (*path_chmod) (struct dentry *dentry, struct vfsmount *mnt,
 			   mode_t mode);
 	int (*path_chown) (struct path *path, uid_t uid, gid_t gid);
+	int (*path_chroot) (struct path *path);
 #endif
 
 	int (*inode_alloc_security) (struct inode *inode);
@@ -2970,6 +2975,7 @@ int security_path_rename(struct path *old_dir, struct dentry *old_dentry,
 int security_path_chmod(struct dentry *dentry, struct vfsmount *mnt,
 			mode_t mode);
 int security_path_chown(struct path *path, uid_t uid, gid_t gid);
+int security_path_chroot(struct path *path);
 #else	/* CONFIG_SECURITY_PATH */
 static inline int security_path_unlink(struct path *dir, struct dentry *dentry)
 {
@@ -3031,6 +3037,11 @@ static inline int security_path_chown(struct path *path, uid_t uid, gid_t gid)
 {
 	return 0;
 }
+
+static inline int security_path_chroot(struct path *path)
+{
+	return 0;
+}
 #endif	/* CONFIG_SECURITY_PATH */
 
 #ifdef CONFIG_KEYS
diff --git a/security/capability.c b/security/capability.c
index 09279a8d4a14..4f3ab476937f 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -319,6 +319,11 @@ static int cap_path_chown(struct path *path, uid_t uid, gid_t gid)
 {
 	return 0;
 }
+
+static int cap_path_chroot(struct path *root)
+{
+	return 0;
+}
 #endif
 
 static int cap_file_permission(struct file *file, int mask)
@@ -990,6 +995,7 @@ void security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, path_truncate);
 	set_to_cap_if_null(ops, path_chmod);
 	set_to_cap_if_null(ops, path_chown);
+	set_to_cap_if_null(ops, path_chroot);
 #endif
 	set_to_cap_if_null(ops, file_permission);
 	set_to_cap_if_null(ops, file_alloc_security);
diff --git a/security/security.c b/security/security.c
index 5259270e558f..279757314a05 100644
--- a/security/security.c
+++ b/security/security.c
@@ -449,6 +449,11 @@ int security_path_chown(struct path *path, uid_t uid, gid_t gid)
 		return 0;
 	return security_ops->path_chown(path, uid, gid);
 }
+
+int security_path_chroot(struct path *path)
+{
+	return security_ops->path_chroot(path);
+}
 #endif
 
 int security_inode_create(struct inode *dir, struct dentry *dentry, int mode)
-- 
cgit v1.2.3-70-g09d2


From 8968f9d3dc23d9a1821d97c6f11e72a59382e56c Mon Sep 17 00:00:00 2001
From: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Date: Tue, 13 Oct 2009 16:19:41 +0900
Subject: perf_event, x86, mce: Use TRACE_EVENT() for MCE logging

This approach is the first baby step towards solving many of the
structural problems the x86 MCE logging code is having today:

 - It has a private ring-buffer implementation that has a number
   of limitations and has been historically fragile and buggy.

 - It is using a quirky /dev/mcelog ioctl driven ABI that is MCE
   specific. /dev/mcelog is not part of any larger logging
   framework and hence has remained on the fringes for many years.

 - The MCE logging code is still very unclean partly due to its ABI
   limitations. Fields are being reused for multiple purposes, and
   the whole message structure is limited and x86 specific to begin
   with.

All in one, the x86 tree would like to move away from this private
implementation of an event logging facility to a broader framework.

By using perf events we gain the following advantages:

 - Multiple user-space agents can access MCE events. We can have an
   mcelog daemon running but also a system-wide tracer capturing
   important events in flight-recorder mode.

 - Sampling support: the kernel and the user-space call-chain of MCE
   events can be stored and analyzed as well. This way actual patterns
   of bad behavior can be matched to precisely what kind of activity
   happened in the kernel (and/or in the app) around that moment in
   time.

 - Coupling with other hardware and software events: the PMU can track a
   number of other anomalies - monitoring software might chose to
   monitor those plus the MCE events as well - in one coherent stream of
   events.

 - Discovery of MCE sources - tracepoints are enumerated and tools can
   act upon the existence (or non-existence) of various channels of MCE
   information.

 - Filtering support: we just subscribe to and act upon the events we
   are interested in. Then even on a per event source basis there's
   in-kernel filter expressions available that can restrict the amount
   of data that hits the event channel.

 - Arbitrary deep per cpu buffering of events - we can buffer 32
   entries or we can buffer as much as we want, as long as we have
   the RAM.

 - An NMI-safe ring-buffer implementation - mappable to user-space.

 - Built-in support for timestamping of events, PID markers, CPU
   markers, etc.

 - A rich ABI accessible over system call interface. Per cpu, per task
   and per workload monitoring of MCE events can be done this way. The
   ABI itself has a nice, meaningful structure.

 - Extensible ABI: new fields can be added without breaking tooling.
   New tracepoints can be added as the hardware side evolves. There's
   various parsers that can be used.

 - Lots of scheduling/buffering/batching modes of operandi for MCE
   events. poll() support. mmap() support. read() support. You name it.

 - Rich tooling support: even without any MCE specific extensions added
   the 'perf' tool today offers various views of MCE data: perf report,
   perf stat, perf trace can all be used to view logged MCE events and
   perhaps correlate them to certain user-space usage patterns. But it
   can be used directly as well, for user-space agents and policy action
   in mcelog, etc.

With this we hope to achieve significant code cleanup and feature
improvements in the MCE code, and we hope to be able to drop the
/dev/mcelog facility in the end.

This patch is just a plain dumb dump of mce_log() records to
the tracepoints / perf events framework - a first proof of
concept step.

Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
LKML-Reference: <4AD42A0D.7050104@jp.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/mcheck/mce.c |  6 ++++
 include/trace/events/mce.h       | 69 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 75 insertions(+)
 create mode 100644 include/trace/events/mce.h

(limited to 'include')

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index b1598a9436d0..39caea3d8bc3 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -46,6 +46,9 @@
 
 #include "mce-internal.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/mce.h>
+
 int mce_disabled __read_mostly;
 
 #define MISC_MCELOG_MINOR	227
@@ -141,6 +144,9 @@ void mce_log(struct mce *mce)
 {
 	unsigned next, entry;
 
+	/* Emit the trace record: */
+	trace_mce_record(mce);
+
 	mce->finished = 0;
 	wmb();
 	for (;;) {
diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h
new file mode 100644
index 000000000000..7eee77895cb3
--- /dev/null
+++ b/include/trace/events/mce.h
@@ -0,0 +1,69 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mce
+
+#if !defined(_TRACE_MCE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MCE_H
+
+#include <linux/ktime.h>
+#include <linux/tracepoint.h>
+#include <asm/mce.h>
+
+TRACE_EVENT(mce_record,
+
+	TP_PROTO(struct mce *m),
+
+	TP_ARGS(m),
+
+	TP_STRUCT__entry(
+		__field(	u64,		mcgcap		)
+		__field(	u64,		mcgstatus	)
+		__field(	u8,		bank		)
+		__field(	u64,		status		)
+		__field(	u64,		addr		)
+		__field(	u64,		misc		)
+		__field(	u64,		ip		)
+		__field(	u8,		cs		)
+		__field(	u64,		tsc		)
+		__field(	u64,		walltime	)
+		__field(	u32,		cpu		)
+		__field(	u32,		cpuid		)
+		__field(	u32,		apicid		)
+		__field(	u32,		socketid	)
+		__field(	u8,		cpuvendor	)
+	),
+
+	TP_fast_assign(
+		__entry->mcgcap		= m->mcgcap;
+		__entry->mcgstatus	= m->mcgstatus;
+		__entry->bank		= m->bank;
+		__entry->status		= m->status;
+		__entry->addr		= m->addr;
+		__entry->misc		= m->misc;
+		__entry->ip		= m->ip;
+		__entry->cs		= m->cs;
+		__entry->tsc		= m->tsc;
+		__entry->walltime	= m->time;
+		__entry->cpu		= m->extcpu;
+		__entry->cpuid		= m->cpuid;
+		__entry->apicid		= m->apicid;
+		__entry->socketid	= m->socketid;
+		__entry->cpuvendor	= m->cpuvendor;
+	),
+
+	TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, ADDR/MISC: %016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x",
+		__entry->cpu,
+		__entry->mcgcap, __entry->mcgstatus,
+		__entry->bank, __entry->status,
+		__entry->addr, __entry->misc,
+		__entry->cs, __entry->ip,
+		__entry->tsc,
+		__entry->cpuvendor, __entry->cpuid,
+		__entry->walltime,
+		__entry->socketid,
+		__entry->apicid)
+);
+
+#endif /* _TRACE_MCE_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
-- 
cgit v1.2.3-70-g09d2


From c44fc770845163f8d9e573f37f92a7b7a7ade14e Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 19 Sep 2009 06:50:42 +0200
Subject: tracing: Move syscalls metadata handling from arch to core

Most of the syscalls metadata processing is done from arch.
But these operations are mostly generic accross archs. Especially now
that we have a common variable name that expresses the number of
syscalls supported by an arch: NR_syscalls, the only remaining bits
that need to reside in arch is the syscall nr to addr translation.

v2: Compare syscalls symbols only after the "sys" prefix so that we
    avoid spurious mismatches with archs that have syscalls wrappers,
    in which case syscalls symbols have "SyS" prefixed aliases.
    (Reported by: Heiko Carstens)

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
---
 arch/s390/kernel/ftrace.c     | 67 +--------------------------------
 arch/x86/kernel/ftrace.c      | 76 +-------------------------------------
 include/trace/syscall.h       |  2 +-
 kernel/trace/trace_syscalls.c | 86 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 91 insertions(+), 140 deletions(-)

(limited to 'include')

diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 57bdcb1e3cdf..7c5752c3423d 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -206,73 +206,10 @@ out:
 
 #ifdef CONFIG_FTRACE_SYSCALLS
 
-extern unsigned long __start_syscalls_metadata[];
-extern unsigned long __stop_syscalls_metadata[];
 extern unsigned int sys_call_table[];
 
-static struct syscall_metadata **syscalls_metadata;
-
-struct syscall_metadata *syscall_nr_to_meta(int nr)
-{
-	if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
-		return NULL;
-
-	return syscalls_metadata[nr];
-}
-
-int syscall_name_to_nr(char *name)
-{
-	int i;
-
-	if (!syscalls_metadata)
-		return -1;
-	for (i = 0; i < NR_syscalls; i++)
-		if (syscalls_metadata[i])
-			if (!strcmp(syscalls_metadata[i]->name, name))
-				return i;
-	return -1;
-}
-
-void set_syscall_enter_id(int num, int id)
-{
-	syscalls_metadata[num]->enter_id = id;
-}
-
-void set_syscall_exit_id(int num, int id)
+unsigned long __init arch_syscall_addr(int nr)
 {
-	syscalls_metadata[num]->exit_id = id;
-}
-
-static struct syscall_metadata *find_syscall_meta(unsigned long syscall)
-{
-	struct syscall_metadata *start;
-	struct syscall_metadata *stop;
-	char str[KSYM_SYMBOL_LEN];
-
-	start = (struct syscall_metadata *)__start_syscalls_metadata;
-	stop = (struct syscall_metadata *)__stop_syscalls_metadata;
-	kallsyms_lookup(syscall, NULL, NULL, NULL, str);
-
-	for ( ; start < stop; start++) {
-		if (start->name && !strcmp(start->name + 3, str + 3))
-			return start;
-	}
-	return NULL;
-}
-
-static int __init arch_init_ftrace_syscalls(void)
-{
-	struct syscall_metadata *meta;
-	int i;
-	syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * NR_syscalls,
-				    GFP_KERNEL);
-	if (!syscalls_metadata)
-		return -ENOMEM;
-	for (i = 0; i < NR_syscalls; i++) {
-		meta = find_syscall_meta((unsigned long)sys_call_table[i]);
-		syscalls_metadata[i] = meta;
-	}
-	return 0;
+	return (unsigned long)sys_call_table[nr];
 }
-arch_initcall(arch_init_ftrace_syscalls);
 #endif
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 25e6f5fc4b1e..5a1b9758fd62 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -470,82 +470,10 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
 
 #ifdef CONFIG_FTRACE_SYSCALLS
 
-extern unsigned long __start_syscalls_metadata[];
-extern unsigned long __stop_syscalls_metadata[];
 extern unsigned long *sys_call_table;
 
-static struct syscall_metadata **syscalls_metadata;
-
-static struct syscall_metadata *find_syscall_meta(unsigned long *syscall)
-{
-	struct syscall_metadata *start;
-	struct syscall_metadata *stop;
-	char str[KSYM_SYMBOL_LEN];
-
-
-	start = (struct syscall_metadata *)__start_syscalls_metadata;
-	stop = (struct syscall_metadata *)__stop_syscalls_metadata;
-	kallsyms_lookup((unsigned long) syscall, NULL, NULL, NULL, str);
-
-	for ( ; start < stop; start++) {
-		if (start->name && !strcmp(start->name, str))
-			return start;
-	}
-	return NULL;
-}
-
-struct syscall_metadata *syscall_nr_to_meta(int nr)
-{
-	if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
-		return NULL;
-
-	return syscalls_metadata[nr];
-}
-
-int syscall_name_to_nr(char *name)
-{
-	int i;
-
-	if (!syscalls_metadata)
-		return -1;
-
-	for (i = 0; i < NR_syscalls; i++) {
-		if (syscalls_metadata[i]) {
-			if (!strcmp(syscalls_metadata[i]->name, name))
-				return i;
-		}
-	}
-	return -1;
-}
-
-void set_syscall_enter_id(int num, int id)
-{
-	syscalls_metadata[num]->enter_id = id;
-}
-
-void set_syscall_exit_id(int num, int id)
+unsigned long __init arch_syscall_addr(int nr)
 {
-	syscalls_metadata[num]->exit_id = id;
-}
-
-static int __init arch_init_ftrace_syscalls(void)
-{
-	int i;
-	struct syscall_metadata *meta;
-	unsigned long **psys_syscall_table = &sys_call_table;
-
-	syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
-					NR_syscalls, GFP_KERNEL);
-	if (!syscalls_metadata) {
-		WARN_ON(1);
-		return -ENOMEM;
-	}
-
-	for (i = 0; i < NR_syscalls; i++) {
-		meta = find_syscall_meta(psys_syscall_table[i]);
-		syscalls_metadata[i] = meta;
-	}
-	return 0;
+	return (unsigned long)(&sys_call_table)[nr];
 }
-arch_initcall(arch_init_ftrace_syscalls);
 #endif
diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index 5dc283ba5ae0..e972f0a40f8d 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -33,7 +33,7 @@ struct syscall_metadata {
 };
 
 #ifdef CONFIG_FTRACE_SYSCALLS
-extern struct syscall_metadata *syscall_nr_to_meta(int nr);
+extern unsigned long arch_syscall_addr(int nr);
 extern int syscall_name_to_nr(char *name);
 void set_syscall_enter_id(int num, int id);
 void set_syscall_exit_id(int num, int id);
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 9fbce6c9d2e1..8bda4bff2286 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -14,6 +14,69 @@ static int sys_refcount_exit;
 static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
 static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
 
+extern unsigned long __start_syscalls_metadata[];
+extern unsigned long __stop_syscalls_metadata[];
+
+static struct syscall_metadata **syscalls_metadata;
+
+static struct syscall_metadata *find_syscall_meta(unsigned long syscall)
+{
+	struct syscall_metadata *start;
+	struct syscall_metadata *stop;
+	char str[KSYM_SYMBOL_LEN];
+
+
+	start = (struct syscall_metadata *)__start_syscalls_metadata;
+	stop = (struct syscall_metadata *)__stop_syscalls_metadata;
+	kallsyms_lookup(syscall, NULL, NULL, NULL, str);
+
+	for ( ; start < stop; start++) {
+		/*
+		 * Only compare after the "sys" prefix. Archs that use
+		 * syscall wrappers may have syscalls symbols aliases prefixed
+		 * with "SyS" instead of "sys", leading to an unwanted
+		 * mismatch.
+		 */
+		if (start->name && !strcmp(start->name + 3, str + 3))
+			return start;
+	}
+	return NULL;
+}
+
+static struct syscall_metadata *syscall_nr_to_meta(int nr)
+{
+	if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
+		return NULL;
+
+	return syscalls_metadata[nr];
+}
+
+int syscall_name_to_nr(char *name)
+{
+	int i;
+
+	if (!syscalls_metadata)
+		return -1;
+
+	for (i = 0; i < NR_syscalls; i++) {
+		if (syscalls_metadata[i]) {
+			if (!strcmp(syscalls_metadata[i]->name, name))
+				return i;
+		}
+	}
+	return -1;
+}
+
+void set_syscall_enter_id(int num, int id)
+{
+	syscalls_metadata[num]->enter_id = id;
+}
+
+void set_syscall_exit_id(int num, int id)
+{
+	syscalls_metadata[num]->exit_id = id;
+}
+
 enum print_line_t
 print_syscall_enter(struct trace_iterator *iter, int flags)
 {
@@ -375,6 +438,29 @@ struct trace_event event_syscall_exit = {
 	.trace			= print_syscall_exit,
 };
 
+int __init init_ftrace_syscalls(void)
+{
+	struct syscall_metadata *meta;
+	unsigned long addr;
+	int i;
+
+	syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
+					NR_syscalls, GFP_KERNEL);
+	if (!syscalls_metadata) {
+		WARN_ON(1);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < NR_syscalls; i++) {
+		addr = arch_syscall_addr(i);
+		meta = find_syscall_meta(addr);
+		syscalls_metadata[i] = meta;
+	}
+
+	return 0;
+}
+core_initcall(init_ftrace_syscalls);
+
 #ifdef CONFIG_EVENT_PROFILE
 
 static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls);
-- 
cgit v1.2.3-70-g09d2


From 6fb2915df7f0747d9044da9dbff5b46dc2e20830 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Thu, 15 Oct 2009 11:21:42 +0800
Subject: tracing/profile: Add filter support

- Add an ioctl to allocate a filter for a perf event.

- Free the filter when the associated perf event is to be freed.

- Do the filtering in perf_swevent_match().

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tom Zanussi <tzanussi@gmail.com>
LKML-Reference: <4AD69546.8050401@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace_event.h       |  11 ++-
 include/linux/perf_counter.h       |   1 +
 include/linux/perf_event.h         |   6 ++
 kernel/perf_event.c                |  80 ++++++++++++++++++++--
 kernel/trace/trace.h               |   3 +-
 kernel/trace/trace_events_filter.c | 133 +++++++++++++++++++++++++++++--------
 6 files changed, 199 insertions(+), 35 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 4ec5e67e18cf..d11770472bc8 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -144,7 +144,7 @@ extern char			*trace_profile_buf_nmi;
 #define MAX_FILTER_STR_VAL	256	/* Should handle KSYM_SYMBOL_LEN */
 
 extern void destroy_preds(struct ftrace_event_call *call);
-extern int filter_match_preds(struct ftrace_event_call *call, void *rec);
+extern int filter_match_preds(struct event_filter *filter, void *rec);
 extern int filter_current_check_discard(struct ring_buffer *buffer,
 					struct ftrace_event_call *call,
 					void *rec,
@@ -186,4 +186,13 @@ do {									\
 		__trace_printk(ip, fmt, ##args);			\
 } while (0)
 
+#ifdef CONFIG_EVENT_PROFILE
+struct perf_event;
+extern int ftrace_profile_enable(int event_id);
+extern void ftrace_profile_disable(int event_id);
+extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
+				     char *filter_str);
+extern void ftrace_profile_free_filter(struct perf_event *event);
+#endif
+
 #endif /* _LINUX_FTRACE_EVENT_H */
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 7b7fbf433cff..91a2b4309e7a 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -225,6 +225,7 @@ struct perf_counter_attr {
 #define PERF_COUNTER_IOC_RESET		_IO ('$', 3)
 #define PERF_COUNTER_IOC_PERIOD		_IOW('$', 4, u64)
 #define PERF_COUNTER_IOC_SET_OUTPUT	_IO ('$', 5)
+#define PERF_COUNTER_IOC_SET_FILTER	_IOW('$', 6, char *)
 
 enum perf_counter_ioc_flags {
 	PERF_IOC_FLAG_GROUP		= 1U << 0,
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 2e6d95f97419..df9d964c15fc 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -221,6 +221,7 @@ struct perf_event_attr {
 #define PERF_EVENT_IOC_RESET		_IO ('$', 3)
 #define PERF_EVENT_IOC_PERIOD		_IOW('$', 4, u64)
 #define PERF_EVENT_IOC_SET_OUTPUT	_IO ('$', 5)
+#define PERF_EVENT_IOC_SET_FILTER	_IOW('$', 6, char *)
 
 enum perf_event_ioc_flags {
 	PERF_IOC_FLAG_GROUP		= 1U << 0,
@@ -633,7 +634,12 @@ struct perf_event {
 
 	struct pid_namespace		*ns;
 	u64				id;
+
+#ifdef CONFIG_EVENT_PROFILE
+	struct event_filter		*filter;
 #endif
+
+#endif /* CONFIG_PERF_EVENTS */
 };
 
 /**
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 9d0b5c665883..12b5ec39bf97 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -28,6 +28,7 @@
 #include <linux/anon_inodes.h>
 #include <linux/kernel_stat.h>
 #include <linux/perf_event.h>
+#include <linux/ftrace_event.h>
 
 #include <asm/irq_regs.h>
 
@@ -1658,6 +1659,8 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
 	return ERR_PTR(err);
 }
 
+static void perf_event_free_filter(struct perf_event *event);
+
 static void free_event_rcu(struct rcu_head *head)
 {
 	struct perf_event *event;
@@ -1665,6 +1668,7 @@ static void free_event_rcu(struct rcu_head *head)
 	event = container_of(head, struct perf_event, rcu_head);
 	if (event->ns)
 		put_pid_ns(event->ns);
+	perf_event_free_filter(event);
 	kfree(event);
 }
 
@@ -1974,7 +1978,8 @@ unlock:
 	return ret;
 }
 
-int perf_event_set_output(struct perf_event *event, int output_fd);
+static int perf_event_set_output(struct perf_event *event, int output_fd);
+static int perf_event_set_filter(struct perf_event *event, void __user *arg);
 
 static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
@@ -2002,6 +2007,9 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case PERF_EVENT_IOC_SET_OUTPUT:
 		return perf_event_set_output(event, arg);
 
+	case PERF_EVENT_IOC_SET_FILTER:
+		return perf_event_set_filter(event, (void __user *)arg);
+
 	default:
 		return -ENOTTY;
 	}
@@ -3806,9 +3814,14 @@ static int perf_swevent_is_counting(struct perf_event *event)
 	return 1;
 }
 
+static int perf_tp_event_match(struct perf_event *event,
+				struct perf_sample_data *data);
+
 static int perf_swevent_match(struct perf_event *event,
 				enum perf_type_id type,
-				u32 event_id, struct pt_regs *regs)
+				u32 event_id,
+				struct perf_sample_data *data,
+				struct pt_regs *regs)
 {
 	if (!perf_swevent_is_counting(event))
 		return 0;
@@ -3826,6 +3839,10 @@ static int perf_swevent_match(struct perf_event *event,
 			return 0;
 	}
 
+	if (event->attr.type == PERF_TYPE_TRACEPOINT &&
+	    !perf_tp_event_match(event, data))
+		return 0;
+
 	return 1;
 }
 
@@ -3842,7 +3859,7 @@ static void perf_swevent_ctx_event(struct perf_event_context *ctx,
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
-		if (perf_swevent_match(event, type, event_id, regs))
+		if (perf_swevent_match(event, type, event_id, data, regs))
 			perf_swevent_add(event, nr, nmi, data, regs);
 	}
 	rcu_read_unlock();
@@ -4086,6 +4103,7 @@ static const struct pmu perf_ops_task_clock = {
 };
 
 #ifdef CONFIG_EVENT_PROFILE
+
 void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
 			  int entry_size)
 {
@@ -4109,8 +4127,15 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
 }
 EXPORT_SYMBOL_GPL(perf_tp_event);
 
-extern int ftrace_profile_enable(int);
-extern void ftrace_profile_disable(int);
+static int perf_tp_event_match(struct perf_event *event,
+				struct perf_sample_data *data)
+{
+	void *record = data->raw->data;
+
+	if (likely(!event->filter) || filter_match_preds(event->filter, record))
+		return 1;
+	return 0;
+}
 
 static void tp_perf_event_destroy(struct perf_event *event)
 {
@@ -4135,12 +4160,53 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event)
 
 	return &perf_ops_generic;
 }
+
+static int perf_event_set_filter(struct perf_event *event, void __user *arg)
+{
+	char *filter_str;
+	int ret;
+
+	if (event->attr.type != PERF_TYPE_TRACEPOINT)
+		return -EINVAL;
+
+	filter_str = strndup_user(arg, PAGE_SIZE);
+	if (IS_ERR(filter_str))
+		return PTR_ERR(filter_str);
+
+	ret = ftrace_profile_set_filter(event, event->attr.config, filter_str);
+
+	kfree(filter_str);
+	return ret;
+}
+
+static void perf_event_free_filter(struct perf_event *event)
+{
+	ftrace_profile_free_filter(event);
+}
+
 #else
+
+static int perf_tp_event_match(struct perf_event *event,
+				struct perf_sample_data *data)
+{
+	return 1;
+}
+
 static const struct pmu *tp_perf_event_init(struct perf_event *event)
 {
 	return NULL;
 }
-#endif
+
+static int perf_event_set_filter(struct perf_event *event, void __user *arg)
+{
+	return -ENOENT;
+}
+
+static void perf_event_free_filter(struct perf_event *event)
+{
+}
+
+#endif /* CONFIG_EVENT_PROFILE */
 
 atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
 
@@ -4394,7 +4460,7 @@ err_size:
 	goto out;
 }
 
-int perf_event_set_output(struct perf_event *event, int output_fd)
+static int perf_event_set_output(struct perf_event *event, int output_fd)
 {
 	struct perf_event *output_event = NULL;
 	struct file *output_file = NULL;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index ffe53ddbe67a..4959ada9e0bb 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -743,7 +743,8 @@ filter_check_discard(struct ftrace_event_call *call, void *rec,
 		     struct ring_buffer *buffer,
 		     struct ring_buffer_event *event)
 {
-	if (unlikely(call->filter_active) && !filter_match_preds(call, rec)) {
+	if (unlikely(call->filter_active) &&
+	    !filter_match_preds(call->filter, rec)) {
 		ring_buffer_discard_commit(buffer, event);
 		return 1;
 	}
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 273845fce393..e27bb6acc2dd 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -21,6 +21,7 @@
 #include <linux/module.h>
 #include <linux/ctype.h>
 #include <linux/mutex.h>
+#include <linux/perf_event.h>
 
 #include "trace.h"
 #include "trace_output.h"
@@ -363,9 +364,8 @@ static void filter_build_regex(struct filter_pred *pred)
 }
 
 /* return 1 if event matches, 0 otherwise (discard) */
-int filter_match_preds(struct ftrace_event_call *call, void *rec)
+int filter_match_preds(struct event_filter *filter, void *rec)
 {
-	struct event_filter *filter = call->filter;
 	int match, top = 0, val1 = 0, val2 = 0;
 	int stack[MAX_FILTER_PRED];
 	struct filter_pred *pred;
@@ -538,9 +538,8 @@ static void filter_disable_preds(struct ftrace_event_call *call)
 		filter->preds[i]->fn = filter_pred_none;
 }
 
-void destroy_preds(struct ftrace_event_call *call)
+static void __free_preds(struct event_filter *filter)
 {
-	struct event_filter *filter = call->filter;
 	int i;
 
 	if (!filter)
@@ -553,21 +552,24 @@ void destroy_preds(struct ftrace_event_call *call)
 	kfree(filter->preds);
 	kfree(filter->filter_string);
 	kfree(filter);
+}
+
+void destroy_preds(struct ftrace_event_call *call)
+{
+	__free_preds(call->filter);
 	call->filter = NULL;
+	call->filter_active = 0;
 }
 
-static int init_preds(struct ftrace_event_call *call)
+static struct event_filter *__alloc_preds(void)
 {
 	struct event_filter *filter;
 	struct filter_pred *pred;
 	int i;
 
-	if (call->filter)
-		return 0;
-
-	filter = call->filter = kzalloc(sizeof(*filter), GFP_KERNEL);
-	if (!call->filter)
-		return -ENOMEM;
+	filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+	if (!filter)
+		return ERR_PTR(-ENOMEM);
 
 	filter->n_preds = 0;
 
@@ -583,12 +585,24 @@ static int init_preds(struct ftrace_event_call *call)
 		filter->preds[i] = pred;
 	}
 
-	return 0;
+	return filter;
 
 oom:
-	destroy_preds(call);
+	__free_preds(filter);
+	return ERR_PTR(-ENOMEM);
+}
+
+static int init_preds(struct ftrace_event_call *call)
+{
+	if (call->filter)
+		return 0;
+
+	call->filter_active = 0;
+	call->filter = __alloc_preds();
+	if (IS_ERR(call->filter))
+		return PTR_ERR(call->filter);
 
-	return -ENOMEM;
+	return 0;
 }
 
 static int init_subsystem_preds(struct event_subsystem *system)
@@ -629,10 +643,10 @@ static void filter_free_subsystem_preds(struct event_subsystem *system)
 
 static int filter_add_pred_fn(struct filter_parse_state *ps,
 			      struct ftrace_event_call *call,
+			      struct event_filter *filter,
 			      struct filter_pred *pred,
 			      filter_pred_fn_t fn)
 {
-	struct event_filter *filter = call->filter;
 	int idx, err;
 
 	if (filter->n_preds == MAX_FILTER_PRED) {
@@ -647,7 +661,6 @@ static int filter_add_pred_fn(struct filter_parse_state *ps,
 		return err;
 
 	filter->n_preds++;
-	call->filter_active = 1;
 
 	return 0;
 }
@@ -726,6 +739,7 @@ static filter_pred_fn_t select_comparison_fn(int op, int field_size,
 
 static int filter_add_pred(struct filter_parse_state *ps,
 			   struct ftrace_event_call *call,
+			   struct event_filter *filter,
 			   struct filter_pred *pred,
 			   bool dry_run)
 {
@@ -795,7 +809,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
 
 add_pred_fn:
 	if (!dry_run)
-		return filter_add_pred_fn(ps, call, pred, fn);
+		return filter_add_pred_fn(ps, call, filter, pred, fn);
 	return 0;
 }
 
@@ -1154,6 +1168,7 @@ static int check_preds(struct filter_parse_state *ps)
 }
 
 static int replace_preds(struct ftrace_event_call *call,
+			 struct event_filter *filter,
 			 struct filter_parse_state *ps,
 			 char *filter_string,
 			 bool dry_run)
@@ -1200,7 +1215,7 @@ static int replace_preds(struct ftrace_event_call *call,
 add_pred:
 		if (!pred)
 			return -ENOMEM;
-		err = filter_add_pred(ps, call, pred, dry_run);
+		err = filter_add_pred(ps, call, filter, pred, dry_run);
 		filter_free_pred(pred);
 		if (err)
 			return err;
@@ -1216,6 +1231,7 @@ static int replace_system_preds(struct event_subsystem *system,
 				char *filter_string)
 {
 	struct ftrace_event_call *call;
+	struct event_filter *filter;
 	int err;
 	bool fail = true;
 
@@ -1228,17 +1244,19 @@ static int replace_system_preds(struct event_subsystem *system,
 			continue;
 
 		/* try to see if the filter can be applied */
-		err = replace_preds(call, ps, filter_string, true);
+		err = replace_preds(call, filter, ps, filter_string, true);
 		if (err)
 			continue;
 
 		/* really apply the filter */
 		filter_disable_preds(call);
-		err = replace_preds(call, ps, filter_string, false);
+		err = replace_preds(call, filter, ps, filter_string, false);
 		if (err)
 			filter_disable_preds(call);
-		else
-			replace_filter_string(call->filter, filter_string);
+		else {
+			call->filter_active = 1;
+			replace_filter_string(filter, filter_string);
+		}
 		fail = false;
 	}
 
@@ -1252,7 +1270,6 @@ static int replace_system_preds(struct event_subsystem *system,
 int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
 {
 	int err;
-
 	struct filter_parse_state *ps;
 
 	mutex_lock(&event_mutex);
@@ -1283,10 +1300,11 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
 		goto out;
 	}
 
-	err = replace_preds(call, ps, filter_string, false);
+	err = replace_preds(call, call->filter, ps, filter_string, false);
 	if (err)
 		append_filter_err(ps, call->filter);
-
+	else
+		call->filter_active = 1;
 out:
 	filter_opstack_clear(ps);
 	postfix_clear(ps);
@@ -1301,7 +1319,6 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
 				 char *filter_string)
 {
 	int err;
-
 	struct filter_parse_state *ps;
 
 	mutex_lock(&event_mutex);
@@ -1345,3 +1362,67 @@ out_unlock:
 	return err;
 }
 
+#ifdef CONFIG_EVENT_PROFILE
+
+void ftrace_profile_free_filter(struct perf_event *event)
+{
+	struct event_filter *filter = event->filter;
+
+	event->filter = NULL;
+	__free_preds(filter);
+}
+
+int ftrace_profile_set_filter(struct perf_event *event, int event_id,
+			      char *filter_str)
+{
+	int err;
+	struct event_filter *filter;
+	struct filter_parse_state *ps;
+	struct ftrace_event_call *call = NULL;
+
+	mutex_lock(&event_mutex);
+
+	list_for_each_entry(call, &ftrace_events, list) {
+		if (call->id == event_id)
+			break;
+	}
+	if (!call)
+		return -EINVAL;
+
+	if (event->filter)
+		return -EEXIST;
+
+	filter = __alloc_preds();
+	if (IS_ERR(filter))
+		return PTR_ERR(filter);
+
+	err = -ENOMEM;
+	ps = kzalloc(sizeof(*ps), GFP_KERNEL);
+	if (!ps)
+		goto free_preds;
+
+	parse_init(ps, filter_ops, filter_str);
+	err = filter_parse(ps);
+	if (err)
+		goto free_ps;
+
+	err = replace_preds(call, filter, ps, filter_str, false);
+	if (!err)
+		event->filter = filter;
+
+free_ps:
+	filter_opstack_clear(ps);
+	postfix_clear(ps);
+	kfree(ps);
+
+free_preds:
+	if (err)
+		__free_preds(filter);
+
+	mutex_unlock(&event_mutex);
+
+	return err;
+}
+
+#endif /* CONFIG_EVENT_PROFILE */
+
-- 
cgit v1.2.3-70-g09d2


From 434a83c3fbb951908a3a52040f7f0e0b8ba00dd0 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 15 Oct 2009 11:50:39 +0200
Subject: events: Harmonize event field names and print output names

Now that we can filter based on fields via perf record, people
will start using filter expressions and will expect them to
be obvious.

The primary way to see which fields are available is by looking
at the trace output, such as:

  gcc-18676 [000]   343.011728: irq_handler_entry: irq=0 handler=timer
  cc1-18677 [000]   343.012727: irq_handler_entry: irq=0 handler=timer
  cc1-18677 [000]   343.032692: irq_handler_entry: irq=0 handler=timer
  cc1-18677 [000]   343.033690: irq_handler_entry: irq=0 handler=timer
  cc1-18677 [000]   343.034687: irq_handler_entry: irq=0 handler=timer
  cc1-18677 [000]   343.035686: irq_handler_entry: irq=0 handler=timer
  cc1-18677 [000]   343.036684: irq_handler_entry: irq=0 handler=timer

While 'irq==0' filters work, the 'handler==<x>' filter expression
does not work:

  $ perf record -R -f -a -e irq:irq_handler_entry --filter handler=timer sleep 1
   Error: failed to set filter with 22 (Invalid argument)

The problem is that while an 'irq' field exists and is recognized
as a filter field - 'handler' does not exist - its name is 'name'
in the output.

To solve this, we need to synchronize the printout and the field
names, wherever possible.

In cases where the printout prints a non-field, we enclose
that information in square brackets, such as:

  perf-1380  [013]   724.903505: softirq_exit: vec=9 [action=RCU]
  perf-1380  [013]   724.904482: softirq_exit: vec=1 [action=TIMER]

This way users can use filter expressions more intuitively: all
fields that show up as 'primary' (non-bracketed) information is
filterable.

This patch harmonizes the field names for all irq, bkl, power,
sched and timer events.

We might in fact think about dropping the print format bit of
generic tracepoints altogether, and just print the fields that
are being recorded.

Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/trace/events/bkl.h   | 18 +++++-----
 include/trace/events/irq.h   |  8 ++---
 include/trace/events/power.h |  2 --
 include/trace/events/sched.h | 44 ++++++++++++------------
 include/trace/events/timer.h | 79 ++++++++++++++++++++++----------------------
 5 files changed, 74 insertions(+), 77 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/bkl.h b/include/trace/events/bkl.h
index 8abd620a490e..1af72dc24278 100644
--- a/include/trace/events/bkl.h
+++ b/include/trace/events/bkl.h
@@ -13,7 +13,7 @@ TRACE_EVENT(lock_kernel,
 	TP_ARGS(func, file, line),
 
 	TP_STRUCT__entry(
-		__field(	int,		lock_depth		)
+		__field(	int,		depth			)
 		__field_ext(	const char *,	func, FILTER_PTR_STRING	)
 		__field_ext(	const char *,	file, FILTER_PTR_STRING	)
 		__field(	int,		line			)
@@ -21,13 +21,13 @@ TRACE_EVENT(lock_kernel,
 
 	TP_fast_assign(
 		/* We want to record the lock_depth after lock is acquired */
-		__entry->lock_depth = current->lock_depth + 1;
+		__entry->depth = current->lock_depth + 1;
 		__entry->func = func;
 		__entry->file = file;
 		__entry->line = line;
 	),
 
-	TP_printk("depth: %d, %s:%d %s()", __entry->lock_depth,
+	TP_printk("depth=%d file:line=%s:%d func=%s()", __entry->depth,
 		  __entry->file, __entry->line, __entry->func)
 );
 
@@ -38,20 +38,20 @@ TRACE_EVENT(unlock_kernel,
 	TP_ARGS(func, file, line),
 
 	TP_STRUCT__entry(
-		__field(int,		lock_depth)
-		__field(const char *,	func)
-		__field(const char *,	file)
-		__field(int,		line)
+		__field(int,		depth		)
+		__field(const char *,	func		)
+		__field(const char *,	file		)
+		__field(int,		line		)
 	),
 
 	TP_fast_assign(
-		__entry->lock_depth = current->lock_depth;
+		__entry->depth = current->lock_depth;
 		__entry->func = func;
 		__entry->file = file;
 		__entry->line = line;
 	),
 
-	TP_printk("depth: %d, %s:%d %s()", __entry->lock_depth,
+	TP_printk("depth=%d file:line=%s:%d func=%s()", __entry->depth,
 		  __entry->file, __entry->line, __entry->func)
 );
 
diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h
index b89f9db4a404..dcfcd4407623 100644
--- a/include/trace/events/irq.h
+++ b/include/trace/events/irq.h
@@ -48,7 +48,7 @@ TRACE_EVENT(irq_handler_entry,
 		__assign_str(name, action->name);
 	),
 
-	TP_printk("irq=%d handler=%s", __entry->irq, __get_str(name))
+	TP_printk("irq=%d name=%s", __entry->irq, __get_str(name))
 );
 
 /**
@@ -78,7 +78,7 @@ TRACE_EVENT(irq_handler_exit,
 		__entry->ret	= ret;
 	),
 
-	TP_printk("irq=%d return=%s",
+	TP_printk("irq=%d ret=%s",
 		  __entry->irq, __entry->ret ? "handled" : "unhandled")
 );
 
@@ -107,7 +107,7 @@ TRACE_EVENT(softirq_entry,
 		__entry->vec = (int)(h - vec);
 	),
 
-	TP_printk("softirq=%d action=%s", __entry->vec,
+	TP_printk("vec=%d [action=%s]", __entry->vec,
 		  show_softirq_name(__entry->vec))
 );
 
@@ -136,7 +136,7 @@ TRACE_EVENT(softirq_exit,
 		__entry->vec = (int)(h - vec);
 	),
 
-	TP_printk("softirq=%d action=%s", __entry->vec,
+	TP_printk("vec=%d [action=%s]", __entry->vec,
 		  show_softirq_name(__entry->vec))
 );
 
diff --git a/include/trace/events/power.h b/include/trace/events/power.h
index ea6d579261ad..9bb96e5a2848 100644
--- a/include/trace/events/power.h
+++ b/include/trace/events/power.h
@@ -16,8 +16,6 @@ enum {
 };
 #endif
 
-
-
 TRACE_EVENT(power_start,
 
 	TP_PROTO(unsigned int type, unsigned int state),
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 4069c43f4187..b50b9856c59f 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -26,7 +26,7 @@ TRACE_EVENT(sched_kthread_stop,
 		__entry->pid	= t->pid;
 	),
 
-	TP_printk("task %s:%d", __entry->comm, __entry->pid)
+	TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid)
 );
 
 /*
@@ -46,7 +46,7 @@ TRACE_EVENT(sched_kthread_stop_ret,
 		__entry->ret	= ret;
 	),
 
-	TP_printk("ret %d", __entry->ret)
+	TP_printk("ret=%d", __entry->ret)
 );
 
 /*
@@ -73,7 +73,7 @@ TRACE_EVENT(sched_wait_task,
 		__entry->prio	= p->prio;
 	),
 
-	TP_printk("task %s:%d [%d]",
+	TP_printk("comm=%s pid=%d prio=%d",
 		  __entry->comm, __entry->pid, __entry->prio)
 );
 
@@ -94,7 +94,7 @@ TRACE_EVENT(sched_wakeup,
 		__field(	pid_t,	pid			)
 		__field(	int,	prio			)
 		__field(	int,	success			)
-		__field(	int,	cpu			)
+		__field(	int,	target_cpu		)
 	),
 
 	TP_fast_assign(
@@ -102,12 +102,12 @@ TRACE_EVENT(sched_wakeup,
 		__entry->pid		= p->pid;
 		__entry->prio		= p->prio;
 		__entry->success	= success;
-		__entry->cpu		= task_cpu(p);
+		__entry->target_cpu	= task_cpu(p);
 	),
 
-	TP_printk("task %s:%d [%d] success=%d [%03d]",
+	TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d",
 		  __entry->comm, __entry->pid, __entry->prio,
-		  __entry->success, __entry->cpu)
+		  __entry->success, __entry->target_cpu)
 );
 
 /*
@@ -127,7 +127,7 @@ TRACE_EVENT(sched_wakeup_new,
 		__field(	pid_t,	pid			)
 		__field(	int,	prio			)
 		__field(	int,	success			)
-		__field(	int,	cpu			)
+		__field(	int,	target_cpu		)
 	),
 
 	TP_fast_assign(
@@ -135,12 +135,12 @@ TRACE_EVENT(sched_wakeup_new,
 		__entry->pid		= p->pid;
 		__entry->prio		= p->prio;
 		__entry->success	= success;
-		__entry->cpu		= task_cpu(p);
+		__entry->target_cpu	= task_cpu(p);
 	),
 
-	TP_printk("task %s:%d [%d] success=%d [%03d]",
+	TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d",
 		  __entry->comm, __entry->pid, __entry->prio,
-		  __entry->success, __entry->cpu)
+		  __entry->success, __entry->target_cpu)
 );
 
 /*
@@ -176,7 +176,7 @@ TRACE_EVENT(sched_switch,
 		__entry->next_prio	= next->prio;
 	),
 
-	TP_printk("task %s:%d [%d] (%s) ==> %s:%d [%d]",
+	TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s ==> next_comm=%s next_pid=%d next_prio=%d",
 		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
 		__entry->prev_state ?
 		  __print_flags(__entry->prev_state, "|",
@@ -211,7 +211,7 @@ TRACE_EVENT(sched_migrate_task,
 		__entry->dest_cpu	= dest_cpu;
 	),
 
-	TP_printk("task %s:%d [%d] from: %d  to: %d",
+	TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d",
 		  __entry->comm, __entry->pid, __entry->prio,
 		  __entry->orig_cpu, __entry->dest_cpu)
 );
@@ -237,7 +237,7 @@ TRACE_EVENT(sched_process_free,
 		__entry->prio		= p->prio;
 	),
 
-	TP_printk("task %s:%d [%d]",
+	TP_printk("comm=%s pid=%d prio=%d",
 		  __entry->comm, __entry->pid, __entry->prio)
 );
 
@@ -262,7 +262,7 @@ TRACE_EVENT(sched_process_exit,
 		__entry->prio		= p->prio;
 	),
 
-	TP_printk("task %s:%d [%d]",
+	TP_printk("comm=%s pid=%d prio=%d",
 		  __entry->comm, __entry->pid, __entry->prio)
 );
 
@@ -287,7 +287,7 @@ TRACE_EVENT(sched_process_wait,
 		__entry->prio		= current->prio;
 	),
 
-	TP_printk("task %s:%d [%d]",
+	TP_printk("comm=%s pid=%d prio=%d",
 		  __entry->comm, __entry->pid, __entry->prio)
 );
 
@@ -314,7 +314,7 @@ TRACE_EVENT(sched_process_fork,
 		__entry->child_pid	= child->pid;
 	),
 
-	TP_printk("parent %s:%d  child %s:%d",
+	TP_printk("comm=%s pid=%d child_comm=%s child_pid=%d",
 		__entry->parent_comm, __entry->parent_pid,
 		__entry->child_comm, __entry->child_pid)
 );
@@ -340,7 +340,7 @@ TRACE_EVENT(sched_signal_send,
 		__entry->sig	= sig;
 	),
 
-	TP_printk("sig: %d  task %s:%d",
+	TP_printk("sig=%d comm=%s pid=%d",
 		  __entry->sig, __entry->comm, __entry->pid)
 );
 
@@ -374,7 +374,7 @@ TRACE_EVENT(sched_stat_wait,
 		__perf_count(delay);
 	),
 
-	TP_printk("task: %s:%d wait: %Lu [ns]",
+	TP_printk("comm=%s pid=%d delay=%Lu [ns]",
 			__entry->comm, __entry->pid,
 			(unsigned long long)__entry->delay)
 );
@@ -406,7 +406,7 @@ TRACE_EVENT(sched_stat_runtime,
 		__perf_count(runtime);
 	),
 
-	TP_printk("task: %s:%d runtime: %Lu [ns], vruntime: %Lu [ns]",
+	TP_printk("comm=%s pid=%d runtime=%Lu [ns] vruntime=%Lu [ns]",
 			__entry->comm, __entry->pid,
 			(unsigned long long)__entry->runtime,
 			(unsigned long long)__entry->vruntime)
@@ -437,7 +437,7 @@ TRACE_EVENT(sched_stat_sleep,
 		__perf_count(delay);
 	),
 
-	TP_printk("task: %s:%d sleep: %Lu [ns]",
+	TP_printk("comm=%s pid=%d delay=%Lu [ns]",
 			__entry->comm, __entry->pid,
 			(unsigned long long)__entry->delay)
 );
@@ -467,7 +467,7 @@ TRACE_EVENT(sched_stat_iowait,
 		__perf_count(delay);
 	),
 
-	TP_printk("task: %s:%d iowait: %Lu [ns]",
+	TP_printk("comm=%s pid=%d delay=%Lu [ns]",
 			__entry->comm, __entry->pid,
 			(unsigned long long)__entry->delay)
 );
diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
index 1844c48d640e..e5ce87a0498d 100644
--- a/include/trace/events/timer.h
+++ b/include/trace/events/timer.h
@@ -26,7 +26,7 @@ TRACE_EVENT(timer_init,
 		__entry->timer	= timer;
 	),
 
-	TP_printk("timer %p", __entry->timer)
+	TP_printk("timer=%p", __entry->timer)
 );
 
 /**
@@ -54,7 +54,7 @@ TRACE_EVENT(timer_start,
 		__entry->now		= jiffies;
 	),
 
-	TP_printk("timer %p: func %pf, expires %lu, timeout %ld",
+	TP_printk("timer=%p function=%pf expires=%lu [timeout=%ld]",
 		  __entry->timer, __entry->function, __entry->expires,
 		  (long)__entry->expires - __entry->now)
 );
@@ -81,7 +81,7 @@ TRACE_EVENT(timer_expire_entry,
 		__entry->now		= jiffies;
 	),
 
-	TP_printk("timer %p: now %lu", __entry->timer, __entry->now)
+	TP_printk("timer=%p now=%lu", __entry->timer, __entry->now)
 );
 
 /**
@@ -108,7 +108,7 @@ TRACE_EVENT(timer_expire_exit,
 		__entry->timer	= timer;
 	),
 
-	TP_printk("timer %p", __entry->timer)
+	TP_printk("timer=%p", __entry->timer)
 );
 
 /**
@@ -129,7 +129,7 @@ TRACE_EVENT(timer_cancel,
 		__entry->timer	= timer;
 	),
 
-	TP_printk("timer %p", __entry->timer)
+	TP_printk("timer=%p", __entry->timer)
 );
 
 /**
@@ -140,24 +140,24 @@ TRACE_EVENT(timer_cancel,
  */
 TRACE_EVENT(hrtimer_init,
 
-	TP_PROTO(struct hrtimer *timer, clockid_t clockid,
+	TP_PROTO(struct hrtimer *hrtimer, clockid_t clockid,
 		 enum hrtimer_mode mode),
 
-	TP_ARGS(timer, clockid, mode),
+	TP_ARGS(hrtimer, clockid, mode),
 
 	TP_STRUCT__entry(
-		__field( void *,		timer		)
+		__field( void *,		hrtimer		)
 		__field( clockid_t,		clockid		)
 		__field( enum hrtimer_mode,	mode		)
 	),
 
 	TP_fast_assign(
-		__entry->timer		= timer;
+		__entry->hrtimer	= hrtimer;
 		__entry->clockid	= clockid;
 		__entry->mode		= mode;
 	),
 
-	TP_printk("hrtimer %p, clockid %s, mode %s", __entry->timer,
+	TP_printk("hrtimer=%p clockid=%s mode=%s", __entry->hrtimer,
 		  __entry->clockid == CLOCK_REALTIME ?
 			"CLOCK_REALTIME" : "CLOCK_MONOTONIC",
 		  __entry->mode == HRTIMER_MODE_ABS ?
@@ -170,26 +170,26 @@ TRACE_EVENT(hrtimer_init,
  */
 TRACE_EVENT(hrtimer_start,
 
-	TP_PROTO(struct hrtimer *timer),
+	TP_PROTO(struct hrtimer *hrtimer),
 
-	TP_ARGS(timer),
+	TP_ARGS(hrtimer),
 
 	TP_STRUCT__entry(
-		__field( void *,	timer		)
+		__field( void *,	hrtimer		)
 		__field( void *,	function	)
 		__field( s64,		expires		)
 		__field( s64,		softexpires	)
 	),
 
 	TP_fast_assign(
-		__entry->timer		= timer;
-		__entry->function	= timer->function;
-		__entry->expires	= hrtimer_get_expires(timer).tv64;
-		__entry->softexpires	= hrtimer_get_softexpires(timer).tv64;
+		__entry->hrtimer	= hrtimer;
+		__entry->function	= hrtimer->function;
+		__entry->expires	= hrtimer_get_expires(hrtimer).tv64;
+		__entry->softexpires	= hrtimer_get_softexpires(hrtimer).tv64;
 	),
 
-	TP_printk("hrtimer %p, func %pf, expires %llu, softexpires %llu",
-		  __entry->timer, __entry->function,
+	TP_printk("hrtimer=%p function=%pf expires=%llu softexpires=%llu",
+		  __entry->hrtimer, __entry->function,
 		  (unsigned long long)ktime_to_ns((ktime_t) {
 				  .tv64 = __entry->expires }),
 		  (unsigned long long)ktime_to_ns((ktime_t) {
@@ -206,23 +206,22 @@ TRACE_EVENT(hrtimer_start,
  */
 TRACE_EVENT(hrtimer_expire_entry,
 
-	TP_PROTO(struct hrtimer *timer, ktime_t *now),
+	TP_PROTO(struct hrtimer *hrtimer, ktime_t *now),
 
-	TP_ARGS(timer, now),
+	TP_ARGS(hrtimer, now),
 
 	TP_STRUCT__entry(
-		__field( void *,	timer	)
+		__field( void *,	hrtimer	)
 		__field( s64,		now	)
 	),
 
 	TP_fast_assign(
-		__entry->timer	= timer;
-		__entry->now	= now->tv64;
+		__entry->hrtimer	= hrtimer;
+		__entry->now		= now->tv64;
 	),
 
-	TP_printk("hrtimer %p, now %llu", __entry->timer,
-		  (unsigned long long)ktime_to_ns((ktime_t) {
-				  .tv64 = __entry->now }))
+	TP_printk("hrtimer=%p now=%llu", __entry->hrtimer,
+		  (unsigned long long)ktime_to_ns((ktime_t) { .tv64 = __entry->now }))
  );
 
 /**
@@ -234,40 +233,40 @@ TRACE_EVENT(hrtimer_expire_entry,
  */
 TRACE_EVENT(hrtimer_expire_exit,
 
-	TP_PROTO(struct hrtimer *timer),
+	TP_PROTO(struct hrtimer *hrtimer),
 
-	TP_ARGS(timer),
+	TP_ARGS(hrtimer),
 
 	TP_STRUCT__entry(
-		__field( void *,	timer	)
+		__field( void *,	hrtimer	)
 	),
 
 	TP_fast_assign(
-		__entry->timer	= timer;
+		__entry->hrtimer	= hrtimer;
 	),
 
-	TP_printk("hrtimer %p", __entry->timer)
+	TP_printk("hrtimer=%p", __entry->hrtimer)
 );
 
 /**
  * hrtimer_cancel - called when the hrtimer is canceled
- * @timer:	pointer to struct hrtimer
+ * @hrtimer:	pointer to struct hrtimer
  */
 TRACE_EVENT(hrtimer_cancel,
 
-	TP_PROTO(struct hrtimer *timer),
+	TP_PROTO(struct hrtimer *hrtimer),
 
-	TP_ARGS(timer),
+	TP_ARGS(hrtimer),
 
 	TP_STRUCT__entry(
-		__field( void *,	timer	)
+		__field( void *,	hrtimer	)
 	),
 
 	TP_fast_assign(
-		__entry->timer	= timer;
+		__entry->hrtimer	= hrtimer;
 	),
 
-	TP_printk("hrtimer %p", __entry->timer)
+	TP_printk("hrtimer=%p", __entry->hrtimer)
 );
 
 /**
@@ -302,7 +301,7 @@ TRACE_EVENT(itimer_state,
 		__entry->interval_usec	= value->it_interval.tv_usec;
 	),
 
-	TP_printk("which %d, expires %lu, it_value %lu.%lu, it_interval %lu.%lu",
+	TP_printk("which=%d expires=%lu it_value=%lu.%lu it_interval=%lu.%lu",
 		  __entry->which, __entry->expires,
 		  __entry->value_sec, __entry->value_usec,
 		  __entry->interval_sec, __entry->interval_usec)
@@ -332,7 +331,7 @@ TRACE_EVENT(itimer_expire,
 		__entry->pid	= pid_nr(pid);
 	),
 
-	    TP_printk("which %d, pid %d, now %lu", __entry->which,
+	    TP_printk("which=%d pid=%d now=%lu", __entry->which,
 		      (int) __entry->pid, __entry->now)
 );
 
-- 
cgit v1.2.3-70-g09d2


From 40b1f4e5113eafc5e84f2ba86822df66087fcb25 Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Thu, 22 Oct 2009 14:39:28 +1100
Subject: irq: trivial: Fix typo in comment for #endif

The comment suggests this #endif is CONFIG_X86 but it's really
CONFIG_TRACE_IRQFLAGS_SUPPORT

Signed-off-by: Michael Neuling <mikey@neuling.org>
Cc: michael@ellerman.id.au
LKML-Reference: <18191.1256182768@neuling.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/irqflags.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index b02a3f1d46a0..006bf45eae30 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -124,6 +124,6 @@
 	typecheck(unsigned long, flags);	\
 	raw_irqs_disabled_flags(flags);		\
 })
-#endif		/* CONFIG_X86 */
+#endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */
 
 #endif
-- 
cgit v1.2.3-70-g09d2


From 5c828713358cb9df8aa174371edcbbb62203a490 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Fri, 23 Oct 2009 14:58:11 +0200
Subject: ratelimit: Make suppressed output messages more useful

Today I got:

  [39648.224782] Registered led device: iwl-phy0::TX
  [40676.545099] __ratelimit: 246 callbacks suppressed
  [40676.545103] abcdef[23675]: segfault at 0 ...

as you can see the ratelimit message contains a function prefix.
Since this is always __ratelimit, this wont help much.

This patch changes __ratelimit and printk_ratelimit to print the
function name that calls ratelimit.

This will pinpoint the responsible function, as long as not several
different places call ratelimit with the same ratelimit state at
the same time. In that case we catch only one random function that
calls ratelimit after the wait period.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Dave Young <hidave.darkstar@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
CC: Andrew Morton <akpm@linux-foundation.org>
LKML-Reference: <200910231458.11832.borntraeger@de.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/kernel.h    | 3 ++-
 include/linux/ratelimit.h | 3 ++-
 kernel/printk.c           | 6 +++---
 lib/ratelimit.c           | 6 +++---
 4 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 3305f33201be..21d0d822c716 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -240,7 +240,8 @@ asmlinkage int vprintk(const char *fmt, va_list args)
 asmlinkage int printk(const char * fmt, ...)
 	__attribute__ ((format (printf, 1, 2))) __cold;
 
-extern int printk_ratelimit(void);
+extern int __printk_ratelimit(const char *func);
+#define printk_ratelimit() __printk_ratelimit(__func__)
 extern bool printk_timed_ratelimit(unsigned long *caller_jiffies,
 				   unsigned int interval_msec);
 
diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h
index 187bc16c1f15..668cf1bef030 100644
--- a/include/linux/ratelimit.h
+++ b/include/linux/ratelimit.h
@@ -25,6 +25,7 @@ struct ratelimit_state {
 		.burst		= burst_init,				\
 	}
 
-extern int __ratelimit(struct ratelimit_state *rs);
+extern int ___ratelimit(struct ratelimit_state *rs, const char *func);
+#define __ratelimit(state) ___ratelimit(state, __func__)
 
 #endif /* _LINUX_RATELIMIT_H */
diff --git a/kernel/printk.c b/kernel/printk.c
index b997c893cdcf..8283dbe15af4 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1364,11 +1364,11 @@ late_initcall(disable_boot_consoles);
  */
 DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10);
 
-int printk_ratelimit(void)
+int __printk_ratelimit(const char *func)
 {
-	return __ratelimit(&printk_ratelimit_state);
+	return ___ratelimit(&printk_ratelimit_state, func);
 }
-EXPORT_SYMBOL(printk_ratelimit);
+EXPORT_SYMBOL(__printk_ratelimit);
 
 /**
  * printk_timed_ratelimit - caller-controlled printk ratelimiting
diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index 5551731ae1d4..09f5ce1810dc 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -20,7 +20,7 @@
  * This enforces a rate limit: not more than @rs->ratelimit_burst callbacks
  * in every @rs->ratelimit_jiffies
  */
-int __ratelimit(struct ratelimit_state *rs)
+int ___ratelimit(struct ratelimit_state *rs, const char *func)
 {
 	unsigned long flags;
 	int ret;
@@ -43,7 +43,7 @@ int __ratelimit(struct ratelimit_state *rs)
 	if (time_is_before_jiffies(rs->begin + rs->interval)) {
 		if (rs->missed)
 			printk(KERN_WARNING "%s: %d callbacks suppressed\n",
-				__func__, rs->missed);
+				func, rs->missed);
 		rs->begin   = 0;
 		rs->printed = 0;
 		rs->missed  = 0;
@@ -59,4 +59,4 @@ int __ratelimit(struct ratelimit_state *rs)
 
 	return ret;
 }
-EXPORT_SYMBOL(__ratelimit);
+EXPORT_SYMBOL(___ratelimit);
-- 
cgit v1.2.3-70-g09d2


From bb015f0c85362aa767f8f00f50a40d85e489414f Mon Sep 17 00:00:00 2001
From: Wolfram Sang <w.sang@pengutronix.de>
Date: Mon, 19 Oct 2009 11:43:32 +0200
Subject: pcmcia: drop already defined PCI_IDs

Out of 10 PCI_IDs found in the PCMCIA subsystem, only two were not defined in
pci_ids.h. Move them and drop the duplicates. Successfully build-tested.

Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
 drivers/pcmcia/cirrus.h  | 10 ----------
 drivers/pcmcia/o2micro.h | 22 ----------------------
 include/linux/pci_ids.h  |  2 ++
 3 files changed, 2 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/drivers/pcmcia/cirrus.h b/drivers/pcmcia/cirrus.h
index ecd4fc7f666f..446a4576e73e 100644
--- a/drivers/pcmcia/cirrus.h
+++ b/drivers/pcmcia/cirrus.h
@@ -30,16 +30,6 @@
 #ifndef _LINUX_CIRRUS_H
 #define _LINUX_CIRRUS_H
 
-#ifndef PCI_VENDOR_ID_CIRRUS
-#define PCI_VENDOR_ID_CIRRUS		0x1013
-#endif
-#ifndef PCI_DEVICE_ID_CIRRUS_6729
-#define PCI_DEVICE_ID_CIRRUS_6729	0x1100
-#endif
-#ifndef PCI_DEVICE_ID_CIRRUS_6832
-#define PCI_DEVICE_ID_CIRRUS_6832	0x1110
-#endif
-
 #define PD67_MISC_CTL_1		0x16	/* Misc control 1 */
 #define PD67_FIFO_CTL		0x17	/* FIFO control */
 #define PD67_MISC_CTL_2		0x1E	/* Misc control 2 */
diff --git a/drivers/pcmcia/o2micro.h b/drivers/pcmcia/o2micro.h
index 72188c462c9c..624442fc0d35 100644
--- a/drivers/pcmcia/o2micro.h
+++ b/drivers/pcmcia/o2micro.h
@@ -30,28 +30,6 @@
 #ifndef _LINUX_O2MICRO_H
 #define _LINUX_O2MICRO_H
 
-#ifndef PCI_VENDOR_ID_O2
-#define PCI_VENDOR_ID_O2		0x1217
-#endif
-#ifndef PCI_DEVICE_ID_O2_6729
-#define PCI_DEVICE_ID_O2_6729		0x6729
-#endif
-#ifndef PCI_DEVICE_ID_O2_6730
-#define PCI_DEVICE_ID_O2_6730		0x673a
-#endif
-#ifndef PCI_DEVICE_ID_O2_6832
-#define PCI_DEVICE_ID_O2_6832		0x6832
-#endif
-#ifndef PCI_DEVICE_ID_O2_6836
-#define PCI_DEVICE_ID_O2_6836		0x6836
-#endif
-#ifndef PCI_DEVICE_ID_O2_6812
-#define PCI_DEVICE_ID_O2_6812		0x6872
-#endif
-#ifndef PCI_DEVICE_ID_O2_6933
-#define PCI_DEVICE_ID_O2_6933           0x6933
-#endif
-
 /* Additional PCI configuration registers */
 
 #define O2_MUX_CONTROL		0x90	/* 32 bit */
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index f490e7a7307a..857cc349bf71 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1632,6 +1632,8 @@
 #define PCI_DEVICE_ID_O2_6730		0x673a
 #define PCI_DEVICE_ID_O2_6832		0x6832
 #define PCI_DEVICE_ID_O2_6836		0x6836
+#define PCI_DEVICE_ID_O2_6812		0x6872
+#define PCI_DEVICE_ID_O2_6933		0x6933
 
 #define PCI_VENDOR_ID_3DFX		0x121a
 #define PCI_DEVICE_ID_3DFX_VOODOO	0x0001
-- 
cgit v1.2.3-70-g09d2


From ce0e7b28fb75cb003cfc8d0238613aaf1c55e797 Mon Sep 17 00:00:00 2001
From: Ryota Ozaki <ozaki.ryota@gmail.com>
Date: Sat, 24 Oct 2009 01:20:10 +0900
Subject: sched, cpuacct: Fix niced guest time accounting

CPU time of a guest is always accounted in 'user' time
without concern for the nice value of its counterpart
process although the guest is scheduled under the nice
value.

This patch fixes the defect and accounts cpu time of
a niced guest in 'nice' time as same as a niced process.

And also the patch adds 'guest_nice' to cpuacct. The
value provides niced guest cpu time which is like 'nice'
to 'user'.

The original discussions can be found here:

  http://www.mail-archive.com/kvm@vger.kernel.org/msg23982.html
  http://www.mail-archive.com/kvm@vger.kernel.org/msg23860.html

Signed-off-by: Ryota Ozaki <ozaki.ryota@gmail.com>
Acked-by: Avi Kivity <avi@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1256314810-7897-1-git-send-email-ozaki.ryota@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/filesystems/proc.txt |  3 ++-
 fs/proc/stat.c                     | 19 +++++++++++++------
 include/linux/kernel_stat.h        |  1 +
 kernel/sched.c                     |  9 +++++++--
 4 files changed, 23 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 2c48f945546b..4af0018533f2 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1072,7 +1072,8 @@ second).  The meanings of the columns are as follows, from left to right:
 - irq: servicing interrupts
 - softirq: servicing softirqs
 - steal: involuntary wait
-- guest: running a guest
+- guest: running a normal guest
+- guest_nice: running a niced guest
 
 The "intr" line gives counts of interrupts  serviced since boot time, for each
 of the  possible system interrupts.   The first  column  is the  total of  all
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 7cc726c6d70a..b9b7aad2003d 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -27,7 +27,7 @@ static int show_stat(struct seq_file *p, void *v)
 	int i, j;
 	unsigned long jif;
 	cputime64_t user, nice, system, idle, iowait, irq, softirq, steal;
-	cputime64_t guest;
+	cputime64_t guest, guest_nice;
 	u64 sum = 0;
 	u64 sum_softirq = 0;
 	unsigned int per_softirq_sums[NR_SOFTIRQS] = {0};
@@ -36,7 +36,7 @@ static int show_stat(struct seq_file *p, void *v)
 
 	user = nice = system = idle = iowait =
 		irq = softirq = steal = cputime64_zero;
-	guest = cputime64_zero;
+	guest = guest_nice = cputime64_zero;
 	getboottime(&boottime);
 	jif = boottime.tv_sec;
 
@@ -51,6 +51,8 @@ static int show_stat(struct seq_file *p, void *v)
 		softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq);
 		steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
 		guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
+		guest_nice = cputime64_add(guest_nice,
+			kstat_cpu(i).cpustat.guest_nice);
 		for_each_irq_nr(j) {
 			sum += kstat_irqs_cpu(j, i);
 		}
@@ -65,7 +67,8 @@ static int show_stat(struct seq_file *p, void *v)
 	}
 	sum += arch_irq_stat();
 
-	seq_printf(p, "cpu  %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
+	seq_printf(p, "cpu  %llu %llu %llu %llu %llu %llu %llu %llu %llu "
+		"%llu\n",
 		(unsigned long long)cputime64_to_clock_t(user),
 		(unsigned long long)cputime64_to_clock_t(nice),
 		(unsigned long long)cputime64_to_clock_t(system),
@@ -74,7 +77,8 @@ static int show_stat(struct seq_file *p, void *v)
 		(unsigned long long)cputime64_to_clock_t(irq),
 		(unsigned long long)cputime64_to_clock_t(softirq),
 		(unsigned long long)cputime64_to_clock_t(steal),
-		(unsigned long long)cputime64_to_clock_t(guest));
+		(unsigned long long)cputime64_to_clock_t(guest),
+		(unsigned long long)cputime64_to_clock_t(guest_nice));
 	for_each_online_cpu(i) {
 
 		/* Copy values here to work around gcc-2.95.3, gcc-2.96 */
@@ -88,8 +92,10 @@ static int show_stat(struct seq_file *p, void *v)
 		softirq = kstat_cpu(i).cpustat.softirq;
 		steal = kstat_cpu(i).cpustat.steal;
 		guest = kstat_cpu(i).cpustat.guest;
+		guest_nice = kstat_cpu(i).cpustat.guest_nice;
 		seq_printf(p,
-			"cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
+			"cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu "
+			"%llu\n",
 			i,
 			(unsigned long long)cputime64_to_clock_t(user),
 			(unsigned long long)cputime64_to_clock_t(nice),
@@ -99,7 +105,8 @@ static int show_stat(struct seq_file *p, void *v)
 			(unsigned long long)cputime64_to_clock_t(irq),
 			(unsigned long long)cputime64_to_clock_t(softirq),
 			(unsigned long long)cputime64_to_clock_t(steal),
-			(unsigned long long)cputime64_to_clock_t(guest));
+			(unsigned long long)cputime64_to_clock_t(guest),
+			(unsigned long long)cputime64_to_clock_t(guest_nice));
 	}
 	seq_printf(p, "intr %llu", (unsigned long long)sum);
 
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 348fa8874b52..c059044bc6dc 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -25,6 +25,7 @@ struct cpu_usage_stat {
 	cputime64_t iowait;
 	cputime64_t steal;
 	cputime64_t guest;
+	cputime64_t guest_nice;
 };
 
 struct kernel_stat {
diff --git a/kernel/sched.c b/kernel/sched.c
index e5205811c19e..67be4d0dddaa 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5017,8 +5017,13 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime,
 	p->gtime = cputime_add(p->gtime, cputime);
 
 	/* Add guest time to cpustat. */
-	cpustat->user = cputime64_add(cpustat->user, tmp);
-	cpustat->guest = cputime64_add(cpustat->guest, tmp);
+	if (TASK_NICE(p) > 0) {
+		cpustat->nice = cputime64_add(cpustat->nice, tmp);
+		cpustat->guest_nice = cputime64_add(cpustat->guest_nice, tmp);
+	} else {
+		cpustat->user = cputime64_add(cpustat->user, tmp);
+		cpustat->guest = cputime64_add(cpustat->guest, tmp);
+	}
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 9b1d82fa1611706fa7ee1505f290160a18caf95d Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sun, 25 Oct 2009 19:03:50 -0700
Subject: rcu: "Tiny RCU", The Bloatwatch Edition

This patch is a version of RCU designed for !SMP provided for a
small-footprint RCU implementation.  In particular, the
implementation of synchronize_rcu() is extremely lightweight and
high performance. It passes rcutorture testing in each of the
four relevant configurations (combinations of NO_HZ and PREEMPT)
on x86.  This saves about 1K bytes compared to old Classic RCU
(which is no longer in mainline), and more than three kilobytes
compared to Hierarchical RCU (updated to 2.6.30):

	CONFIG_TREE_RCU:

	   text	   data	    bss	    dec	    filename
	    183       4       0     187     kernel/rcupdate.o
	   2783     520      36    3339     kernel/rcutree.o
				   3526 Total (vs 4565 for v7)

	CONFIG_TREE_PREEMPT_RCU:

	   text	   data	    bss	    dec	    filename
	    263       4       0     267     kernel/rcupdate.o
	   4594     776      52    5422     kernel/rcutree.o
	   			   5689 Total (6155 for v7)

	CONFIG_TINY_RCU:

	   text	   data	    bss	    dec	    filename
	     96       4       0     100     kernel/rcupdate.o
	    734      24       0     758     kernel/rcutiny.o
	    			    858 Total (vs 848 for v7)

The above is for x86.  Your mileage may vary on other platforms.
Further compression is possible, but is being procrastinated.

Changes from v7 (http://lkml.org/lkml/2009/10/9/388)

o	Apply Lai Jiangshan's review comments (aside from
might_sleep() 	in synchronize_sched(), which is covered by SMP builds).

o	Fix up expedited primitives.

Changes from v6 (http://lkml.org/lkml/2009/9/23/293).

o	Forward ported to put it into the 2.6.33 stream.

o	Added lockdep support.

o	Make lightweight rcu_barrier.

Changes from v5 (http://lkml.org/lkml/2009/6/23/12).

o	Ported to latest pre-2.6.32 merge window kernel.

	- Renamed rcu_qsctr_inc() to rcu_sched_qs().
	- Renamed rcu_bh_qsctr_inc() to rcu_bh_qs().
	- Provided trivial rcu_cpu_notify().
	- Provided trivial exit_rcu().
	- Provided trivial rcu_needs_cpu().
	- Fixed up the rcu_*_enter/exit() functions in linux/hardirq.h.

o	Removed the dependence on EMBEDDED, with a view to making
	TINY_RCU default for !SMP at some time in the future.

o	Added (trivial) support for expedited grace periods.

Changes from v4 (http://lkml.org/lkml/2009/5/2/91) include:

o	Squeeze the size down a bit further by removing the
	->completed field from struct rcu_ctrlblk.

o	This permits synchronize_rcu() to become the empty function.
	Previous concerns about rcutorture were unfounded, as
	rcutorture correctly handles a constant value from
	rcu_batches_completed() and rcu_batches_completed_bh().

Changes from v3 (http://lkml.org/lkml/2009/3/29/221) include:

o	Changed rcu_batches_completed(), rcu_batches_completed_bh()
	rcu_enter_nohz(), rcu_exit_nohz(), rcu_nmi_enter(), and
	rcu_nmi_exit(), to be static inlines, as suggested by David
	Howells.  Doing this saves about 100 bytes from rcutiny.o.
	(The numbers between v3 and this v4 of the patch are not directly
	comparable, since they are against different versions of Linux.)

Changes from v2 (http://lkml.org/lkml/2009/2/3/333) include:

o	Fix whitespace issues.

o	Change short-circuit "||" operator to instead be "+" in order
to 	fix performance bug noted by "kraai" on LWN.

		(http://lwn.net/Articles/324348/)

Changes from v1 (http://lkml.org/lkml/2009/1/13/440) include:

o	This version depends on EMBEDDED as well as !SMP, as suggested
	by Ingo.

o	Updated rcu_needs_cpu() to unconditionally return zero,
	permitting the CPU to enter dynticks-idle mode at any time.
	This works because callbacks can be invoked upon entry to
	dynticks-idle mode.

o	Paul is now OK with this being included, based on a poll at
the 	Kernel Miniconf at linux.conf.au, where about ten people said
	that they cared about saving 900 bytes on single-CPU systems.

o	Applies to both mainline and tip/core/rcu.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: David Howells <dhowells@redhat.com>
Acked-by: Josh Triplett <josh@joshtriplett.org>
Reviewed-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: avi@redhat.com
Cc: mtosatti@redhat.com
LKML-Reference: <12565226351355-git-send-email->
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/hardirq.h  |  24 ++++
 include/linux/rcupdate.h |   6 +
 include/linux/rcutiny.h  |  97 ++++++++++++++++
 init/Kconfig             |   9 ++
 kernel/Makefile          |   1 +
 kernel/rcupdate.c        |   4 +
 kernel/rcutiny.c         | 294 +++++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 435 insertions(+)
 create mode 100644 include/linux/rcutiny.h
 create mode 100644 kernel/rcutiny.c

(limited to 'include')

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 6d527ee82b2b..d5b387669dab 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -139,10 +139,34 @@ static inline void account_system_vtime(struct task_struct *tsk)
 #endif
 
 #if defined(CONFIG_NO_HZ)
+#if defined(CONFIG_TINY_RCU)
+extern void rcu_enter_nohz(void);
+extern void rcu_exit_nohz(void);
+
+static inline void rcu_irq_enter(void)
+{
+	rcu_exit_nohz();
+}
+
+static inline void rcu_irq_exit(void)
+{
+	rcu_enter_nohz();
+}
+
+static inline void rcu_nmi_enter(void)
+{
+}
+
+static inline void rcu_nmi_exit(void)
+{
+}
+
+#else
 extern void rcu_irq_enter(void);
 extern void rcu_irq_exit(void);
 extern void rcu_nmi_enter(void);
 extern void rcu_nmi_exit(void);
+#endif
 #else
 # define rcu_irq_enter() do { } while (0)
 # define rcu_irq_exit() do { } while (0)
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 3ebd0b7bcb08..6dd71fa48429 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -68,11 +68,17 @@ extern int sched_expedited_torture_stats(char *page);
 /* Internal to kernel */
 extern void rcu_init(void);
 extern void rcu_scheduler_starting(void);
+#ifndef CONFIG_TINY_RCU
 extern int rcu_needs_cpu(int cpu);
+#else
+static inline int rcu_needs_cpu(int cpu) { return 0; }
+#endif
 extern int rcu_scheduler_active;
 
 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU)
 #include <linux/rcutree.h>
+#elif CONFIG_TINY_RCU
+#include <linux/rcutiny.h>
 #else
 #error "Unknown RCU implementation specified to kernel configuration"
 #endif
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
new file mode 100644
index 000000000000..891073c264dc
--- /dev/null
+++ b/include/linux/rcutiny.h
@@ -0,0 +1,97 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2008
+ *
+ * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+ *
+ * For detailed explanation of Read-Copy Update mechanism see -
+ * 		Documentation/RCU
+ */
+
+#ifndef __LINUX_TINY_H
+#define __LINUX_TINY_H
+
+#include <linux/cache.h>
+
+void rcu_sched_qs(int cpu);
+void rcu_bh_qs(int cpu);
+
+#define __rcu_read_lock()	preempt_disable()
+#define __rcu_read_unlock()	preempt_enable()
+#define __rcu_read_lock_bh()	local_bh_disable()
+#define __rcu_read_unlock_bh()	local_bh_enable()
+#define call_rcu_sched		call_rcu
+
+#define rcu_init_sched()	do { } while (0)
+extern void rcu_check_callbacks(int cpu, int user);
+extern void __rcu_init(void);
+
+/*
+ * Return the number of grace periods.
+ */
+static inline long rcu_batches_completed(void)
+{
+	return 0;
+}
+
+/*
+ * Return the number of bottom-half grace periods.
+ */
+static inline long rcu_batches_completed_bh(void)
+{
+	return 0;
+}
+
+extern int rcu_expedited_torture_stats(char *page);
+
+static inline void synchronize_rcu_expedited(void)
+{
+	synchronize_sched();
+}
+
+static inline void synchronize_rcu_bh_expedited(void)
+{
+	synchronize_sched();
+}
+
+struct notifier_block;
+extern int rcu_cpu_notify(struct notifier_block *self,
+			  unsigned long action, void *hcpu);
+
+#ifdef CONFIG_NO_HZ
+
+extern void rcu_enter_nohz(void);
+extern void rcu_exit_nohz(void);
+
+#else /* #ifdef CONFIG_NO_HZ */
+
+static inline void rcu_enter_nohz(void)
+{
+}
+
+static inline void rcu_exit_nohz(void)
+{
+}
+
+#endif /* #else #ifdef CONFIG_NO_HZ */
+
+static inline void exit_rcu(void)
+{
+}
+
+#endif /* __LINUX_RCUTINY_H */
diff --git a/init/Kconfig b/init/Kconfig
index 09c5c6431f42..d367bf6bcc34 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -334,6 +334,15 @@ config TREE_PREEMPT_RCU
 	  is also required.  It also scales down nicely to
 	  smaller systems.
 
+config TINY_RCU
+	bool "UP-only small-memory-footprint RCU"
+	depends on !SMP
+	help
+	  This option selects the RCU implementation that is
+	  designed for UP systems from which real-time response
+	  is not required.  This option greatly reduces the
+	  memory footprint of RCU.
+
 endchoice
 
 config RCU_TRACE
diff --git a/kernel/Makefile b/kernel/Makefile
index b8d4cd8ac0b9..e9a0e6ed25cc 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -82,6 +82,7 @@ obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
 obj-$(CONFIG_TREE_RCU) += rcutree.o
 obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o
 obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o
+obj-$(CONFIG_TINY_RCU) += rcutiny.o
 obj-$(CONFIG_RELAY) += relay.o
 obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 400183346ad2..7625f207f65e 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -67,6 +67,8 @@ void wakeme_after_rcu(struct rcu_head  *head)
 	complete(&rcu->completion);
 }
 
+#ifndef CONFIG_TINY_RCU
+
 #ifdef CONFIG_TREE_PREEMPT_RCU
 
 /**
@@ -157,6 +159,8 @@ void synchronize_rcu_bh(void)
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
 
+#endif /* #ifndef CONFIG_TINY_RCU */
+
 static int __cpuinit rcu_barrier_cpu_hotplug(struct notifier_block *self,
 		unsigned long action, void *hcpu)
 {
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
new file mode 100644
index 000000000000..0b54efde66ac
--- /dev/null
+++ b/kernel/rcutiny.c
@@ -0,0 +1,294 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2008
+ *
+ * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+ *
+ * For detailed explanation of Read-Copy Update mechanism see -
+ * 		Documentation/RCU
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/rcupdate.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/completion.h>
+#include <linux/moduleparam.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/mutex.h>
+#include <linux/time.h>
+
+/* Global control variables for rcupdate callback mechanism. */
+struct rcu_ctrlblk {
+	struct rcu_head *rcucblist;	/* List of pending callbacks (CBs). */
+	struct rcu_head **donetail;	/* ->next pointer of last "done" CB. */
+	struct rcu_head **curtail;	/* ->next pointer of last CB. */
+};
+
+/* Definition for rcupdate control block. */
+static struct rcu_ctrlblk rcu_ctrlblk = {
+	.rcucblist = NULL,
+	.donetail = &rcu_ctrlblk.rcucblist,
+	.curtail = &rcu_ctrlblk.rcucblist,
+};
+static struct rcu_ctrlblk rcu_bh_ctrlblk = {
+	.rcucblist = NULL,
+	.donetail = &rcu_bh_ctrlblk.rcucblist,
+	.curtail = &rcu_bh_ctrlblk.rcucblist,
+};
+
+#ifdef CONFIG_NO_HZ
+
+static long rcu_dynticks_nesting = 1;
+
+/*
+ * Enter dynticks-idle mode, which is an extended quiescent state
+ * if we have fully entered that mode (i.e., if the new value of
+ * dynticks_nesting is zero).
+ */
+void rcu_enter_nohz(void)
+{
+	if (--rcu_dynticks_nesting == 0)
+		rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */
+}
+
+/*
+ * Exit dynticks-idle mode, so that we are no longer in an extended
+ * quiescent state.
+ */
+void rcu_exit_nohz(void)
+{
+	rcu_dynticks_nesting++;
+}
+
+#endif /* #ifdef CONFIG_NO_HZ */
+
+/*
+ * Helper function for rcu_qsctr_inc() and rcu_bh_qsctr_inc().
+ * Also disable irqs to avoid confusion due to interrupt handlers invoking
+ * call_rcu().
+ */
+static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	if (rcp->rcucblist != NULL &&
+	    rcp->donetail != rcp->curtail) {
+		rcp->donetail = rcp->curtail;
+		local_irq_restore(flags);
+		return 1;
+	}
+	local_irq_restore(flags);
+	return 0;
+}
+
+/*
+ * Record an rcu quiescent state.  And an rcu_bh quiescent state while we
+ * are at it, given that any rcu quiescent state is also an rcu_bh
+ * quiescent state.  Use "+" instead of "||" to defeat short circuiting.
+ */
+void rcu_sched_qs(int cpu)
+{
+	if (rcu_qsctr_help(&rcu_ctrlblk) + rcu_qsctr_help(&rcu_bh_ctrlblk))
+		raise_softirq(RCU_SOFTIRQ);
+}
+
+/*
+ * Record an rcu_bh quiescent state.
+ */
+void rcu_bh_qs(int cpu)
+{
+	if (rcu_qsctr_help(&rcu_bh_ctrlblk))
+		raise_softirq(RCU_SOFTIRQ);
+}
+
+/*
+ * Check to see if the scheduling-clock interrupt came from an extended
+ * quiescent state, and, if so, tell RCU about it.
+ */
+void rcu_check_callbacks(int cpu, int user)
+{
+	if (user ||
+	    (idle_cpu(cpu) &&
+	     !in_softirq() &&
+	     hardirq_count() <= (1 << HARDIRQ_SHIFT)))
+		rcu_sched_qs(cpu);
+	else if (!in_softirq())
+		rcu_bh_qs(cpu);
+}
+
+/*
+ * Helper function for rcu_process_callbacks() that operates on the
+ * specified rcu_ctrlkblk structure.
+ */
+static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
+{
+	unsigned long flags;
+	struct rcu_head *next, *list;
+
+	/* If no RCU callbacks ready to invoke, just return. */
+	if (&rcp->rcucblist == rcp->donetail)
+		return;
+
+	/* Move the ready-to-invoke callbacks to a local list. */
+	local_irq_save(flags);
+	list = rcp->rcucblist;
+	rcp->rcucblist = *rcp->donetail;
+	*rcp->donetail = NULL;
+	if (rcp->curtail == rcp->donetail)
+		rcp->curtail = &rcp->rcucblist;
+	rcp->donetail = &rcp->rcucblist;
+	local_irq_restore(flags);
+
+	/* Invoke the callbacks on the local list. */
+	while (list) {
+		next = list->next;
+		prefetch(next);
+		list->func(list);
+		list = next;
+	}
+}
+
+/*
+ * Invoke any callbacks whose grace period has completed.
+ */
+static void rcu_process_callbacks(struct softirq_action *unused)
+{
+	__rcu_process_callbacks(&rcu_ctrlblk);
+	__rcu_process_callbacks(&rcu_bh_ctrlblk);
+}
+
+/*
+ * Null function to handle CPU being onlined.  Longer term, we want to
+ * make TINY_RCU avoid using rcupdate.c, but later...
+ */
+int rcu_cpu_notify(struct notifier_block *self,
+		   unsigned long action, void *hcpu)
+{
+	return NOTIFY_OK;
+}
+
+/*
+ * Wait for a grace period to elapse.  But it is illegal to invoke
+ * synchronize_sched() from within an RCU read-side critical section.
+ * Therefore, any legal call to synchronize_sched() is a quiescent
+ * state, and so on a UP system, synchronize_sched() need do nothing.
+ * Ditto for synchronize_rcu_bh().  (But Lai Jiangshan points out the
+ * benefits of doing might_sleep() to reduce latency.)
+ *
+ * Cool, huh?  (Due to Josh Triplett.)
+ *
+ * But we want to make this a static inline later.
+ */
+void synchronize_sched(void)
+{
+	cond_resched();
+}
+EXPORT_SYMBOL_GPL(synchronize_sched);
+
+void synchronize_rcu_bh(void)
+{
+	synchronize_sched();
+}
+EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
+
+/*
+ * Helper function for call_rcu() and call_rcu_bh().
+ */
+static void __call_rcu(struct rcu_head *head,
+		       void (*func)(struct rcu_head *rcu),
+		       struct rcu_ctrlblk *rcp)
+{
+	unsigned long flags;
+
+	head->func = func;
+	head->next = NULL;
+	local_irq_save(flags);
+	*rcp->curtail = head;
+	rcp->curtail = &head->next;
+	local_irq_restore(flags);
+}
+
+/*
+ * Post an RCU callback to be invoked after the end of an RCU grace
+ * period.  But since we have but one CPU, that would be after any
+ * quiescent state.
+ */
+void call_rcu(struct rcu_head *head,
+	      void (*func)(struct rcu_head *rcu))
+{
+	__call_rcu(head, func, &rcu_ctrlblk);
+}
+EXPORT_SYMBOL_GPL(call_rcu);
+
+/*
+ * Post an RCU bottom-half callback to be invoked after any subsequent
+ * quiescent state.
+ */
+void call_rcu_bh(struct rcu_head *head,
+		 void (*func)(struct rcu_head *rcu))
+{
+	__call_rcu(head, func, &rcu_bh_ctrlblk);
+}
+EXPORT_SYMBOL_GPL(call_rcu_bh);
+
+void rcu_barrier(void)
+{
+	struct rcu_synchronize rcu;
+
+	init_completion(&rcu.completion);
+	/* Will wake me after RCU finished. */
+	call_rcu(&rcu.head, wakeme_after_rcu);
+	/* Wait for it. */
+	wait_for_completion(&rcu.completion);
+}
+EXPORT_SYMBOL_GPL(rcu_barrier);
+
+void rcu_barrier_bh(void)
+{
+	struct rcu_synchronize rcu;
+
+	init_completion(&rcu.completion);
+	/* Will wake me after RCU finished. */
+	call_rcu_bh(&rcu.head, wakeme_after_rcu);
+	/* Wait for it. */
+	wait_for_completion(&rcu.completion);
+}
+EXPORT_SYMBOL_GPL(rcu_barrier_bh);
+
+void rcu_barrier_sched(void)
+{
+	struct rcu_synchronize rcu;
+
+	init_completion(&rcu.completion);
+	/* Will wake me after RCU finished. */
+	call_rcu_sched(&rcu.head, wakeme_after_rcu);
+	/* Wait for it. */
+	wait_for_completion(&rcu.completion);
+}
+EXPORT_SYMBOL_GPL(rcu_barrier_sched);
+
+void __rcu_init(void)
+{
+	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
+}
-- 
cgit v1.2.3-70-g09d2


From 0cd397d33608ae6c97d2ee6c8c43462b419b7e26 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sun, 25 Oct 2009 19:03:51 -0700
Subject: rcu: Add synchronize_srcu_expedited()

This patch creates a synchronize_srcu_expedited() that uses
synchronize_sched_expedited() where synchronize_srcu()
uses synchronize_sched().  The synchronize_srcu() and
synchronize_srcu_expedited() functions become one-liners that
pass synchronize_sched() or synchronize_sched_expedited(),
repectively, to a new __synchronize_srcu() function.

While in the file, move the EXPORT_SYMBOL_GPL()s to immediately
follow the corresponding functions.

Requested-by: Avi Kivity <avi@redhat.com>
Tested-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Josh Triplett <josh@joshtriplett.org>
Reviewed-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
Cc: avi@redhat.com
LKML-Reference: <12565226354038-git-send-email->
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/srcu.h |  1 +
 kernel/srcu.c        | 74 ++++++++++++++++++++++++++++++++++++----------------
 2 files changed, 52 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index aca0eee53930..4765d97dcafb 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -48,6 +48,7 @@ void cleanup_srcu_struct(struct srcu_struct *sp);
 int srcu_read_lock(struct srcu_struct *sp) __acquires(sp);
 void srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp);
 void synchronize_srcu(struct srcu_struct *sp);
+void synchronize_srcu_expedited(struct srcu_struct *sp);
 long srcu_batches_completed(struct srcu_struct *sp);
 
 #endif
diff --git a/kernel/srcu.c b/kernel/srcu.c
index b0aeeaf22ce4..818d7d9aa03c 100644
--- a/kernel/srcu.c
+++ b/kernel/srcu.c
@@ -49,6 +49,7 @@ int init_srcu_struct(struct srcu_struct *sp)
 	sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array);
 	return (sp->per_cpu_ref ? 0 : -ENOMEM);
 }
+EXPORT_SYMBOL_GPL(init_srcu_struct);
 
 /*
  * srcu_readers_active_idx -- returns approximate number of readers
@@ -97,6 +98,7 @@ void cleanup_srcu_struct(struct srcu_struct *sp)
 	free_percpu(sp->per_cpu_ref);
 	sp->per_cpu_ref = NULL;
 }
+EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
 
 /**
  * srcu_read_lock - register a new reader for an SRCU-protected structure.
@@ -118,6 +120,7 @@ int srcu_read_lock(struct srcu_struct *sp)
 	preempt_enable();
 	return idx;
 }
+EXPORT_SYMBOL_GPL(srcu_read_lock);
 
 /**
  * srcu_read_unlock - unregister a old reader from an SRCU-protected structure.
@@ -136,22 +139,12 @@ void srcu_read_unlock(struct srcu_struct *sp, int idx)
 	per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]--;
 	preempt_enable();
 }
+EXPORT_SYMBOL_GPL(srcu_read_unlock);
 
-/**
- * synchronize_srcu - wait for prior SRCU read-side critical-section completion
- * @sp: srcu_struct with which to synchronize.
- *
- * Flip the completed counter, and wait for the old count to drain to zero.
- * As with classic RCU, the updater must use some separate means of
- * synchronizing concurrent updates.  Can block; must be called from
- * process context.
- *
- * Note that it is illegal to call synchornize_srcu() from the corresponding
- * SRCU read-side critical section; doing so will result in deadlock.
- * However, it is perfectly legal to call synchronize_srcu() on one
- * srcu_struct from some other srcu_struct's read-side critical section.
+/*
+ * Helper function for synchronize_srcu() and synchronize_srcu_expedited().
  */
-void synchronize_srcu(struct srcu_struct *sp)
+void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void))
 {
 	int idx;
 
@@ -173,7 +166,7 @@ void synchronize_srcu(struct srcu_struct *sp)
 		return;
 	}
 
-	synchronize_sched();  /* Force memory barrier on all CPUs. */
+	sync_func();  /* Force memory barrier on all CPUs. */
 
 	/*
 	 * The preceding synchronize_sched() ensures that any CPU that
@@ -190,7 +183,7 @@ void synchronize_srcu(struct srcu_struct *sp)
 	idx = sp->completed & 0x1;
 	sp->completed++;
 
-	synchronize_sched();  /* Force memory barrier on all CPUs. */
+	sync_func();  /* Force memory barrier on all CPUs. */
 
 	/*
 	 * At this point, because of the preceding synchronize_sched(),
@@ -203,7 +196,7 @@ void synchronize_srcu(struct srcu_struct *sp)
 	while (srcu_readers_active_idx(sp, idx))
 		schedule_timeout_interruptible(1);
 
-	synchronize_sched();  /* Force memory barrier on all CPUs. */
+	sync_func();  /* Force memory barrier on all CPUs. */
 
 	/*
 	 * The preceding synchronize_sched() forces all srcu_read_unlock()
@@ -236,6 +229,47 @@ void synchronize_srcu(struct srcu_struct *sp)
 	mutex_unlock(&sp->mutex);
 }
 
+/**
+ * synchronize_srcu - wait for prior SRCU read-side critical-section completion
+ * @sp: srcu_struct with which to synchronize.
+ *
+ * Flip the completed counter, and wait for the old count to drain to zero.
+ * As with classic RCU, the updater must use some separate means of
+ * synchronizing concurrent updates.  Can block; must be called from
+ * process context.
+ *
+ * Note that it is illegal to call synchronize_srcu() from the corresponding
+ * SRCU read-side critical section; doing so will result in deadlock.
+ * However, it is perfectly legal to call synchronize_srcu() on one
+ * srcu_struct from some other srcu_struct's read-side critical section.
+ */
+void synchronize_srcu(struct srcu_struct *sp)
+{
+	__synchronize_srcu(sp, synchronize_sched);
+}
+EXPORT_SYMBOL_GPL(synchronize_srcu);
+
+/**
+ * synchronize_srcu_expedited - like synchronize_srcu, but less patient
+ * @sp: srcu_struct with which to synchronize.
+ *
+ * Flip the completed counter, and wait for the old count to drain to zero.
+ * As with classic RCU, the updater must use some separate means of
+ * synchronizing concurrent updates.  Can block; must be called from
+ * process context.
+ *
+ * Note that it is illegal to call synchronize_srcu_expedited()
+ * from the corresponding SRCU read-side critical section; doing so
+ * will result in deadlock.  However, it is perfectly legal to call
+ * synchronize_srcu_expedited() on one srcu_struct from some other
+ * srcu_struct's read-side critical section.
+ */
+void synchronize_srcu_expedited(struct srcu_struct *sp)
+{
+	__synchronize_srcu(sp, synchronize_sched_expedited);
+}
+EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);
+
 /**
  * srcu_batches_completed - return batches completed.
  * @sp: srcu_struct on which to report batch completion.
@@ -248,10 +282,4 @@ long srcu_batches_completed(struct srcu_struct *sp)
 {
 	return sp->completed;
 }
-
-EXPORT_SYMBOL_GPL(init_srcu_struct);
-EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
-EXPORT_SYMBOL_GPL(srcu_read_lock);
-EXPORT_SYMBOL_GPL(srcu_read_unlock);
-EXPORT_SYMBOL_GPL(synchronize_srcu);
 EXPORT_SYMBOL_GPL(srcu_batches_completed);
-- 
cgit v1.2.3-70-g09d2


From 4ce5b90340879ce93d169b7b523c2cbbe7c45843 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 26 Oct 2009 07:55:55 +0100
Subject: rcu: Do tiny cleanups in rcutiny

No change in functionality - just straighten out a few small
stylistic details.

Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: avi@redhat.com
Cc: mtosatti@redhat.com
LKML-Reference: <12565226351355-git-send-email->
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/rcutiny.h |  6 ++----
 kernel/rcutiny.c        | 49 +++++++++++++++++++++++--------------------------
 2 files changed, 25 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 891073c264dc..2c1fe8373e71 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -20,9 +20,8 @@
  * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
  *
  * For detailed explanation of Read-Copy Update mechanism see -
- * 		Documentation/RCU
+ *		Documentation/RCU
  */
-
 #ifndef __LINUX_TINY_H
 #define __LINUX_TINY_H
 
@@ -70,8 +69,7 @@ static inline void synchronize_rcu_bh_expedited(void)
 }
 
 struct notifier_block;
-extern int rcu_cpu_notify(struct notifier_block *self,
-			  unsigned long action, void *hcpu);
+extern int rcu_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu);
 
 #ifdef CONFIG_NO_HZ
 
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index 0b54efde66ac..b33ec3aa377a 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -20,22 +20,21 @@
  * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
  *
  * For detailed explanation of Read-Copy Update mechanism see -
- * 		Documentation/RCU
+ *		Documentation/RCU
  */
-
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/rcupdate.h>
-#include <linux/interrupt.h>
-#include <linux/sched.h>
-#include <linux/module.h>
-#include <linux/completion.h>
 #include <linux/moduleparam.h>
+#include <linux/completion.h>
+#include <linux/interrupt.h>
 #include <linux/notifier.h>
-#include <linux/cpu.h>
+#include <linux/rcupdate.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/init.h>
 #include <linux/time.h>
+#include <linux/cpu.h>
 
 /* Global control variables for rcupdate callback mechanism. */
 struct rcu_ctrlblk {
@@ -46,14 +45,13 @@ struct rcu_ctrlblk {
 
 /* Definition for rcupdate control block. */
 static struct rcu_ctrlblk rcu_ctrlblk = {
-	.rcucblist = NULL,
-	.donetail = &rcu_ctrlblk.rcucblist,
-	.curtail = &rcu_ctrlblk.rcucblist,
+	.donetail	= &rcu_ctrlblk.rcucblist,
+	.curtail	= &rcu_ctrlblk.rcucblist,
 };
+
 static struct rcu_ctrlblk rcu_bh_ctrlblk = {
-	.rcucblist = NULL,
-	.donetail = &rcu_bh_ctrlblk.rcucblist,
-	.curtail = &rcu_bh_ctrlblk.rcucblist,
+	.donetail	= &rcu_bh_ctrlblk.rcucblist,
+	.curtail	= &rcu_bh_ctrlblk.rcucblist,
 };
 
 #ifdef CONFIG_NO_HZ
@@ -84,8 +82,8 @@ void rcu_exit_nohz(void)
 
 /*
  * Helper function for rcu_qsctr_inc() and rcu_bh_qsctr_inc().
- * Also disable irqs to avoid confusion due to interrupt handlers invoking
- * call_rcu().
+ * Also disable irqs to avoid confusion due to interrupt handlers
+ * invoking call_rcu().
  */
 static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
 {
@@ -99,6 +97,7 @@ static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
 		return 1;
 	}
 	local_irq_restore(flags);
+
 	return 0;
 }
 
@@ -143,8 +142,8 @@ void rcu_check_callbacks(int cpu, int user)
  */
 static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
 {
-	unsigned long flags;
 	struct rcu_head *next, *list;
+	unsigned long flags;
 
 	/* If no RCU callbacks ready to invoke, just return. */
 	if (&rcp->rcucblist == rcp->donetail)
@@ -182,8 +181,7 @@ static void rcu_process_callbacks(struct softirq_action *unused)
  * Null function to handle CPU being onlined.  Longer term, we want to
  * make TINY_RCU avoid using rcupdate.c, but later...
  */
-int rcu_cpu_notify(struct notifier_block *self,
-		   unsigned long action, void *hcpu)
+int rcu_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
 {
 	return NOTIFY_OK;
 }
@@ -223,6 +221,7 @@ static void __call_rcu(struct rcu_head *head,
 
 	head->func = func;
 	head->next = NULL;
+
 	local_irq_save(flags);
 	*rcp->curtail = head;
 	rcp->curtail = &head->next;
@@ -234,8 +233,7 @@ static void __call_rcu(struct rcu_head *head,
  * period.  But since we have but one CPU, that would be after any
  * quiescent state.
  */
-void call_rcu(struct rcu_head *head,
-	      void (*func)(struct rcu_head *rcu))
+void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 {
 	__call_rcu(head, func, &rcu_ctrlblk);
 }
@@ -245,8 +243,7 @@ EXPORT_SYMBOL_GPL(call_rcu);
  * Post an RCU bottom-half callback to be invoked after any subsequent
  * quiescent state.
  */
-void call_rcu_bh(struct rcu_head *head,
-		 void (*func)(struct rcu_head *rcu))
+void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 {
 	__call_rcu(head, func, &rcu_bh_ctrlblk);
 }
-- 
cgit v1.2.3-70-g09d2


From 2c28e2451dba2260e9f88811b29a7787db7e7616 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 26 Oct 2009 13:57:44 -0700
Subject: rcu: Fix TINY_RCU #elif condition

Some compilers are happy with "#elif CONFIG_RCU_TINY", while
others strongly prefer "#elif defined(CONFIG_RCU_TINY)".  Change
to the latter to make more compilers happy.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
LKML-Reference: <12565906642768-git-send-email->
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/rcupdate.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 6dd71fa48429..2f1bc42a3b82 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -77,7 +77,7 @@ extern int rcu_scheduler_active;
 
 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU)
 #include <linux/rcutree.h>
-#elif CONFIG_TINY_RCU
+#elif defined(CONFIG_TINY_RCU)
 #include <linux/rcutiny.h>
 #else
 #error "Unknown RCU implementation specified to kernel configuration"
-- 
cgit v1.2.3-70-g09d2


From d6ba452128178091dab7a04d54f7e66fdc32fb39 Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Mon, 26 Oct 2009 09:26:18 -0400
Subject: tpm add default function definitions

Add default tpm_pcr_read/extend function definitions required
by IMA/Kconfig changes.

Signed-off-by: Mimi Zohar <zohar@us.ibm.com>
Reviewed-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/tpm.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index 3338b3f5c21a..8eaa8f83effb 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -31,5 +31,12 @@
 
 extern int tpm_pcr_read(u32 chip_num, int pcr_idx, u8 *res_buf);
 extern int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash);
+#else
+static inline int tpm_pcr_read(u32 chip_num, int pcr_idx, u8 *res_buf) {
+	return -ENODEV;
+}
+static inline int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash) {
+	return -ENODEV;
+}
 #endif
 #endif
-- 
cgit v1.2.3-70-g09d2


From f7d7986060b2890fc26db6ab5203efbd33aa2497 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Sun, 18 Oct 2009 01:09:29 +0000
Subject: perf_event: Add alignment-faults and emulation-faults software events

Add two more software events that are common to many cpus.

Alignment faults: When a load or store is not aligned properly.

Emulation faults: When an instruction is emulated in software.

Both cause a very significant slowdown (100x or worse), so identifying and
fixing them is very important.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 include/linux/perf_counter.h   | 2 ++
 include/linux/perf_event.h     | 2 ++
 kernel/perf_event.c            | 2 ++
 tools/perf/design.txt          | 2 ++
 tools/perf/util/parse-events.c | 4 ++++
 5 files changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 7b7fbf433cff..d6b95d1e79f0 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -106,6 +106,8 @@ enum perf_sw_ids {
 	PERF_COUNT_SW_CPU_MIGRATIONS		= 4,
 	PERF_COUNT_SW_PAGE_FAULTS_MIN		= 5,
 	PERF_COUNT_SW_PAGE_FAULTS_MAJ		= 6,
+	PERF_COUNT_SW_ALIGNMENT_FAULTS		= 7,
+	PERF_COUNT_SW_EMULATION_FAULTS		= 8,
 
 	PERF_COUNT_SW_MAX,			/* non-ABI */
 };
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 2e6d95f97419..a33707a3a788 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -102,6 +102,8 @@ enum perf_sw_ids {
 	PERF_COUNT_SW_CPU_MIGRATIONS		= 4,
 	PERF_COUNT_SW_PAGE_FAULTS_MIN		= 5,
 	PERF_COUNT_SW_PAGE_FAULTS_MAJ		= 6,
+	PERF_COUNT_SW_ALIGNMENT_FAULTS		= 7,
+	PERF_COUNT_SW_EMULATION_FAULTS		= 8,
 
 	PERF_COUNT_SW_MAX,			/* non-ABI */
 };
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 9d0b5c665883..0683b33cbb28 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -4186,6 +4186,8 @@ static const struct pmu *sw_perf_event_init(struct perf_event *event)
 	case PERF_COUNT_SW_PAGE_FAULTS_MAJ:
 	case PERF_COUNT_SW_CONTEXT_SWITCHES:
 	case PERF_COUNT_SW_CPU_MIGRATIONS:
+	case PERF_COUNT_SW_ALIGNMENT_FAULTS:
+	case PERF_COUNT_SW_EMULATION_FAULTS:
 		if (!event->parent) {
 			atomic_inc(&perf_swevent_enabled[event_id]);
 			event->destroy = sw_perf_event_destroy;
diff --git a/tools/perf/design.txt b/tools/perf/design.txt
index fdd42a824c98..f000c30877ac 100644
--- a/tools/perf/design.txt
+++ b/tools/perf/design.txt
@@ -137,6 +137,8 @@ enum sw_event_ids {
 	PERF_COUNT_SW_CPU_MIGRATIONS	= 4,
 	PERF_COUNT_SW_PAGE_FAULTS_MIN	= 5,
 	PERF_COUNT_SW_PAGE_FAULTS_MAJ	= 6,
+	PERF_COUNT_SW_ALIGNMENT_FAULTS	= 7,
+	PERF_COUNT_SW_EMULATION_FAULTS	= 8,
 };
 
 Counters of the type PERF_TYPE_TRACEPOINT are available when the ftrace event
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 8cfb48cbbea0..34bd84423933 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -46,6 +46,8 @@ static struct event_symbol event_symbols[] = {
   { CSW(PAGE_FAULTS_MAJ),	"major-faults",		""		},
   { CSW(CONTEXT_SWITCHES),	"context-switches",	"cs"		},
   { CSW(CPU_MIGRATIONS),	"cpu-migrations",	"migrations"	},
+  { CSW(ALIGNMENT_FAULTS),	"alignment-faults",	""		},
+  { CSW(EMULATION_FAULTS),	"emulation-faults",	""		},
 };
 
 #define __PERF_EVENT_FIELD(config, name) \
@@ -74,6 +76,8 @@ static const char *sw_event_names[] = {
 	"CPU-migrations",
 	"minor-faults",
 	"major-faults",
+	"alignment-faults",
+	"emulation-faults",
 };
 
 #define MAX_ALIASES 8
-- 
cgit v1.2.3-70-g09d2


From ff76ec18cabb12a6c8f3c65bd1d23f1a770fe908 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Wed, 28 Oct 2009 12:26:39 -0700
Subject: tpm: fix header for modular build

Fix build for TCG_TPM=m.  Header file doesn't handle this
and incorrectly builds stubs.

drivers/char/tpm/tpm.c:720: error: redefinition of 'tpm_pcr_read'
include/linux/tpm.h:35: error:previous definition of 'tpm_pcr_read' was here
drivers/char/tpm/tpm.c:752: error: redefinition of 'tpm_pcr_extend'
include/linux/tpm.h:38: error:previous definition of 'tpm_pcr_extend' was here

Repairs linux-next's

commit d6ba452128178091dab7a04d54f7e66fdc32fb39
Author: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date:   Mon Oct 26 09:26:18 2009 -0400

    tpm add default function definitions

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Rajiv Andrade <srajiv@linux.vnet.ibm.com>
Cc: Mimi Zohar <zohar@us.ibm.com>
Cc: James Morris <jmorris@namei.org>
Cc: Eric Paris <eparis@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/tpm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index 8eaa8f83effb..ac5d1c1285d9 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -27,7 +27,7 @@
  */
 #define	TPM_ANY_NUM 0xFFFF
 
-#if defined(CONFIG_TCG_TPM)
+#if defined(CONFIG_TCG_TPM) || defined(CONFIG_TCG_TPM_MODULE)
 
 extern int tpm_pcr_read(u32 chip_num, int pcr_idx, u8 *res_buf);
 extern int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash);
-- 
cgit v1.2.3-70-g09d2


From 5975c725dfd6f7d36f493ab1453fbdbd35c1f0e3 Mon Sep 17 00:00:00 2001
From: "Serge E. Hallyn" <serue@us.ibm.com>
Date: Thu, 29 Oct 2009 11:40:17 -0500
Subject: define convenient securebits masks for prctl users (v2)

Hi James, would you mind taking the following into
security-testing?

The securebits are used by passing them to prctl with the
PR_{S,G}ET_SECUREBITS commands.  But the defines must be
shifted to be used in prctl, which begs to be confused and
misused by userspace.  So define some more convenient
values for userspace to specify.  This way userspace does

	prctl(PR_SET_SECUREBITS, SECBIT_NOROOT);

instead of

	prctl(PR_SET_SECUREBITS, 1 << SECURE_NOROOT);

(Thanks to Michael for the idea)

This patch also adds include/linux/securebits to the installed headers.
Then perhaps it can be included by glibc's sys/prctl.h.

Changelog:
	Oct 29: Stephen Rothwell points out that issecure can
		be under __KERNEL__.
	Oct 14: (Suggestions by Michael Kerrisk):
		1. spell out SETUID in SECBIT_NO_SETUID*
		2. SECBIT_X_LOCKED does not imply SECBIT_X
		3. add definitions for keepcaps
        Oct 14: As suggested by Michael Kerrisk, don't
		use SB_* as that convention is already in
		use.  Use SECBIT_ prefix instead.

Signed-off-by: Serge E. Hallyn <serue@us.ibm.com>
Acked-by: Andrew G. Morgan <morgan@kernel.org>
Acked-by: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Ulrich Drepper <drepper@redhat.com>
Cc: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/Kbuild       |  1 +
 include/linux/securebits.h | 24 ++++++++++++++++++------
 2 files changed, 19 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index cff4a101f266..ffcdb9b509db 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -329,6 +329,7 @@ unifdef-y += scc.h
 unifdef-y += sched.h
 unifdef-y += screen_info.h
 unifdef-y += sdla.h
+unifdef-y += securebits.h
 unifdef-y += selinux_netlink.h
 unifdef-y += sem.h
 unifdef-y += serial_core.h
diff --git a/include/linux/securebits.h b/include/linux/securebits.h
index d2c5ed845bcc..33406174cbe8 100644
--- a/include/linux/securebits.h
+++ b/include/linux/securebits.h
@@ -1,6 +1,15 @@
 #ifndef _LINUX_SECUREBITS_H
 #define _LINUX_SECUREBITS_H 1
 
+/* Each securesetting is implemented using two bits. One bit specifies
+   whether the setting is on or off. The other bit specify whether the
+   setting is locked or not. A setting which is locked cannot be
+   changed from user-level. */
+#define issecure_mask(X)	(1 << (X))
+#ifdef __KERNEL__
+#define issecure(X)		(issecure_mask(X) & current_cred_xxx(securebits))
+#endif
+
 #define SECUREBITS_DEFAULT 0x00000000
 
 /* When set UID 0 has no special privileges. When unset, we support
@@ -12,6 +21,9 @@
 #define SECURE_NOROOT			0
 #define SECURE_NOROOT_LOCKED		1  /* make bit-0 immutable */
 
+#define SECBIT_NOROOT		(issecure_mask(SECURE_NOROOT))
+#define SECBIT_NOROOT_LOCKED	(issecure_mask(SECURE_NOROOT_LOCKED))
+
 /* When set, setuid to/from uid 0 does not trigger capability-"fixup".
    When unset, to provide compatiblility with old programs relying on
    set*uid to gain/lose privilege, transitions to/from uid 0 cause
@@ -19,6 +31,10 @@
 #define SECURE_NO_SETUID_FIXUP		2
 #define SECURE_NO_SETUID_FIXUP_LOCKED	3  /* make bit-2 immutable */
 
+#define SECBIT_NO_SETUID_FIXUP	(issecure_mask(SECURE_NO_SETUID_FIXUP))
+#define SECBIT_NO_SETUID_FIXUP_LOCKED \
+			(issecure_mask(SECURE_NO_SETUID_FIXUP_LOCKED))
+
 /* When set, a process can retain its capabilities even after
    transitioning to a non-root user (the set-uid fixup suppressed by
    bit 2). Bit-4 is cleared when a process calls exec(); setting both
@@ -27,12 +43,8 @@
 #define SECURE_KEEP_CAPS		4
 #define SECURE_KEEP_CAPS_LOCKED		5  /* make bit-4 immutable */
 
-/* Each securesetting is implemented using two bits. One bit specifies
-   whether the setting is on or off. The other bit specify whether the
-   setting is locked or not. A setting which is locked cannot be
-   changed from user-level. */
-#define issecure_mask(X)	(1 << (X))
-#define issecure(X)		(issecure_mask(X) & current_cred_xxx(securebits))
+#define SECBIT_KEEP_CAPS	(issecure_mask(SECURE_KEEP_CAPS))
+#define SECBIT_KEEP_CAPS_LOCKED (issecure_mask(SECURE_KEEP_CAPS_LOCKED))
 
 #define SECURE_ALL_BITS		(issecure_mask(SECURE_NOROOT) | \
 				 issecure_mask(SECURE_NO_SETUID_FIXUP) | \
-- 
cgit v1.2.3-70-g09d2


From 7b2a35132ad0a70902dcd2844c27ed64cda0ce9b Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Mon, 2 Nov 2009 08:50:52 +0800
Subject: compiler: Introduce __always_unused

I wrote some code which is used as compile-time checker, and the
code should be elided after compile.

So I need to annotate the code as "always unused", compared to
"maybe unused".

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <4AEE2CEC.8040206@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/compiler-gcc.h | 1 +
 include/linux/compiler.h     | 4 ++++
 2 files changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index a3ed7cb8ca34..73dcf804bc94 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -79,6 +79,7 @@
 #define  noinline			__attribute__((noinline))
 #define __attribute_const__		__attribute__((__const__))
 #define __maybe_unused			__attribute__((unused))
+#define __always_unused			__attribute__((unused))
 
 #define __gcc_header(x) #x
 #define _gcc_header(x) __gcc_header(linux/compiler-gcc##x.h)
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 04fb5135b4e1..7947f4f6fa51 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -213,6 +213,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 # define __maybe_unused		/* unimplemented */
 #endif
 
+#ifndef __always_unused
+# define __always_unused	/* unimplemented */
+#endif
+
 #ifndef noinline
 #define noinline
 #endif
-- 
cgit v1.2.3-70-g09d2


From fb0459d75c1d0a4ba3cafdd2c754e7486968a676 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Fri, 25 Sep 2009 12:25:56 +0200
Subject: perf/core: Provide a kernel-internal interface to get to performance
 counters

There are reasons for kernel code to ask for, and use, performance
counters.
For example, in CPU freq governors this tends to be a good idea, but
there are other examples possible as well of course.

This patch adds the needed bits to do enable this functionality; they
have been tested in an experimental cpufreq driver that I'm working on,
and the changes are all that I needed to access counters properly.

[fweisbec@gmail.com: added pid to perf_event_create_kernel_counter so
that we can profile a particular task too

TODO: Have a better error reporting, don't just return NULL in fail
case.]

v2: Remove the wrong comment about the fact
    perf_event_create_kernel_counter must be called from a kernel
    thread.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: "K.Prasad" <prasad@linux.vnet.ibm.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Jan Kiszka <jan.kiszka@siemens.com>
Cc: Jiri Slaby <jirislaby@gmail.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Jan Kiszka <jan.kiszka@web.de>
Cc: Avi Kivity <avi@redhat.com>
LKML-Reference: <20090925122556.2f8bd939@infradead.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/linux/perf_event.h |  6 ++++
 kernel/perf_event.c        | 75 +++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 80 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index df9d964c15fc..fa151d49a2ee 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -744,6 +744,12 @@ extern int hw_perf_group_sched_in(struct perf_event *group_leader,
 	       struct perf_cpu_context *cpuctx,
 	       struct perf_event_context *ctx, int cpu);
 extern void perf_event_update_userpage(struct perf_event *event);
+extern int perf_event_release_kernel(struct perf_event *event);
+extern struct perf_event *
+perf_event_create_kernel_counter(struct perf_event_attr *attr,
+				int cpu,
+				pid_t pid);
+extern u64 perf_event_read_value(struct perf_event *event);
 
 struct perf_sample_data {
 	u64				type;
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 12b5ec39bf97..02d4ff041b01 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1725,6 +1725,26 @@ static int perf_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
+int perf_event_release_kernel(struct perf_event *event)
+{
+	struct perf_event_context *ctx = event->ctx;
+
+	WARN_ON_ONCE(ctx->parent_ctx);
+	mutex_lock(&ctx->mutex);
+	perf_event_remove_from_context(event);
+	mutex_unlock(&ctx->mutex);
+
+	mutex_lock(&event->owner->perf_event_mutex);
+	list_del_init(&event->owner_entry);
+	mutex_unlock(&event->owner->perf_event_mutex);
+	put_task_struct(event->owner);
+
+	free_event(event);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(perf_event_release_kernel);
+
 static int perf_event_read_size(struct perf_event *event)
 {
 	int entry = sizeof(u64); /* value */
@@ -1750,7 +1770,7 @@ static int perf_event_read_size(struct perf_event *event)
 	return size;
 }
 
-static u64 perf_event_read_value(struct perf_event *event)
+u64 perf_event_read_value(struct perf_event *event)
 {
 	struct perf_event *child;
 	u64 total = 0;
@@ -1761,6 +1781,7 @@ static u64 perf_event_read_value(struct perf_event *event)
 
 	return total;
 }
+EXPORT_SYMBOL_GPL(perf_event_read_value);
 
 static int perf_event_read_entry(struct perf_event *event,
 				   u64 read_format, char __user *buf)
@@ -4638,6 +4659,58 @@ err_put_context:
 	return err;
 }
 
+/**
+ * perf_event_create_kernel_counter
+ *
+ * @attr: attributes of the counter to create
+ * @cpu: cpu in which the counter is bound
+ * @pid: task to profile
+ */
+struct perf_event *
+perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
+				 pid_t pid)
+{
+	struct perf_event *event;
+	struct perf_event_context *ctx;
+	int err;
+
+	/*
+	 * Get the target context (task or percpu):
+	 */
+
+	ctx = find_get_context(pid, cpu);
+	if (IS_ERR(ctx))
+		return NULL ;
+
+	event = perf_event_alloc(attr, cpu, ctx, NULL,
+				     NULL, GFP_KERNEL);
+	err = PTR_ERR(event);
+	if (IS_ERR(event))
+		goto err_put_context;
+
+	event->filp = NULL;
+	WARN_ON_ONCE(ctx->parent_ctx);
+	mutex_lock(&ctx->mutex);
+	perf_install_in_context(ctx, event, cpu);
+	++ctx->generation;
+	mutex_unlock(&ctx->mutex);
+
+	event->owner = current;
+	get_task_struct(current);
+	mutex_lock(&current->perf_event_mutex);
+	list_add_tail(&event->owner_entry, &current->perf_event_list);
+	mutex_unlock(&current->perf_event_mutex);
+
+	return event;
+
+err_put_context:
+	if (err < 0)
+		put_ctx(ctx);
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
+
 /*
  * inherit a event from parent task to child task:
  */
-- 
cgit v1.2.3-70-g09d2


From 97eaf5300b9d0cd99c310bf8c4a0f2f3296d88a3 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sun, 18 Oct 2009 15:33:50 +0200
Subject: perf/core: Add a callback to perf events

A simple callback in a perf event can be used for multiple purposes.
For example it is useful for triggered based events like hardware
breakpoints that need a callback to dispatch a triggered breakpoint
event.

v2: Simplify a bit the callback attribution as suggested by Paul
    Mackerras

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "K.Prasad" <prasad@linux.vnet.ibm.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mundt <lethal@linux-sh.org>
---
 include/linux/perf_event.h |  7 ++++++-
 kernel/perf_event.c        | 14 ++++++++++----
 2 files changed, 16 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index fa151d49a2ee..8d54e6d25eeb 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -544,6 +544,8 @@ struct perf_pending_entry {
 	void (*func)(struct perf_pending_entry *);
 };
 
+typedef void (*perf_callback_t)(struct perf_event *, void *);
+
 /**
  * struct perf_event - performance event kernel representation:
  */
@@ -639,6 +641,8 @@ struct perf_event {
 	struct event_filter		*filter;
 #endif
 
+	perf_callback_t			callback;
+
 #endif /* CONFIG_PERF_EVENTS */
 };
 
@@ -748,7 +752,8 @@ extern int perf_event_release_kernel(struct perf_event *event);
 extern struct perf_event *
 perf_event_create_kernel_counter(struct perf_event_attr *attr,
 				int cpu,
-				pid_t pid);
+				pid_t pid,
+				perf_callback_t callback);
 extern u64 perf_event_read_value(struct perf_event *event);
 
 struct perf_sample_data {
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 02d4ff041b01..5087125e2a00 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -4293,6 +4293,7 @@ perf_event_alloc(struct perf_event_attr *attr,
 		   struct perf_event_context *ctx,
 		   struct perf_event *group_leader,
 		   struct perf_event *parent_event,
+		   perf_callback_t callback,
 		   gfp_t gfpflags)
 {
 	const struct pmu *pmu;
@@ -4335,6 +4336,11 @@ perf_event_alloc(struct perf_event_attr *attr,
 
 	event->state		= PERF_EVENT_STATE_INACTIVE;
 
+	if (!callback && parent_event)
+		callback = parent_event->callback;
+	
+	event->callback	= callback;
+
 	if (attr->disabled)
 		event->state = PERF_EVENT_STATE_OFF;
 
@@ -4611,7 +4617,7 @@ SYSCALL_DEFINE5(perf_event_open,
 	}
 
 	event = perf_event_alloc(&attr, cpu, ctx, group_leader,
-				     NULL, GFP_KERNEL);
+				     NULL, NULL, GFP_KERNEL);
 	err = PTR_ERR(event);
 	if (IS_ERR(event))
 		goto err_put_context;
@@ -4668,7 +4674,7 @@ err_put_context:
  */
 struct perf_event *
 perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
-				 pid_t pid)
+				 pid_t pid, perf_callback_t callback)
 {
 	struct perf_event *event;
 	struct perf_event_context *ctx;
@@ -4683,7 +4689,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 		return NULL ;
 
 	event = perf_event_alloc(attr, cpu, ctx, NULL,
-				     NULL, GFP_KERNEL);
+				     NULL, callback, GFP_KERNEL);
 	err = PTR_ERR(event);
 	if (IS_ERR(event))
 		goto err_put_context;
@@ -4736,7 +4742,7 @@ inherit_event(struct perf_event *parent_event,
 	child_event = perf_event_alloc(&parent_event->attr,
 					   parent_event->cpu, child_ctx,
 					   group_leader, parent_event,
-					   GFP_KERNEL);
+					   NULL, GFP_KERNEL);
 	if (IS_ERR(child_event))
 		return child_event;
 	get_ctx(child_ctx);
-- 
cgit v1.2.3-70-g09d2


From 1477b6a7edd9ffa7bba4f9779ce9a76ce92761ed Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Wed, 4 Nov 2009 16:14:16 +0900
Subject: sched: Remove unused __schedule() declaration

__schedule() had been removed.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4AF129C8.3030008@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 1 -
 kernel/kgdb.c         | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 75e6e60bf583..f18102c4d0b8 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -349,7 +349,6 @@ extern signed long schedule_timeout(signed long timeout);
 extern signed long schedule_timeout_interruptible(signed long timeout);
 extern signed long schedule_timeout_killable(signed long timeout);
 extern signed long schedule_timeout_uninterruptible(signed long timeout);
-asmlinkage void __schedule(void);
 asmlinkage void schedule(void);
 extern int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner);
 
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index 9147a3190c9d..7d7014634022 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -870,7 +870,7 @@ static void gdb_cmd_getregs(struct kgdb_state *ks)
 
 	/*
 	 * All threads that don't have debuggerinfo should be
-	 * in __schedule() sleeping, since all other CPUs
+	 * in schedule() sleeping, since all other CPUs
 	 * are in kgdb_wait, and thus have debuggerinfo.
 	 */
 	if (local_debuggerinfo) {
-- 
cgit v1.2.3-70-g09d2


From 2a2bb3142d326bb28b03875cabfc49baaac9a14a Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Wed, 4 Nov 2009 16:16:10 +0900
Subject: sched: Remove unused time_sync_thresh declaration

time_sync_thresh had been removed.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4AF12A3A.5050200@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index f18102c4d0b8..754b3deed02b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -171,8 +171,6 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 }
 #endif
 
-extern unsigned long long time_sync_thresh;
-
 /*
  * Task state bitmask. NOTE! These bits are also
  * encoded in fs/proc/array.c: get_task_state().
-- 
cgit v1.2.3-70-g09d2


From 9824a2b728b63e7ff586b9fd9293c819be79f0f3 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Wed, 4 Nov 2009 16:16:54 +0900
Subject: sched: Remove unused cpu_nr_migrations()

cpu_nr_migrations() is not used, remove it.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4AF12A66.6020609@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h |  1 -
 kernel/sched.c        | 11 -----------
 2 files changed, 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 754b3deed02b..dfc21fb76bf1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -145,7 +145,6 @@ extern unsigned long this_cpu_load(void);
 
 
 extern void calc_global_load(void);
-extern u64 cpu_nr_migrations(int cpu);
 
 extern unsigned long get_parent_ip(unsigned long addr);
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 67be4d0dddaa..30fd0ba5f603 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -541,7 +541,6 @@ struct rq {
 	struct load_weight load;
 	unsigned long nr_load_updates;
 	u64 nr_switches;
-	u64 nr_migrations_in;
 
 	struct cfs_rq cfs;
 	struct rt_rq rt;
@@ -2049,7 +2048,6 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 #endif
 	if (old_cpu != new_cpu) {
 		p->se.nr_migrations++;
-		new_rq->nr_migrations_in++;
 #ifdef CONFIG_SCHEDSTATS
 		if (task_hot(p, old_rq->clock, NULL))
 			schedstat_inc(p, se.nr_forced2_migrations);
@@ -2988,15 +2986,6 @@ static void calc_load_account_active(struct rq *this_rq)
 	}
 }
 
-/*
- * Externally visible per-cpu scheduler statistics:
- * cpu_nr_migrations(cpu) - number of migrations into that cpu
- */
-u64 cpu_nr_migrations(int cpu)
-{
-	return cpu_rq(cpu)->nr_migrations_in;
-}
-
 /*
  * Update rq->cpu_load[] statistics. This function is usually called every
  * scheduler tick (TICK_NSEC).
-- 
cgit v1.2.3-70-g09d2


From acc3f5d7cabbfd6cec71f0c1f9900621fa2d6ae7 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 3 Nov 2009 14:53:40 +1030
Subject: cpumask: Partition_sched_domains takes array of cpumask_var_t

Currently partition_sched_domains() takes a 'struct cpumask
*doms_new' which is a kmalloc'ed array of cpumask_t.  You can't
have such an array if 'struct cpumask' is undefined, as we plan
for CONFIG_CPUMASK_OFFSTACK=y.

So, we make this an array of cpumask_var_t instead: this is the
same for the CONFIG_CPUMASK_OFFSTACK=n case, but requires
multiple allocations for the CONFIG_CPUMASK_OFFSTACK=y case.
Hence we add alloc_sched_domains() and free_sched_domains()
functions.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Peter Zijlstra <peterz@infradead.org>
LKML-Reference: <200911031453.40668.rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h |  8 ++++--
 kernel/cpuset.c       | 19 +++++++-------
 kernel/sched.c        | 68 ++++++++++++++++++++++++++++++++++-----------------
 3 files changed, 61 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index dfc21fb76bf1..78ba664474f3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1009,9 +1009,13 @@ static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
 	return to_cpumask(sd->span);
 }
 
-extern void partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
+extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
 				    struct sched_domain_attr *dattr_new);
 
+/* Allocate an array of sched domains, for partition_sched_domains(). */
+cpumask_var_t *alloc_sched_domains(unsigned int ndoms);
+void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms);
+
 /* Test a flag in parent sched domain */
 static inline int test_sd_parent(struct sched_domain *sd, int flag)
 {
@@ -1029,7 +1033,7 @@ unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu);
 struct sched_domain_attr;
 
 static inline void
-partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
+partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
 			struct sched_domain_attr *dattr_new)
 {
 }
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index d247381e7371..3cf2183b472d 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -537,8 +537,7 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
  *	element of the partition (one sched domain) to be passed to
  *	partition_sched_domains().
  */
-/* FIXME: see the FIXME in partition_sched_domains() */
-static int generate_sched_domains(struct cpumask **domains,
+static int generate_sched_domains(cpumask_var_t **domains,
 			struct sched_domain_attr **attributes)
 {
 	LIST_HEAD(q);		/* queue of cpusets to be scanned */
@@ -546,7 +545,7 @@ static int generate_sched_domains(struct cpumask **domains,
 	struct cpuset **csa;	/* array of all cpuset ptrs */
 	int csn;		/* how many cpuset ptrs in csa so far */
 	int i, j, k;		/* indices for partition finding loops */
-	struct cpumask *doms;	/* resulting partition; i.e. sched domains */
+	cpumask_var_t *doms;	/* resulting partition; i.e. sched domains */
 	struct sched_domain_attr *dattr;  /* attributes for custom domains */
 	int ndoms = 0;		/* number of sched domains in result */
 	int nslot;		/* next empty doms[] struct cpumask slot */
@@ -557,7 +556,8 @@ static int generate_sched_domains(struct cpumask **domains,
 
 	/* Special case for the 99% of systems with one, full, sched domain */
 	if (is_sched_load_balance(&top_cpuset)) {
-		doms = kmalloc(cpumask_size(), GFP_KERNEL);
+		ndoms = 1;
+		doms = alloc_sched_domains(ndoms);
 		if (!doms)
 			goto done;
 
@@ -566,9 +566,8 @@ static int generate_sched_domains(struct cpumask **domains,
 			*dattr = SD_ATTR_INIT;
 			update_domain_attr_tree(dattr, &top_cpuset);
 		}
-		cpumask_copy(doms, top_cpuset.cpus_allowed);
+		cpumask_copy(doms[0], top_cpuset.cpus_allowed);
 
-		ndoms = 1;
 		goto done;
 	}
 
@@ -636,7 +635,7 @@ restart:
 	 * Now we know how many domains to create.
 	 * Convert <csn, csa> to <ndoms, doms> and populate cpu masks.
 	 */
-	doms = kmalloc(ndoms * cpumask_size(), GFP_KERNEL);
+	doms = alloc_sched_domains(ndoms);
 	if (!doms)
 		goto done;
 
@@ -656,7 +655,7 @@ restart:
 			continue;
 		}
 
-		dp = doms + nslot;
+		dp = doms[nslot];
 
 		if (nslot == ndoms) {
 			static int warnings = 10;
@@ -718,7 +717,7 @@ done:
 static void do_rebuild_sched_domains(struct work_struct *unused)
 {
 	struct sched_domain_attr *attr;
-	struct cpumask *doms;
+	cpumask_var_t *doms;
 	int ndoms;
 
 	get_online_cpus();
@@ -2052,7 +2051,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
 				unsigned long phase, void *unused_cpu)
 {
 	struct sched_domain_attr *attr;
-	struct cpumask *doms;
+	cpumask_var_t *doms;
 	int ndoms;
 
 	switch (phase) {
diff --git a/kernel/sched.c b/kernel/sched.c
index 30fd0ba5f603..ae026aad145b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -8846,7 +8846,7 @@ static int build_sched_domains(const struct cpumask *cpu_map)
 	return __build_sched_domains(cpu_map, NULL);
 }
 
-static struct cpumask *doms_cur;	/* current sched domains */
+static cpumask_var_t *doms_cur;	/* current sched domains */
 static int ndoms_cur;		/* number of sched domains in 'doms_cur' */
 static struct sched_domain_attr *dattr_cur;
 				/* attribues of custom domains in 'doms_cur' */
@@ -8868,6 +8868,31 @@ int __attribute__((weak)) arch_update_cpu_topology(void)
 	return 0;
 }
 
+cpumask_var_t *alloc_sched_domains(unsigned int ndoms)
+{
+	int i;
+	cpumask_var_t *doms;
+
+	doms = kmalloc(sizeof(*doms) * ndoms, GFP_KERNEL);
+	if (!doms)
+		return NULL;
+	for (i = 0; i < ndoms; i++) {
+		if (!alloc_cpumask_var(&doms[i], GFP_KERNEL)) {
+			free_sched_domains(doms, i);
+			return NULL;
+		}
+	}
+	return doms;
+}
+
+void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms)
+{
+	unsigned int i;
+	for (i = 0; i < ndoms; i++)
+		free_cpumask_var(doms[i]);
+	kfree(doms);
+}
+
 /*
  * Set up scheduler domains and groups. Callers must hold the hotplug lock.
  * For now this just excludes isolated cpus, but could be used to
@@ -8879,12 +8904,12 @@ static int arch_init_sched_domains(const struct cpumask *cpu_map)
 
 	arch_update_cpu_topology();
 	ndoms_cur = 1;
-	doms_cur = kmalloc(cpumask_size(), GFP_KERNEL);
+	doms_cur = alloc_sched_domains(ndoms_cur);
 	if (!doms_cur)
-		doms_cur = fallback_doms;
-	cpumask_andnot(doms_cur, cpu_map, cpu_isolated_map);
+		doms_cur = &fallback_doms;
+	cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map);
 	dattr_cur = NULL;
-	err = build_sched_domains(doms_cur);
+	err = build_sched_domains(doms_cur[0]);
 	register_sched_domain_sysctl();
 
 	return err;
@@ -8934,19 +8959,19 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
  * doms_new[] to the current sched domain partitioning, doms_cur[].
  * It destroys each deleted domain and builds each new domain.
  *
- * 'doms_new' is an array of cpumask's of length 'ndoms_new'.
+ * 'doms_new' is an array of cpumask_var_t's of length 'ndoms_new'.
  * The masks don't intersect (don't overlap.) We should setup one
  * sched domain for each mask. CPUs not in any of the cpumasks will
  * not be load balanced. If the same cpumask appears both in the
  * current 'doms_cur' domains and in the new 'doms_new', we can leave
  * it as it is.
  *
- * The passed in 'doms_new' should be kmalloc'd. This routine takes
- * ownership of it and will kfree it when done with it. If the caller
- * failed the kmalloc call, then it can pass in doms_new == NULL &&
- * ndoms_new == 1, and partition_sched_domains() will fallback to
- * the single partition 'fallback_doms', it also forces the domains
- * to be rebuilt.
+ * The passed in 'doms_new' should be allocated using
+ * alloc_sched_domains.  This routine takes ownership of it and will
+ * free_sched_domains it when done with it. If the caller failed the
+ * alloc call, then it can pass in doms_new == NULL && ndoms_new == 1,
+ * and partition_sched_domains() will fallback to the single partition
+ * 'fallback_doms', it also forces the domains to be rebuilt.
  *
  * If doms_new == NULL it will be replaced with cpu_online_mask.
  * ndoms_new == 0 is a special case for destroying existing domains,
@@ -8954,8 +8979,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
  *
  * Call with hotplug lock held
  */
-/* FIXME: Change to struct cpumask *doms_new[] */
-void partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
+void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
 			     struct sched_domain_attr *dattr_new)
 {
 	int i, j, n;
@@ -8974,40 +8998,40 @@ void partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
 	/* Destroy deleted domains */
 	for (i = 0; i < ndoms_cur; i++) {
 		for (j = 0; j < n && !new_topology; j++) {
-			if (cpumask_equal(&doms_cur[i], &doms_new[j])
+			if (cpumask_equal(doms_cur[i], doms_new[j])
 			    && dattrs_equal(dattr_cur, i, dattr_new, j))
 				goto match1;
 		}
 		/* no match - a current sched domain not in new doms_new[] */
-		detach_destroy_domains(doms_cur + i);
+		detach_destroy_domains(doms_cur[i]);
 match1:
 		;
 	}
 
 	if (doms_new == NULL) {
 		ndoms_cur = 0;
-		doms_new = fallback_doms;
-		cpumask_andnot(&doms_new[0], cpu_online_mask, cpu_isolated_map);
+		doms_new = &fallback_doms;
+		cpumask_andnot(doms_new[0], cpu_online_mask, cpu_isolated_map);
 		WARN_ON_ONCE(dattr_new);
 	}
 
 	/* Build new domains */
 	for (i = 0; i < ndoms_new; i++) {
 		for (j = 0; j < ndoms_cur && !new_topology; j++) {
-			if (cpumask_equal(&doms_new[i], &doms_cur[j])
+			if (cpumask_equal(doms_new[i], doms_cur[j])
 			    && dattrs_equal(dattr_new, i, dattr_cur, j))
 				goto match2;
 		}
 		/* no match - add a new doms_new */
-		__build_sched_domains(doms_new + i,
+		__build_sched_domains(doms_new[i],
 					dattr_new ? dattr_new + i : NULL);
 match2:
 		;
 	}
 
 	/* Remember the new sched domains */
-	if (doms_cur != fallback_doms)
-		kfree(doms_cur);
+	if (doms_cur != &fallback_doms)
+		free_sched_domains(doms_cur, ndoms_cur);
 	kfree(dattr_cur);	/* kfree(NULL) is safe */
 	doms_cur = doms_new;
 	dattr_cur = dattr_new;
-- 
cgit v1.2.3-70-g09d2


From 663e69592856df53ef52969482ef413a96bc4e06 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 4 Nov 2009 14:22:21 +0100
Subject: irq: Remove unused debug_poll_all_shared_irqs()

commit 74296a8ed added this function for debug purposes, but it was
never used for anything. Remove it.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/interrupt.h |  6 ------
 kernel/irq/spurious.c     | 14 +-------------
 2 files changed, 1 insertion(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 7ca72b74eec7..75f3f00ac1e5 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -603,12 +603,6 @@ static inline void init_irq_proc(void)
 }
 #endif
 
-#if defined(CONFIG_GENERIC_HARDIRQS) && defined(CONFIG_DEBUG_SHIRQ)
-extern void debug_poll_all_shared_irqs(void);
-#else
-static inline void debug_poll_all_shared_irqs(void) { }
-#endif
-
 struct seq_file;
 int show_interrupts(struct seq_file *p, void *v);
 
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 114e704760fe..8996b98f9eb2 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -104,7 +104,7 @@ static int misrouted_irq(int irq)
 	return ok;
 }
 
-static void poll_all_shared_irqs(void)
+static void poll_spurious_irqs(unsigned long dummy)
 {
 	struct irq_desc *desc;
 	int i;
@@ -123,23 +123,11 @@ static void poll_all_shared_irqs(void)
 
 		try_one_irq(i, desc);
 	}
-}
-
-static void poll_spurious_irqs(unsigned long dummy)
-{
-	poll_all_shared_irqs();
 
 	mod_timer(&poll_spurious_irq_timer,
 		  jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
 }
 
-#ifdef CONFIG_DEBUG_SHIRQ
-void debug_poll_all_shared_irqs(void)
-{
-	poll_all_shared_irqs();
-}
-#endif
-
 /*
  * If 99,900 of the previous 100,000 interrupts have not been handled
  * then assume that the IRQ is stuck in some manner. Drop a diagnostic
-- 
cgit v1.2.3-70-g09d2


From 2a855dd01bc1539111adb7233f587c5c468732ac Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Date: Sun, 25 Oct 2009 15:37:58 +0100
Subject: signal: Fix alternate signal stack check

All architectures in the kernel increment/decrement the stack pointer
before storing values on the stack.

On architectures which have the stack grow down sas_ss_sp == sp is not
on the alternate signal stack while sas_ss_sp + sas_ss_size == sp is
on the alternate signal stack.

On architectures which have the stack grow up sas_ss_sp == sp is on
the alternate signal stack while sas_ss_sp + sas_ss_size == sp is not
on the alternate signal stack.

The current implementation fails for architectures which have the
stack grow down on the corner case where sas_ss_sp == sp.This was
reported as Debian bug #544905 on AMD64.
Simplified test case: http://download.breakpoint.cc/tc-sig-stack.c

The test case creates the following stack scenario:
   0xn0300	stack top
   0xn0200	alt stack pointer top (when switching to alt stack)
   0xn01ff	alt stack end
   0xn0100	alt stack start == stack pointer

If the signal is sent the stack pointer is pointing to the base
address of the alt stack and the kernel erroneously decides that it
has already switched to the alternate stack because of the current
check for "sp - sas_ss_sp < sas_ss_size"

On parisc (stack grows up) the scenario would be:
   0xn0200	stack pointer
   0xn01ff	alt stack end
   0xn0100	alt stack start = alt stack pointer base
   		    	  	  (when switching to alt stack)
   0xn0000	stack base

This is handled correctly by the current implementation.

[ tglx: Modified for archs which have the stack grow up (parisc) which
  	would fail with the correct implementation for stack grows
  	down. Added a check for sp >= current->sas_ss_sp which is
  	strictly not necessary but makes the code symetric for both
  	variants ]

Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: stable@kernel.org
LKML-Reference: <20091025143758.GA6653@Chamillionaire.breakpoint.cc>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/sched.h | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 75e6e60bf583..0f67914a43c9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2086,11 +2086,18 @@ static inline int is_si_special(const struct siginfo *info)
 	return info <= SEND_SIG_FORCED;
 }
 
-/* True if we are on the alternate signal stack.  */
-
+/*
+ * True if we are on the alternate signal stack.
+ */
 static inline int on_sig_stack(unsigned long sp)
 {
-	return (sp - current->sas_ss_sp < current->sas_ss_size);
+#ifdef CONFIG_STACK_GROWSUP
+	return sp >= current->sas_ss_sp &&
+		sp - current->sas_ss_sp < current->sas_ss_size;
+#else
+	return sp > current->sas_ss_sp &&
+		sp - current->sas_ss_sp <= current->sas_ss_size;
+#endif
 }
 
 static inline int sas_ss_flags(unsigned long sp)
-- 
cgit v1.2.3-70-g09d2


From 2da3e160cb3d226d87b907fab26850d838ed8d7c Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 5 Nov 2009 23:06:50 +0100
Subject: hw-breakpoint: Move asm-generic/hw_breakpoint.h to
 linux/hw_breakpoint.h

We plan to make the breakpoints parameters generic among architectures.
For that it's better to move the asm-generic header to a generic linux
header.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 arch/x86/include/asm/hw_breakpoint.h |   2 +-
 include/asm-generic/hw_breakpoint.h  | 139 -----------------------------------
 include/linux/hw_breakpoint.h        | 136 ++++++++++++++++++++++++++++++++++
 3 files changed, 137 insertions(+), 140 deletions(-)
 delete mode 100644 include/asm-generic/hw_breakpoint.h
 create mode 100644 include/linux/hw_breakpoint.h

(limited to 'include')

diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
index 1acb4d45de70..3cfca8e2b5f6 100644
--- a/arch/x86/include/asm/hw_breakpoint.h
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -12,7 +12,7 @@ struct arch_hw_breakpoint {
 };
 
 #include <linux/kdebug.h>
-#include <asm-generic/hw_breakpoint.h>
+#include <linux/hw_breakpoint.h>
 
 /* Available HW breakpoint length encodings */
 #define HW_BREAKPOINT_LEN_1		0x40
diff --git a/include/asm-generic/hw_breakpoint.h b/include/asm-generic/hw_breakpoint.h
deleted file mode 100644
index 9bf2d12eb74a..000000000000
--- a/include/asm-generic/hw_breakpoint.h
+++ /dev/null
@@ -1,139 +0,0 @@
-#ifndef	_ASM_GENERIC_HW_BREAKPOINT_H
-#define	_ASM_GENERIC_HW_BREAKPOINT_H
-
-#ifndef __ARCH_HW_BREAKPOINT_H
-#error "Please don't include this file directly"
-#endif
-
-#ifdef	__KERNEL__
-#include <linux/list.h>
-#include <linux/types.h>
-#include <linux/kallsyms.h>
-
-/**
- * struct hw_breakpoint - unified kernel/user-space hardware breakpoint
- * @triggered: callback invoked after target address access
- * @info: arch-specific breakpoint info (address, length, and type)
- *
- * %hw_breakpoint structures are the kernel's way of representing
- * hardware breakpoints.  These are data breakpoints
- * (also known as "watchpoints", triggered on data access), and the breakpoint's
- * target address can be located in either kernel space or user space.
- *
- * The breakpoint's address, length, and type are highly
- * architecture-specific.  The values are encoded in the @info field; you
- * specify them when registering the breakpoint.  To examine the encoded
- * values use hw_breakpoint_get_{kaddress,uaddress,len,type}(), declared
- * below.
- *
- * The address is specified as a regular kernel pointer (for kernel-space
- * breakponts) or as an %__user pointer (for user-space breakpoints).
- * With register_user_hw_breakpoint(), the address must refer to a
- * location in user space.  The breakpoint will be active only while the
- * requested task is running.  Conversely with
- * register_kernel_hw_breakpoint(), the address must refer to a location
- * in kernel space, and the breakpoint will be active on all CPUs
- * regardless of the current task.
- *
- * The length is the breakpoint's extent in bytes, which is subject to
- * certain limitations.  include/asm/hw_breakpoint.h contains macros
- * defining the available lengths for a specific architecture.  Note that
- * the address's alignment must match the length.  The breakpoint will
- * catch accesses to any byte in the range from address to address +
- * (length - 1).
- *
- * The breakpoint's type indicates the sort of access that will cause it
- * to trigger.  Possible values may include:
- *
- * 	%HW_BREAKPOINT_RW (triggered on read or write access),
- * 	%HW_BREAKPOINT_WRITE (triggered on write access), and
- * 	%HW_BREAKPOINT_READ (triggered on read access).
- *
- * Appropriate macros are defined in include/asm/hw_breakpoint.h; not all
- * possibilities are available on all architectures.  Execute breakpoints
- * must have length equal to the special value %HW_BREAKPOINT_LEN_EXECUTE.
- *
- * When a breakpoint gets hit, the @triggered callback is
- * invoked in_interrupt with a pointer to the %hw_breakpoint structure and the
- * processor registers.
- * Data breakpoints occur after the memory access has taken place.
- * Breakpoints are disabled during execution @triggered, to avoid
- * recursive traps and allow unhindered access to breakpointed memory.
- *
- * This sample code sets a breakpoint on pid_max and registers a callback
- * function for writes to that variable.  Note that it is not portable
- * as written, because not all architectures support HW_BREAKPOINT_LEN_4.
- *
- * ----------------------------------------------------------------------
- *
- * #include <asm/hw_breakpoint.h>
- *
- * struct hw_breakpoint my_bp;
- *
- * static void my_triggered(struct hw_breakpoint *bp, struct pt_regs *regs)
- * {
- * 	printk(KERN_DEBUG "Inside triggered routine of breakpoint exception\n");
- * 	dump_stack();
- *  	.......<more debugging output>........
- * }
- *
- * static struct hw_breakpoint my_bp;
- *
- * static int init_module(void)
- * {
- *	..........<do anything>............
- *	my_bp.info.type = HW_BREAKPOINT_WRITE;
- *	my_bp.info.len = HW_BREAKPOINT_LEN_4;
- *
- *	my_bp.installed = (void *)my_bp_installed;
- *
- *	rc = register_kernel_hw_breakpoint(&my_bp);
- *	..........<do anything>............
- * }
- *
- * static void cleanup_module(void)
- * {
- *	..........<do anything>............
- *	unregister_kernel_hw_breakpoint(&my_bp);
- *	..........<do anything>............
- * }
- *
- * ----------------------------------------------------------------------
- */
-struct hw_breakpoint {
-	void (*triggered)(struct hw_breakpoint *, struct pt_regs *);
-	struct arch_hw_breakpoint info;
-};
-
-/*
- * len and type values are defined in include/asm/hw_breakpoint.h.
- * Available values vary according to the architecture.  On i386 the
- * possibilities are:
- *
- *	HW_BREAKPOINT_LEN_1
- *	HW_BREAKPOINT_LEN_2
- *	HW_BREAKPOINT_LEN_4
- *	HW_BREAKPOINT_RW
- *	HW_BREAKPOINT_READ
- *
- * On other architectures HW_BREAKPOINT_LEN_8 may be available, and the
- * 1-, 2-, and 4-byte lengths may be unavailable.  There also may be
- * HW_BREAKPOINT_WRITE.  You can use #ifdef to check at compile time.
- */
-
-extern int register_user_hw_breakpoint(struct task_struct *tsk,
-					struct hw_breakpoint *bp);
-extern int modify_user_hw_breakpoint(struct task_struct *tsk,
-					struct hw_breakpoint *bp);
-extern void unregister_user_hw_breakpoint(struct task_struct *tsk,
-						struct hw_breakpoint *bp);
-/*
- * Kernel breakpoints are not associated with any particular thread.
- */
-extern int register_kernel_hw_breakpoint(struct hw_breakpoint *bp);
-extern void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp);
-
-extern unsigned int hbp_kernel_pos;
-
-#endif	/* __KERNEL__ */
-#endif	/* _ASM_GENERIC_HW_BREAKPOINT_H */
diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
new file mode 100644
index 000000000000..61ccc8f17eac
--- /dev/null
+++ b/include/linux/hw_breakpoint.h
@@ -0,0 +1,136 @@
+#ifndef _LINUX_HW_BREAKPOINT_H
+#define _LINUX_HW_BREAKPOINT_H
+
+
+#ifdef	__KERNEL__
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/kallsyms.h>
+
+/**
+ * struct hw_breakpoint - unified kernel/user-space hardware breakpoint
+ * @triggered: callback invoked after target address access
+ * @info: arch-specific breakpoint info (address, length, and type)
+ *
+ * %hw_breakpoint structures are the kernel's way of representing
+ * hardware breakpoints.  These are data breakpoints
+ * (also known as "watchpoints", triggered on data access), and the breakpoint's
+ * target address can be located in either kernel space or user space.
+ *
+ * The breakpoint's address, length, and type are highly
+ * architecture-specific.  The values are encoded in the @info field; you
+ * specify them when registering the breakpoint.  To examine the encoded
+ * values use hw_breakpoint_get_{kaddress,uaddress,len,type}(), declared
+ * below.
+ *
+ * The address is specified as a regular kernel pointer (for kernel-space
+ * breakponts) or as an %__user pointer (for user-space breakpoints).
+ * With register_user_hw_breakpoint(), the address must refer to a
+ * location in user space.  The breakpoint will be active only while the
+ * requested task is running.  Conversely with
+ * register_kernel_hw_breakpoint(), the address must refer to a location
+ * in kernel space, and the breakpoint will be active on all CPUs
+ * regardless of the current task.
+ *
+ * The length is the breakpoint's extent in bytes, which is subject to
+ * certain limitations.  include/asm/hw_breakpoint.h contains macros
+ * defining the available lengths for a specific architecture.  Note that
+ * the address's alignment must match the length.  The breakpoint will
+ * catch accesses to any byte in the range from address to address +
+ * (length - 1).
+ *
+ * The breakpoint's type indicates the sort of access that will cause it
+ * to trigger.  Possible values may include:
+ *
+ * 	%HW_BREAKPOINT_RW (triggered on read or write access),
+ * 	%HW_BREAKPOINT_WRITE (triggered on write access), and
+ * 	%HW_BREAKPOINT_READ (triggered on read access).
+ *
+ * Appropriate macros are defined in include/asm/hw_breakpoint.h; not all
+ * possibilities are available on all architectures.  Execute breakpoints
+ * must have length equal to the special value %HW_BREAKPOINT_LEN_EXECUTE.
+ *
+ * When a breakpoint gets hit, the @triggered callback is
+ * invoked in_interrupt with a pointer to the %hw_breakpoint structure and the
+ * processor registers.
+ * Data breakpoints occur after the memory access has taken place.
+ * Breakpoints are disabled during execution @triggered, to avoid
+ * recursive traps and allow unhindered access to breakpointed memory.
+ *
+ * This sample code sets a breakpoint on pid_max and registers a callback
+ * function for writes to that variable.  Note that it is not portable
+ * as written, because not all architectures support HW_BREAKPOINT_LEN_4.
+ *
+ * ----------------------------------------------------------------------
+ *
+ * #include <asm/hw_breakpoint.h>
+ *
+ * struct hw_breakpoint my_bp;
+ *
+ * static void my_triggered(struct hw_breakpoint *bp, struct pt_regs *regs)
+ * {
+ * 	printk(KERN_DEBUG "Inside triggered routine of breakpoint exception\n");
+ * 	dump_stack();
+ *  	.......<more debugging output>........
+ * }
+ *
+ * static struct hw_breakpoint my_bp;
+ *
+ * static int init_module(void)
+ * {
+ *	..........<do anything>............
+ *	my_bp.info.type = HW_BREAKPOINT_WRITE;
+ *	my_bp.info.len = HW_BREAKPOINT_LEN_4;
+ *
+ *	my_bp.installed = (void *)my_bp_installed;
+ *
+ *	rc = register_kernel_hw_breakpoint(&my_bp);
+ *	..........<do anything>............
+ * }
+ *
+ * static void cleanup_module(void)
+ * {
+ *	..........<do anything>............
+ *	unregister_kernel_hw_breakpoint(&my_bp);
+ *	..........<do anything>............
+ * }
+ *
+ * ----------------------------------------------------------------------
+ */
+struct hw_breakpoint {
+	void (*triggered)(struct hw_breakpoint *, struct pt_regs *);
+	struct arch_hw_breakpoint info;
+};
+
+/*
+ * len and type values are defined in include/asm/hw_breakpoint.h.
+ * Available values vary according to the architecture.  On i386 the
+ * possibilities are:
+ *
+ *	HW_BREAKPOINT_LEN_1
+ *	HW_BREAKPOINT_LEN_2
+ *	HW_BREAKPOINT_LEN_4
+ *	HW_BREAKPOINT_RW
+ *	HW_BREAKPOINT_READ
+ *
+ * On other architectures HW_BREAKPOINT_LEN_8 may be available, and the
+ * 1-, 2-, and 4-byte lengths may be unavailable.  There also may be
+ * HW_BREAKPOINT_WRITE.  You can use #ifdef to check at compile time.
+ */
+
+extern int register_user_hw_breakpoint(struct task_struct *tsk,
+					struct hw_breakpoint *bp);
+extern int modify_user_hw_breakpoint(struct task_struct *tsk,
+					struct hw_breakpoint *bp);
+extern void unregister_user_hw_breakpoint(struct task_struct *tsk,
+						struct hw_breakpoint *bp);
+/*
+ * Kernel breakpoints are not associated with any particular thread.
+ */
+extern int register_kernel_hw_breakpoint(struct hw_breakpoint *bp);
+extern void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp);
+
+extern unsigned int hbp_kernel_pos;
+
+#endif	/* __KERNEL__ */
+#endif	/* _LINUX_HW_BREAKPOINT_H */
-- 
cgit v1.2.3-70-g09d2


From 444a2a3bcd6d5bed5c823136f68fcc93c0fe283f Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 6 Nov 2009 04:13:05 +0100
Subject: tracing, perf_events: Protect the buffer from recursion in perf

While tracing using events with perf, if one enables the
lockdep:lock_acquire event, it will infect every other perf
trace events.

Basically, you can enable whatever set of trace events through
perf but if this event is part of the set, the only result we
can get is a long list of lock_acquire events of rcu read lock,
and only that.

This is because of a recursion inside perf.

1) When a trace event is triggered, it will fill a per cpu
   buffer and submit it to perf.

2) Perf will commit this event but will also protect some data
   using rcu_read_lock

3) A recursion appears: rcu_read_lock triggers a lock_acquire
   event that will fill the per cpu event and then submit the
   buffer to perf.

4) Perf detects a recursion and ignores it

5) Perf continues its work on the previous event, but its buffer
   has been overwritten by the lock_acquire event, it has then
   been turned into a lock_acquire event of rcu read lock

Such scenario also happens with lock_release with
rcu_read_unlock().

We could turn the rcu_read_lock() into __rcu_read_lock() to drop
the lock debugging from perf fast path, but that would make us
lose the rcu debugging and that doesn't prevent from other
possible kind of recursion from perf in the future.

This patch adds a recursion protection based on a counter on the
perf trace per cpu buffers to solve the problem.

-v2: Fixed lost whitespace, added reviewed-by tag

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Reviewed-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Jason Baron <jbaron@redhat.com>
LKML-Reference: <1257477185-7838-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace_event.h       |  9 +++++--
 include/trace/ftrace.h             | 39 ++++++++++++++++++++++-------
 kernel/trace/trace_event_profile.c | 41 ++++++++++++++-----------------
 kernel/trace/trace_kprobe.c        | 50 ++++++++++++++++++++++++++++++++------
 kernel/trace/trace_syscalls.c      | 44 +++++++++++++++++++++++++++------
 5 files changed, 133 insertions(+), 50 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index f7b47c336703..43360c1d8f70 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -137,8 +137,13 @@ struct ftrace_event_call {
 
 #define FTRACE_MAX_PROFILE_SIZE	2048
 
-extern char			*trace_profile_buf;
-extern char			*trace_profile_buf_nmi;
+struct perf_trace_buf {
+	char	buf[FTRACE_MAX_PROFILE_SIZE];
+	int	recursion;
+};
+
+extern struct perf_trace_buf	*perf_trace_buf;
+extern struct perf_trace_buf	*perf_trace_buf_nmi;
 
 #define MAX_FILTER_PRED		32
 #define MAX_FILTER_STR_VAL	256	/* Should handle KSYM_SYMBOL_LEN */
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index a7f946094128..4945d1c99864 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -649,6 +649,7 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
  *	struct ftrace_event_call *event_call = &event_<call>;
  *	extern void perf_tp_event(int, u64, u64, void *, int);
  *	struct ftrace_raw_##call *entry;
+ *	struct perf_trace_buf *trace_buf;
  *	u64 __addr = 0, __count = 1;
  *	unsigned long irq_flags;
  *	struct trace_entry *ent;
@@ -673,14 +674,25 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
  *	__cpu = smp_processor_id();
  *
  *	if (in_nmi())
- *		raw_data = rcu_dereference(trace_profile_buf_nmi);
+ *		trace_buf = rcu_dereference(perf_trace_buf_nmi);
  *	else
- *		raw_data = rcu_dereference(trace_profile_buf);
+ *		trace_buf = rcu_dereference(perf_trace_buf);
  *
- *	if (!raw_data)
+ *	if (!trace_buf)
  *		goto end;
  *
- *	raw_data = per_cpu_ptr(raw_data, __cpu);
+ *	trace_buf = per_cpu_ptr(trace_buf, __cpu);
+ *
+ * 	// Avoid recursion from perf that could mess up the buffer
+ * 	if (trace_buf->recursion++)
+ *		goto end_recursion;
+ *
+ * 	raw_data = trace_buf->buf;
+ *
+ *	// Make recursion update visible before entering perf_tp_event
+ *	// so that we protect from perf recursions.
+ *
+ *	barrier();
  *
  *	//zero dead bytes from alignment to avoid stack leak to userspace:
  *	*(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;
@@ -713,8 +725,9 @@ static void ftrace_profile_##call(proto)				\
 {									\
 	struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
 	struct ftrace_event_call *event_call = &event_##call;		\
-	extern void perf_tp_event(int, u64, u64, void *, int);	\
+	extern void perf_tp_event(int, u64, u64, void *, int);		\
 	struct ftrace_raw_##call *entry;				\
+	struct perf_trace_buf *trace_buf;				\
 	u64 __addr = 0, __count = 1;					\
 	unsigned long irq_flags;					\
 	struct trace_entry *ent;					\
@@ -739,14 +752,20 @@ static void ftrace_profile_##call(proto)				\
 	__cpu = smp_processor_id();					\
 									\
 	if (in_nmi())							\
-		raw_data = rcu_dereference(trace_profile_buf_nmi);		\
+		trace_buf = rcu_dereference(perf_trace_buf_nmi);	\
 	else								\
-		raw_data = rcu_dereference(trace_profile_buf);		\
+		trace_buf = rcu_dereference(perf_trace_buf);		\
 									\
-	if (!raw_data)							\
+	if (!trace_buf)							\
 		goto end;						\
 									\
-	raw_data = per_cpu_ptr(raw_data, __cpu);			\
+	trace_buf = per_cpu_ptr(trace_buf, __cpu);			\
+	if (trace_buf->recursion++)					\
+		goto end_recursion;					\
+									\
+	barrier();							\
+									\
+	raw_data = trace_buf->buf;					\
 									\
 	*(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;		\
 	entry = (struct ftrace_raw_##call *)raw_data;			\
@@ -761,6 +780,8 @@ static void ftrace_profile_##call(proto)				\
 	perf_tp_event(event_call->id, __addr, __count, entry,		\
 			     __entry_size);				\
 									\
+end_recursion:								\
+	trace_buf->recursion--;						\
 end:									\
 	local_irq_restore(irq_flags);					\
 									\
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index c9f687ab0d4f..e0d351b01f5a 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -8,41 +8,36 @@
 #include <linux/module.h>
 #include "trace.h"
 
-/*
- * We can't use a size but a type in alloc_percpu()
- * So let's create a dummy type that matches the desired size
- */
-typedef struct {char buf[FTRACE_MAX_PROFILE_SIZE];} profile_buf_t;
 
-char		*trace_profile_buf;
-EXPORT_SYMBOL_GPL(trace_profile_buf);
+struct perf_trace_buf *perf_trace_buf;
+EXPORT_SYMBOL_GPL(perf_trace_buf);
 
-char		*trace_profile_buf_nmi;
-EXPORT_SYMBOL_GPL(trace_profile_buf_nmi);
+struct perf_trace_buf *perf_trace_buf_nmi;
+EXPORT_SYMBOL_GPL(perf_trace_buf_nmi);
 
 /* Count the events in use (per event id, not per instance) */
 static int	total_profile_count;
 
 static int ftrace_profile_enable_event(struct ftrace_event_call *event)
 {
-	char *buf;
+	struct perf_trace_buf *buf;
 	int ret = -ENOMEM;
 
 	if (atomic_inc_return(&event->profile_count))
 		return 0;
 
 	if (!total_profile_count) {
-		buf = (char *)alloc_percpu(profile_buf_t);
+		buf = alloc_percpu(struct perf_trace_buf);
 		if (!buf)
 			goto fail_buf;
 
-		rcu_assign_pointer(trace_profile_buf, buf);
+		rcu_assign_pointer(perf_trace_buf, buf);
 
-		buf = (char *)alloc_percpu(profile_buf_t);
+		buf = alloc_percpu(struct perf_trace_buf);
 		if (!buf)
 			goto fail_buf_nmi;
 
-		rcu_assign_pointer(trace_profile_buf_nmi, buf);
+		rcu_assign_pointer(perf_trace_buf_nmi, buf);
 	}
 
 	ret = event->profile_enable(event);
@@ -53,10 +48,10 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event)
 
 fail_buf_nmi:
 	if (!total_profile_count) {
-		free_percpu(trace_profile_buf_nmi);
-		free_percpu(trace_profile_buf);
-		trace_profile_buf_nmi = NULL;
-		trace_profile_buf = NULL;
+		free_percpu(perf_trace_buf_nmi);
+		free_percpu(perf_trace_buf);
+		perf_trace_buf_nmi = NULL;
+		perf_trace_buf = NULL;
 	}
 fail_buf:
 	atomic_dec(&event->profile_count);
@@ -84,7 +79,7 @@ int ftrace_profile_enable(int event_id)
 
 static void ftrace_profile_disable_event(struct ftrace_event_call *event)
 {
-	char *buf, *nmi_buf;
+	struct perf_trace_buf *buf, *nmi_buf;
 
 	if (!atomic_add_negative(-1, &event->profile_count))
 		return;
@@ -92,11 +87,11 @@ static void ftrace_profile_disable_event(struct ftrace_event_call *event)
 	event->profile_disable(event);
 
 	if (!--total_profile_count) {
-		buf = trace_profile_buf;
-		rcu_assign_pointer(trace_profile_buf, NULL);
+		buf = perf_trace_buf;
+		rcu_assign_pointer(perf_trace_buf, NULL);
 
-		nmi_buf = trace_profile_buf_nmi;
-		rcu_assign_pointer(trace_profile_buf_nmi, NULL);
+		nmi_buf = perf_trace_buf_nmi;
+		rcu_assign_pointer(perf_trace_buf_nmi, NULL);
 
 		/*
 		 * Ensure every events in profiling have finished before
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index cf17a6694f32..3696476f307d 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1208,6 +1208,7 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
 	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
 	struct ftrace_event_call *call = &tp->call;
 	struct kprobe_trace_entry *entry;
+	struct perf_trace_buf *trace_buf;
 	struct trace_entry *ent;
 	int size, __size, i, pc, __cpu;
 	unsigned long irq_flags;
@@ -1229,14 +1230,26 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
 	__cpu = smp_processor_id();
 
 	if (in_nmi())
-		raw_data = rcu_dereference(trace_profile_buf_nmi);
+		trace_buf = rcu_dereference(perf_trace_buf_nmi);
 	else
-		raw_data = rcu_dereference(trace_profile_buf);
+		trace_buf = rcu_dereference(perf_trace_buf);
 
-	if (!raw_data)
+	if (!trace_buf)
 		goto end;
 
-	raw_data = per_cpu_ptr(raw_data, __cpu);
+	trace_buf = per_cpu_ptr(trace_buf, __cpu);
+
+	if (trace_buf->recursion++)
+		goto end_recursion;
+
+	/*
+	 * Make recursion update visible before entering perf_tp_event
+	 * so that we protect from perf recursions.
+	 */
+	barrier();
+
+	raw_data = trace_buf->buf;
+
 	/* Zero dead bytes from alignment to avoid buffer leak to userspace */
 	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
 	entry = (struct kprobe_trace_entry *)raw_data;
@@ -1249,8 +1262,12 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
 	for (i = 0; i < tp->nr_args; i++)
 		entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
 	perf_tp_event(call->id, entry->ip, 1, entry, size);
+
+end_recursion:
+	trace_buf->recursion--;
 end:
 	local_irq_restore(irq_flags);
+
 	return 0;
 }
 
@@ -1261,6 +1278,7 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
 	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
 	struct ftrace_event_call *call = &tp->call;
 	struct kretprobe_trace_entry *entry;
+	struct perf_trace_buf *trace_buf;
 	struct trace_entry *ent;
 	int size, __size, i, pc, __cpu;
 	unsigned long irq_flags;
@@ -1282,14 +1300,26 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
 	__cpu = smp_processor_id();
 
 	if (in_nmi())
-		raw_data = rcu_dereference(trace_profile_buf_nmi);
+		trace_buf = rcu_dereference(perf_trace_buf_nmi);
 	else
-		raw_data = rcu_dereference(trace_profile_buf);
+		trace_buf = rcu_dereference(perf_trace_buf);
 
-	if (!raw_data)
+	if (!trace_buf)
 		goto end;
 
-	raw_data = per_cpu_ptr(raw_data, __cpu);
+	trace_buf = per_cpu_ptr(trace_buf, __cpu);
+
+	if (trace_buf->recursion++)
+		goto end_recursion;
+
+	/*
+	 * Make recursion update visible before entering perf_tp_event
+	 * so that we protect from perf recursions.
+	 */
+	barrier();
+
+	raw_data = trace_buf->buf;
+
 	/* Zero dead bytes from alignment to avoid buffer leak to userspace */
 	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
 	entry = (struct kretprobe_trace_entry *)raw_data;
@@ -1303,8 +1333,12 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
 	for (i = 0; i < tp->nr_args; i++)
 		entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
 	perf_tp_event(call->id, entry->ret_ip, 1, entry, size);
+
+end_recursion:
+	trace_buf->recursion--;
 end:
 	local_irq_restore(irq_flags);
+
 	return 0;
 }
 
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 58b8e5370767..51213b0aa81b 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -477,6 +477,7 @@ static int sys_prof_refcount_exit;
 static void prof_syscall_enter(struct pt_regs *regs, long id)
 {
 	struct syscall_metadata *sys_data;
+	struct perf_trace_buf *trace_buf;
 	struct syscall_trace_enter *rec;
 	unsigned long flags;
 	char *raw_data;
@@ -507,14 +508,25 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
 	cpu = smp_processor_id();
 
 	if (in_nmi())
-		raw_data = rcu_dereference(trace_profile_buf_nmi);
+		trace_buf = rcu_dereference(perf_trace_buf_nmi);
 	else
-		raw_data = rcu_dereference(trace_profile_buf);
+		trace_buf = rcu_dereference(perf_trace_buf);
 
-	if (!raw_data)
+	if (!trace_buf)
 		goto end;
 
-	raw_data = per_cpu_ptr(raw_data, cpu);
+	trace_buf = per_cpu_ptr(trace_buf, cpu);
+
+	if (trace_buf->recursion++)
+		goto end_recursion;
+
+	/*
+	 * Make recursion update visible before entering perf_tp_event
+	 * so that we protect from perf recursions.
+	 */
+	barrier();
+
+	raw_data = trace_buf->buf;
 
 	/* zero the dead bytes from align to not leak stack to user */
 	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
@@ -527,6 +539,8 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
 			       (unsigned long *)&rec->args);
 	perf_tp_event(sys_data->enter_id, 0, 1, rec, size);
 
+end_recursion:
+	trace_buf->recursion--;
 end:
 	local_irq_restore(flags);
 }
@@ -574,6 +588,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 {
 	struct syscall_metadata *sys_data;
 	struct syscall_trace_exit *rec;
+	struct perf_trace_buf *trace_buf;
 	unsigned long flags;
 	int syscall_nr;
 	char *raw_data;
@@ -605,14 +620,25 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 	cpu = smp_processor_id();
 
 	if (in_nmi())
-		raw_data = rcu_dereference(trace_profile_buf_nmi);
+		trace_buf = rcu_dereference(perf_trace_buf_nmi);
 	else
-		raw_data = rcu_dereference(trace_profile_buf);
+		trace_buf = rcu_dereference(perf_trace_buf);
 
-	if (!raw_data)
+	if (!trace_buf)
 		goto end;
 
-	raw_data = per_cpu_ptr(raw_data, cpu);
+	trace_buf = per_cpu_ptr(trace_buf, cpu);
+
+	if (trace_buf->recursion++)
+		goto end_recursion;
+
+	/*
+	 * Make recursion update visible before entering perf_tp_event
+	 * so that we protect from perf recursions.
+	 */
+	barrier();
+
+	raw_data = trace_buf->buf;
 
 	/* zero the dead bytes from align to not leak stack to user */
 	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
@@ -626,6 +652,8 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 
 	perf_tp_event(sys_data->exit_id, 0, 1, rec, size);
 
+end_recursion:
+	trace_buf->recursion--;
 end:
 	local_irq_restore(flags);
 }
-- 
cgit v1.2.3-70-g09d2


From 24f1e32c60c45c89a997c73395b69c8af6f0a84e Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 9 Sep 2009 19:22:48 +0200
Subject: hw-breakpoints: Rewrite the hw-breakpoints layer on top of perf
 events

This patch rebase the implementation of the breakpoints API on top of
perf events instances.

Each breakpoints are now perf events that handle the
register scheduling, thread/cpu attachment, etc..

The new layering is now made as follows:

       ptrace       kgdb      ftrace   perf syscall
          \          |          /         /
           \         |         /         /
                                        /
            Core breakpoint API        /
                                      /
                     |               /
                     |              /

              Breakpoints perf events

                     |
                     |

               Breakpoints PMU ---- Debug Register constraints handling
                                    (Part of core breakpoint API)
                     |
                     |

             Hardware debug registers

Reasons of this rewrite:

- Use the centralized/optimized pmu registers scheduling,
  implying an easier arch integration
- More powerful register handling: perf attributes (pinned/flexible
  events, exclusive/non-exclusive, tunable period, etc...)

Impact:

- New perf ABI: the hardware breakpoints counters
- Ptrace breakpoints setting remains tricky and still needs some per
  thread breakpoints references.

Todo (in the order):

- Support breakpoints perf counter events for perf tools (ie: implement
  perf_bpcounter_event())
- Support from perf tools

Changes in v2:

- Follow the perf "event " rename
- The ptrace regression have been fixed (ptrace breakpoint perf events
  weren't released when a task ended)
- Drop the struct hw_breakpoint and store generic fields in
  perf_event_attr.
- Separate core and arch specific headers, drop
  asm-generic/hw_breakpoint.h and create linux/hw_breakpoint.h
- Use new generic len/type for breakpoint
- Handle off case: when breakpoints api is not supported by an arch

Changes in v3:

- Fix broken CONFIG_KVM, we need to propagate the breakpoint api
  changes to kvm when we exit the guest and restore the bp registers
  to the host.

Changes in v4:

- Drop the hw_breakpoint_restore() stub as it is only used by KVM
- EXPORT_SYMBOL_GPL hw_breakpoint_restore() as KVM can be built as a
  module
- Restore the breakpoints unconditionally on kvm guest exit:
  TIF_DEBUG_THREAD doesn't anymore cover every cases of running
  breakpoints and vcpu->arch.switch_db_regs might not always be
  set when the guest used debug registers.
  (Waiting for a reliable optimization)

Changes in v5:

- Split-up the asm-generic/hw-breakpoint.h moving to
  linux/hw_breakpoint.h into a separate patch
- Optimize the breakpoints restoring while switching from kvm guest
  to host. We only want to restore the state if we have active
  breakpoints to the host, otherwise we don't care about messed-up
  address registers.
- Add asm/hw_breakpoint.h to Kbuild
- Fix bad breakpoint type in trace_selftest.c

Changes in v6:

- Fix wrong header inclusion in trace.h (triggered a build
  error with CONFIG_FTRACE_SELFTEST

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Prasad <prasad@linux.vnet.ibm.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Jan Kiszka <jan.kiszka@web.de>
Cc: Jiri Slaby <jirislaby@gmail.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Paul Mundt <lethal@linux-sh.org>
---
 arch/Kconfig                         |   3 +
 arch/x86/include/asm/Kbuild          |   1 +
 arch/x86/include/asm/debugreg.h      |  11 +-
 arch/x86/include/asm/hw_breakpoint.h |  58 +++--
 arch/x86/include/asm/processor.h     |  12 +-
 arch/x86/kernel/hw_breakpoint.c      | 391 +++++++++++++++++++++-----------
 arch/x86/kernel/process.c            |   7 +-
 arch/x86/kernel/process_32.c         |  26 +--
 arch/x86/kernel/process_64.c         |  26 +--
 arch/x86/kernel/ptrace.c             | 182 ++++++++++-----
 arch/x86/kernel/smpboot.c            |   3 -
 arch/x86/kvm/x86.c                   |  18 +-
 arch/x86/power/cpu.c                 |   6 -
 include/linux/hw_breakpoint.h        | 243 ++++++++++----------
 include/linux/perf_event.h           |  26 ++-
 kernel/exit.c                        |   5 +
 kernel/hw_breakpoint.c               | 424 ++++++++++++++---------------------
 kernel/perf_event.c                  |  53 ++++-
 kernel/trace/trace.h                 |   5 +-
 kernel/trace/trace_entries.h         |   6 +-
 kernel/trace/trace_ksym.c            | 126 +++++------
 kernel/trace/trace_selftest.c        |   3 +-
 22 files changed, 885 insertions(+), 750 deletions(-)

(limited to 'include')

diff --git a/arch/Kconfig b/arch/Kconfig
index acb664397945..eef3bbb97075 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -128,6 +128,9 @@ config HAVE_DEFAULT_NO_SPIN_MUTEXES
 
 config HAVE_HW_BREAKPOINT
 	bool
+	depends on HAVE_PERF_EVENTS
+	select ANON_INODES
+	select PERF_EVENTS
 
 
 source "kernel/gcov/Kconfig"
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 4a8e80cdcfa5..9f828f87ca35 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -10,6 +10,7 @@ header-y += ptrace-abi.h
 header-y += sigcontext32.h
 header-y += ucontext.h
 header-y += processor-flags.h
+header-y += hw_breakpoint.h
 
 unifdef-y += e820.h
 unifdef-y += ist.h
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 23439fbb1d0e..9a3333c91f9a 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -75,13 +75,8 @@
  */
 #ifdef __KERNEL__
 
-/* For process management */
-extern void flush_thread_hw_breakpoint(struct task_struct *tsk);
-extern int copy_thread_hw_breakpoint(struct task_struct *tsk,
-		struct task_struct *child, unsigned long clone_flags);
+DECLARE_PER_CPU(unsigned long, dr7);
 
-/* For CPU management */
-extern void load_debug_registers(void);
 static inline void hw_breakpoint_disable(void)
 {
 	/* Zero the control register for HW Breakpoint */
@@ -94,6 +89,10 @@ static inline void hw_breakpoint_disable(void)
 	set_debugreg(0UL, 3);
 }
 
+#ifdef CONFIG_KVM
+extern void hw_breakpoint_restore(void);
+#endif
+
 #endif	/* __KERNEL__ */
 
 #endif /* _ASM_X86_DEBUGREG_H */
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
index 3cfca8e2b5f6..0675a7c4c20e 100644
--- a/arch/x86/include/asm/hw_breakpoint.h
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -4,6 +4,11 @@
 #ifdef	__KERNEL__
 #define	__ARCH_HW_BREAKPOINT_H
 
+/*
+ * The name should probably be something dealt in
+ * a higher level. While dealing with the user
+ * (display/resolving)
+ */
 struct arch_hw_breakpoint {
 	char		*name; /* Contains name of the symbol to set bkpt */
 	unsigned long	address;
@@ -12,44 +17,57 @@ struct arch_hw_breakpoint {
 };
 
 #include <linux/kdebug.h>
-#include <linux/hw_breakpoint.h>
+#include <linux/percpu.h>
+#include <linux/list.h>
 
 /* Available HW breakpoint length encodings */
-#define HW_BREAKPOINT_LEN_1		0x40
-#define HW_BREAKPOINT_LEN_2		0x44
-#define HW_BREAKPOINT_LEN_4		0x4c
-#define HW_BREAKPOINT_LEN_EXECUTE	0x40
+#define X86_BREAKPOINT_LEN_1		0x40
+#define X86_BREAKPOINT_LEN_2		0x44
+#define X86_BREAKPOINT_LEN_4		0x4c
+#define X86_BREAKPOINT_LEN_EXECUTE	0x40
 
 #ifdef CONFIG_X86_64
-#define HW_BREAKPOINT_LEN_8		0x48
+#define X86_BREAKPOINT_LEN_8		0x48
 #endif
 
 /* Available HW breakpoint type encodings */
 
 /* trigger on instruction execute */
-#define HW_BREAKPOINT_EXECUTE	0x80
+#define X86_BREAKPOINT_EXECUTE	0x80
 /* trigger on memory write */
-#define HW_BREAKPOINT_WRITE	0x81
+#define X86_BREAKPOINT_WRITE	0x81
 /* trigger on memory read or write */
-#define HW_BREAKPOINT_RW	0x83
+#define X86_BREAKPOINT_RW	0x83
 
 /* Total number of available HW breakpoint registers */
 #define HBP_NUM 4
 
-extern struct hw_breakpoint *hbp_kernel[HBP_NUM];
-DECLARE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HBP_NUM]);
-extern unsigned int hbp_user_refcount[HBP_NUM];
+struct perf_event;
+struct pmu;
 
-extern void arch_install_thread_hw_breakpoint(struct task_struct *tsk);
-extern void arch_uninstall_thread_hw_breakpoint(void);
 extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len);
-extern int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp,
-						struct task_struct *tsk);
-extern void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk);
-extern void arch_flush_thread_hw_breakpoint(struct task_struct *tsk);
-extern void arch_update_kernel_hw_breakpoint(void *);
+extern int arch_validate_hwbkpt_settings(struct perf_event *bp,
+					 struct task_struct *tsk);
 extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
-				     unsigned long val, void *data);
+					   unsigned long val, void *data);
+
+
+int arch_install_hw_breakpoint(struct perf_event *bp);
+void arch_uninstall_hw_breakpoint(struct perf_event *bp);
+void hw_breakpoint_pmu_read(struct perf_event *bp);
+void hw_breakpoint_pmu_unthrottle(struct perf_event *bp);
+
+extern void
+arch_fill_perf_breakpoint(struct perf_event *bp);
+
+unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type);
+int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type);
+
+extern int arch_bp_generic_fields(int x86_len, int x86_type,
+				  int *gen_len, int *gen_type);
+
+extern struct pmu perf_ops_bp;
+
 #endif	/* __KERNEL__ */
 #endif	/* _I386_HW_BREAKPOINT_H */
 
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 61aafb71c7ef..820f3000f736 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -423,6 +423,8 @@ extern unsigned int xstate_size;
 extern void free_thread_xstate(struct task_struct *);
 extern struct kmem_cache *task_xstate_cachep;
 
+struct perf_event;
+
 struct thread_struct {
 	/* Cached TLS descriptors: */
 	struct desc_struct	tls_array[GDT_ENTRY_TLS_ENTRIES];
@@ -444,12 +446,10 @@ struct thread_struct {
 	unsigned long		fs;
 #endif
 	unsigned long		gs;
-	/* Hardware debugging registers: */
-	unsigned long		debugreg[HBP_NUM];
-	unsigned long		debugreg6;
-	unsigned long		debugreg7;
-	/* Hardware breakpoint info */
-	struct hw_breakpoint	*hbp[HBP_NUM];
+	/* Save middle states of ptrace breakpoints */
+	struct perf_event	*ptrace_bps[HBP_NUM];
+	/* Debug status used for traps, single steps, etc... */
+	unsigned long           debugreg6;
 	/* Fault info: */
 	unsigned long		cr2;
 	unsigned long		trap_no;
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 9316a9de4de3..e622620790bd 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -15,6 +15,7 @@
  *
  * Copyright (C) 2007 Alan Stern
  * Copyright (C) 2009 IBM Corporation
+ * Copyright (C) 2009 Frederic Weisbecker <fweisbec@gmail.com>
  */
 
 /*
@@ -22,6 +23,8 @@
  * using the CPU's debug registers.
  */
 
+#include <linux/perf_event.h>
+#include <linux/hw_breakpoint.h>
 #include <linux/irqflags.h>
 #include <linux/notifier.h>
 #include <linux/kallsyms.h>
@@ -38,26 +41,24 @@
 #include <asm/processor.h>
 #include <asm/debugreg.h>
 
-/* Unmasked kernel DR7 value */
-static unsigned long kdr7;
+/* Per cpu debug control register value */
+DEFINE_PER_CPU(unsigned long, dr7);
+
+/* Per cpu debug address registers values */
+static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]);
 
 /*
- * Masks for the bits corresponding to registers DR0 - DR3 in DR7 register.
- * Used to clear and verify the status of bits corresponding to DR0 - DR3
+ * Stores the breakpoints currently in use on each breakpoint address
+ * register for each cpus
  */
-static const unsigned long	dr7_masks[HBP_NUM] = {
-	0x000f0003,	/* LEN0, R/W0, G0, L0 */
-	0x00f0000c,	/* LEN1, R/W1, G1, L1 */
-	0x0f000030,	/* LEN2, R/W2, G2, L2 */
-	0xf00000c0	/* LEN3, R/W3, G3, L3 */
-};
+static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]);
 
 
 /*
  * Encode the length, type, Exact, and Enable bits for a particular breakpoint
  * as stored in debug register 7.
  */
-static unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type)
+unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type)
 {
 	unsigned long bp_info;
 
@@ -68,64 +69,89 @@ static unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type)
 	return bp_info;
 }
 
-void arch_update_kernel_hw_breakpoint(void *unused)
+/*
+ * Decode the length and type bits for a particular breakpoint as
+ * stored in debug register 7.  Return the "enabled" status.
+ */
+int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type)
 {
-	struct hw_breakpoint *bp;
-	int i, cpu = get_cpu();
-	unsigned long temp_kdr7 = 0;
-
-	/* Don't allow debug exceptions while we update the registers */
-	set_debugreg(0UL, 7);
+	int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);
 
-	for (i = hbp_kernel_pos; i < HBP_NUM; i++) {
-		per_cpu(this_hbp_kernel[i], cpu) = bp = hbp_kernel[i];
-		if (bp) {
-			temp_kdr7 |= encode_dr7(i, bp->info.len, bp->info.type);
-			set_debugreg(bp->info.address, i);
-		}
-	}
+	*len = (bp_info & 0xc) | 0x40;
+	*type = (bp_info & 0x3) | 0x80;
 
-	/* No need to set DR6. Update the debug registers with kernel-space
-	 * breakpoint values from kdr7 and user-space requests from the
-	 * current process
-	 */
-	kdr7 = temp_kdr7;
-	set_debugreg(kdr7 | current->thread.debugreg7, 7);
-	put_cpu();
+	return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
 }
 
 /*
- * Install the thread breakpoints in their debug registers.
+ * Install a perf counter breakpoint.
+ *
+ * We seek a free debug address register and use it for this
+ * breakpoint. Eventually we enable it in the debug control register.
+ *
+ * Atomic: we hold the counter->ctx->lock and we only handle variables
+ * and registers local to this cpu.
  */
-void arch_install_thread_hw_breakpoint(struct task_struct *tsk)
+int arch_install_hw_breakpoint(struct perf_event *bp)
 {
-	struct thread_struct *thread = &(tsk->thread);
-
-	switch (hbp_kernel_pos) {
-	case 4:
-		set_debugreg(thread->debugreg[3], 3);
-	case 3:
-		set_debugreg(thread->debugreg[2], 2);
-	case 2:
-		set_debugreg(thread->debugreg[1], 1);
-	case 1:
-		set_debugreg(thread->debugreg[0], 0);
-	default:
-		break;
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+	unsigned long *dr7;
+	int i;
+
+	for (i = 0; i < HBP_NUM; i++) {
+		struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]);
+
+		if (!*slot) {
+			*slot = bp;
+			break;
+		}
 	}
 
-	/* No need to set DR6 */
-	set_debugreg((kdr7 | thread->debugreg7), 7);
+	if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
+		return -EBUSY;
+
+	set_debugreg(info->address, i);
+	__get_cpu_var(cpu_debugreg[i]) = info->address;
+
+	dr7 = &__get_cpu_var(dr7);
+	*dr7 |= encode_dr7(i, info->len, info->type);
+
+	set_debugreg(*dr7, 7);
+
+	return 0;
 }
 
 /*
- * Install the debug register values for just the kernel, no thread.
+ * Uninstall the breakpoint contained in the given counter.
+ *
+ * First we search the debug address register it uses and then we disable
+ * it.
+ *
+ * Atomic: we hold the counter->ctx->lock and we only handle variables
+ * and registers local to this cpu.
  */
-void arch_uninstall_thread_hw_breakpoint(void)
+void arch_uninstall_hw_breakpoint(struct perf_event *bp)
 {
-	/* Clear the user-space portion of debugreg7 by setting only kdr7 */
-	set_debugreg(kdr7, 7);
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+	unsigned long *dr7;
+	int i;
+
+	for (i = 0; i < HBP_NUM; i++) {
+		struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]);
+
+		if (*slot == bp) {
+			*slot = NULL;
+			break;
+		}
+	}
+
+	if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
+		return;
 
+	dr7 = &__get_cpu_var(dr7);
+	*dr7 &= ~encode_dr7(i, info->len, info->type);
+
+	set_debugreg(*dr7, 7);
 }
 
 static int get_hbp_len(u8 hbp_len)
@@ -133,17 +159,17 @@ static int get_hbp_len(u8 hbp_len)
 	unsigned int len_in_bytes = 0;
 
 	switch (hbp_len) {
-	case HW_BREAKPOINT_LEN_1:
+	case X86_BREAKPOINT_LEN_1:
 		len_in_bytes = 1;
 		break;
-	case HW_BREAKPOINT_LEN_2:
+	case X86_BREAKPOINT_LEN_2:
 		len_in_bytes = 2;
 		break;
-	case HW_BREAKPOINT_LEN_4:
+	case X86_BREAKPOINT_LEN_4:
 		len_in_bytes = 4;
 		break;
 #ifdef CONFIG_X86_64
-	case HW_BREAKPOINT_LEN_8:
+	case X86_BREAKPOINT_LEN_8:
 		len_in_bytes = 8;
 		break;
 #endif
@@ -178,67 +204,146 @@ static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len)
 /*
  * Store a breakpoint's encoded address, length, and type.
  */
-static int arch_store_info(struct hw_breakpoint *bp, struct task_struct *tsk)
+static int arch_store_info(struct perf_event *bp)
 {
-	/*
-	 * User-space requests will always have the address field populated
-	 * Symbol names from user-space are rejected
-	 */
-	if (tsk && bp->info.name)
-		return -EINVAL;
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
 	/*
 	 * For kernel-addresses, either the address or symbol name can be
 	 * specified.
 	 */
-	if (bp->info.name)
-		bp->info.address = (unsigned long)
-					kallsyms_lookup_name(bp->info.name);
-	if (bp->info.address)
+	if (info->name)
+		info->address = (unsigned long)
+				kallsyms_lookup_name(info->name);
+	if (info->address)
 		return 0;
+
 	return -EINVAL;
 }
 
-/*
- * Validate the arch-specific HW Breakpoint register settings
- */
-int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp,
-						struct task_struct *tsk)
+int arch_bp_generic_fields(int x86_len, int x86_type,
+			   int *gen_len, int *gen_type)
 {
-	unsigned int align;
-	int ret = -EINVAL;
+	/* Len */
+	switch (x86_len) {
+	case X86_BREAKPOINT_LEN_1:
+		*gen_len = HW_BREAKPOINT_LEN_1;
+		break;
+	case X86_BREAKPOINT_LEN_2:
+		*gen_len = HW_BREAKPOINT_LEN_2;
+		break;
+	case X86_BREAKPOINT_LEN_4:
+		*gen_len = HW_BREAKPOINT_LEN_4;
+		break;
+#ifdef CONFIG_X86_64
+	case X86_BREAKPOINT_LEN_8:
+		*gen_len = HW_BREAKPOINT_LEN_8;
+		break;
+#endif
+	default:
+		return -EINVAL;
+	}
 
-	switch (bp->info.type) {
-	/*
-	 * Ptrace-refactoring code
-	 * For now, we'll allow instruction breakpoint only for user-space
-	 * addresses
-	 */
-	case HW_BREAKPOINT_EXECUTE:
-		if ((!arch_check_va_in_userspace(bp->info.address,
-							bp->info.len)) &&
-			bp->info.len != HW_BREAKPOINT_LEN_EXECUTE)
-			return ret;
+	/* Type */
+	switch (x86_type) {
+	case X86_BREAKPOINT_EXECUTE:
+		*gen_type = HW_BREAKPOINT_X;
 		break;
-	case HW_BREAKPOINT_WRITE:
+	case X86_BREAKPOINT_WRITE:
+		*gen_type = HW_BREAKPOINT_W;
 		break;
-	case HW_BREAKPOINT_RW:
+	case X86_BREAKPOINT_RW:
+		*gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
 		break;
 	default:
-		return ret;
+		return -EINVAL;
 	}
 
-	switch (bp->info.len) {
+	return 0;
+}
+
+
+static int arch_build_bp_info(struct perf_event *bp)
+{
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+
+	info->address = bp->attr.bp_addr;
+
+	/* Len */
+	switch (bp->attr.bp_len) {
 	case HW_BREAKPOINT_LEN_1:
-		align = 0;
+		info->len = X86_BREAKPOINT_LEN_1;
 		break;
 	case HW_BREAKPOINT_LEN_2:
-		align = 1;
+		info->len = X86_BREAKPOINT_LEN_2;
 		break;
 	case HW_BREAKPOINT_LEN_4:
-		align = 3;
+		info->len = X86_BREAKPOINT_LEN_4;
 		break;
 #ifdef CONFIG_X86_64
 	case HW_BREAKPOINT_LEN_8:
+		info->len = X86_BREAKPOINT_LEN_8;
+		break;
+#endif
+	default:
+		return -EINVAL;
+	}
+
+	/* Type */
+	switch (bp->attr.bp_type) {
+	case HW_BREAKPOINT_W:
+		info->type = X86_BREAKPOINT_WRITE;
+		break;
+	case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
+		info->type = X86_BREAKPOINT_RW;
+		break;
+	case HW_BREAKPOINT_X:
+		info->type = X86_BREAKPOINT_EXECUTE;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+/*
+ * Validate the arch-specific HW Breakpoint register settings
+ */
+int arch_validate_hwbkpt_settings(struct perf_event *bp,
+				  struct task_struct *tsk)
+{
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+	unsigned int align;
+	int ret;
+
+
+	ret = arch_build_bp_info(bp);
+	if (ret)
+		return ret;
+
+	ret = -EINVAL;
+
+	if (info->type == X86_BREAKPOINT_EXECUTE)
+		/*
+		 * Ptrace-refactoring code
+		 * For now, we'll allow instruction breakpoint only for user-space
+		 * addresses
+		 */
+		if ((!arch_check_va_in_userspace(info->address, info->len)) &&
+			info->len != X86_BREAKPOINT_EXECUTE)
+			return ret;
+
+	switch (info->len) {
+	case X86_BREAKPOINT_LEN_1:
+		align = 0;
+		break;
+	case X86_BREAKPOINT_LEN_2:
+		align = 1;
+		break;
+	case X86_BREAKPOINT_LEN_4:
+		align = 3;
+		break;
+#ifdef CONFIG_X86_64
+	case X86_BREAKPOINT_LEN_8:
 		align = 7;
 		break;
 #endif
@@ -246,8 +351,8 @@ int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp,
 		return ret;
 	}
 
-	if (bp->triggered)
-		ret = arch_store_info(bp, tsk);
+	if (bp->callback)
+		ret = arch_store_info(bp);
 
 	if (ret < 0)
 		return ret;
@@ -255,44 +360,47 @@ int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp,
 	 * Check that the low-order bits of the address are appropriate
 	 * for the alignment implied by len.
 	 */
-	if (bp->info.address & align)
+	if (info->address & align)
 		return -EINVAL;
 
 	/* Check that the virtual address is in the proper range */
 	if (tsk) {
-		if (!arch_check_va_in_userspace(bp->info.address, bp->info.len))
+		if (!arch_check_va_in_userspace(info->address, info->len))
 			return -EFAULT;
 	} else {
-		if (!arch_check_va_in_kernelspace(bp->info.address,
-								bp->info.len))
+		if (!arch_check_va_in_kernelspace(info->address, info->len))
 			return -EFAULT;
 	}
+
 	return 0;
 }
 
-void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk)
+/*
+ * Release the user breakpoints used by ptrace
+ */
+void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
 {
-	struct thread_struct *thread = &(tsk->thread);
-	struct hw_breakpoint *bp = thread->hbp[pos];
-
-	thread->debugreg7 &= ~dr7_masks[pos];
-	if (bp) {
-		thread->debugreg[pos] = bp->info.address;
-		thread->debugreg7 |= encode_dr7(pos, bp->info.len,
-							bp->info.type);
-	} else
-		thread->debugreg[pos] = 0;
+	int i;
+	struct thread_struct *t = &tsk->thread;
+
+	for (i = 0; i < HBP_NUM; i++) {
+		unregister_hw_breakpoint(t->ptrace_bps[i]);
+		t->ptrace_bps[i] = NULL;
+	}
 }
 
-void arch_flush_thread_hw_breakpoint(struct task_struct *tsk)
+#ifdef CONFIG_KVM
+void hw_breakpoint_restore(void)
 {
-	int i;
-	struct thread_struct *thread = &(tsk->thread);
-
-	thread->debugreg7 = 0;
-	for (i = 0; i < HBP_NUM; i++)
-		thread->debugreg[i] = 0;
+	set_debugreg(__get_cpu_var(cpu_debugreg[0]), 0);
+	set_debugreg(__get_cpu_var(cpu_debugreg[1]), 1);
+	set_debugreg(__get_cpu_var(cpu_debugreg[2]), 2);
+	set_debugreg(__get_cpu_var(cpu_debugreg[3]), 3);
+	set_debugreg(current->thread.debugreg6, 6);
+	set_debugreg(__get_cpu_var(dr7), 7);
 }
+EXPORT_SYMBOL_GPL(hw_breakpoint_restore);
+#endif
 
 /*
  * Handle debug exception notifications.
@@ -313,7 +421,7 @@ void arch_flush_thread_hw_breakpoint(struct task_struct *tsk)
 static int __kprobes hw_breakpoint_handler(struct die_args *args)
 {
 	int i, cpu, rc = NOTIFY_STOP;
-	struct hw_breakpoint *bp;
+	struct perf_event *bp;
 	unsigned long dr7, dr6;
 	unsigned long *dr6_p;
 
@@ -325,10 +433,6 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
 	if ((dr6 & DR_TRAP_BITS) == 0)
 		return NOTIFY_DONE;
 
-	/* Lazy debug register switching */
-	if (!test_tsk_thread_flag(current, TIF_DEBUG))
-		arch_uninstall_thread_hw_breakpoint();
-
 	get_debugreg(dr7, 7);
 	/* Disable breakpoints during exception handling */
 	set_debugreg(0UL, 7);
@@ -344,17 +448,18 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
 	for (i = 0; i < HBP_NUM; ++i) {
 		if (likely(!(dr6 & (DR_TRAP0 << i))))
 			continue;
+
 		/*
-		 * Find the corresponding hw_breakpoint structure and
-		 * invoke its triggered callback.
+		 * The counter may be concurrently released but that can only
+		 * occur from a call_rcu() path. We can then safely fetch
+		 * the breakpoint, use its callback, touch its counter
+		 * while we are in an rcu_read_lock() path.
 		 */
-		if (i >= hbp_kernel_pos)
-			bp = per_cpu(this_hbp_kernel[i], cpu);
-		else {
-			bp = current->thread.hbp[i];
-			if (bp)
-				rc = NOTIFY_DONE;
-		}
+		rcu_read_lock();
+
+		bp = per_cpu(bp_per_reg[i], cpu);
+		if (bp)
+			rc = NOTIFY_DONE;
 		/*
 		 * Reset the 'i'th TRAP bit in dr6 to denote completion of
 		 * exception handling
@@ -362,19 +467,23 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
 		(*dr6_p) &= ~(DR_TRAP0 << i);
 		/*
 		 * bp can be NULL due to lazy debug register switching
-		 * or due to the delay between updates of hbp_kernel_pos
-		 * and this_hbp_kernel.
+		 * or due to concurrent perf counter removing.
 		 */
-		if (!bp)
-			continue;
+		if (!bp) {
+			rcu_read_unlock();
+			break;
+		}
+
+		(bp->callback)(bp, args->regs);
 
-		(bp->triggered)(bp, args->regs);
+		rcu_read_unlock();
 	}
 	if (dr6 & (~DR_TRAP_BITS))
 		rc = NOTIFY_DONE;
 
 	set_debugreg(dr7, 7);
 	put_cpu();
+
 	return rc;
 }
 
@@ -389,3 +498,13 @@ int __kprobes hw_breakpoint_exceptions_notify(
 
 	return hw_breakpoint_handler(data);
 }
+
+void hw_breakpoint_pmu_read(struct perf_event *bp)
+{
+	/* TODO */
+}
+
+void hw_breakpoint_pmu_unthrottle(struct perf_event *bp)
+{
+	/* TODO */
+}
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index cf8ee0016307..744508e7cfdd 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -10,6 +10,7 @@
 #include <linux/clockchips.h>
 #include <linux/random.h>
 #include <trace/events/power.h>
+#include <linux/hw_breakpoint.h>
 #include <asm/system.h>
 #include <asm/apic.h>
 #include <asm/syscalls.h>
@@ -18,7 +19,6 @@
 #include <asm/i387.h>
 #include <asm/ds.h>
 #include <asm/debugreg.h>
-#include <asm/hw_breakpoint.h>
 
 unsigned long idle_halt;
 EXPORT_SYMBOL(idle_halt);
@@ -47,8 +47,6 @@ void free_thread_xstate(struct task_struct *tsk)
 		kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
 		tsk->thread.xstate = NULL;
 	}
-	if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
-		flush_thread_hw_breakpoint(tsk);
 
 	WARN(tsk->thread.ds_ctx, "leaking DS context\n");
 }
@@ -107,8 +105,7 @@ void flush_thread(void)
 	}
 #endif
 
-	if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
-		flush_thread_hw_breakpoint(tsk);
+	flush_ptrace_hw_breakpoint(tsk);
 	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
 	/*
 	 * Forget coprocessor state..
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 209e74801763..d5bd3132ee70 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -59,7 +59,6 @@
 #include <asm/syscalls.h>
 #include <asm/ds.h>
 #include <asm/debugreg.h>
-#include <asm/hw_breakpoint.h>
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
@@ -264,9 +263,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 	p->thread.io_bitmap_ptr = NULL;
 	tsk = current;
 	err = -ENOMEM;
-	if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
-		if (copy_thread_hw_breakpoint(tsk, p, clone_flags))
-			goto out;
+
+	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
 
 	if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
 		p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
@@ -287,13 +285,10 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 		err = do_set_thread_area(p, -1,
 			(struct user_desc __user *)childregs->si, 0);
 
-out:
 	if (err && p->thread.io_bitmap_ptr) {
 		kfree(p->thread.io_bitmap_ptr);
 		p->thread.io_bitmap_max = 0;
 	}
-	if (err)
-		flush_thread_hw_breakpoint(p);
 
 	clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
 	p->thread.ds_ctx = NULL;
@@ -437,23 +432,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 		lazy_load_gs(next->gs);
 
 	percpu_write(current_task, next_p);
-	/*
-	 * There's a problem with moving the arch_install_thread_hw_breakpoint()
-	 * call before current is updated.  Suppose a kernel breakpoint is
-	 * triggered in between the two, the hw-breakpoint handler will see that
-	 * the 'current' task does not have TIF_DEBUG flag set and will think it
-	 * is leftover from an old task (lazy switching) and will erase it. Then
-	 * until the next context switch, no user-breakpoints will be installed.
-	 *
-	 * The real problem is that it's impossible to update both current and
-	 * physical debug registers at the same instant, so there will always be
-	 * a window in which they disagree and a breakpoint might get triggered.
-	 * Since we use lazy switching, we are forced to assume that a
-	 * disagreement means that current is correct and the exception is due
-	 * to lazy debug register switching.
-	 */
-	if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
-		arch_install_thread_hw_breakpoint(next_p);
 
 	return prev_p;
 }
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 72edac026a78..5bafdec34441 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -53,7 +53,6 @@
 #include <asm/syscalls.h>
 #include <asm/ds.h>
 #include <asm/debugreg.h>
-#include <asm/hw_breakpoint.h>
 
 asmlinkage extern void ret_from_fork(void);
 
@@ -244,8 +243,6 @@ void release_thread(struct task_struct *dead_task)
 			BUG();
 		}
 	}
-	if (unlikely(dead_task->thread.debugreg7))
-		flush_thread_hw_breakpoint(dead_task);
 }
 
 static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
@@ -309,9 +306,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 	savesegment(ds, p->thread.ds);
 
 	err = -ENOMEM;
-	if (unlikely(test_tsk_thread_flag(me, TIF_DEBUG)))
-		if (copy_thread_hw_breakpoint(me, p, clone_flags))
-			goto out;
+	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
 
 	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
 		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
@@ -351,8 +346,6 @@ out:
 		kfree(p->thread.io_bitmap_ptr);
 		p->thread.io_bitmap_max = 0;
 	}
-	if (err)
-		flush_thread_hw_breakpoint(p);
 
 	return err;
 }
@@ -508,23 +501,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 */
 	if (preload_fpu)
 		__math_state_restore();
-	/*
-	 * There's a problem with moving the arch_install_thread_hw_breakpoint()
-	 * call before current is updated.  Suppose a kernel breakpoint is
-	 * triggered in between the two, the hw-breakpoint handler will see that
-	 * the 'current' task does not have TIF_DEBUG flag set and will think it
-	 * is leftover from an old task (lazy switching) and will erase it. Then
-	 * until the next context switch, no user-breakpoints will be installed.
-	 *
-	 * The real problem is that it's impossible to update both current and
-	 * physical debug registers at the same instant, so there will always be
-	 * a window in which they disagree and a breakpoint might get triggered.
-	 * Since we use lazy switching, we are forced to assume that a
-	 * disagreement means that current is correct and the exception is due
-	 * to lazy debug register switching.
-	 */
-	if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
-		arch_install_thread_hw_breakpoint(next_p);
 
 	return prev_p;
 }
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 267cb85b479c..e79610d95971 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -22,6 +22,8 @@
 #include <linux/seccomp.h>
 #include <linux/signal.h>
 #include <linux/workqueue.h>
+#include <linux/perf_event.h>
+#include <linux/hw_breakpoint.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -441,54 +443,59 @@ static int genregs_set(struct task_struct *target,
 	return ret;
 }
 
-/*
- * Decode the length and type bits for a particular breakpoint as
- * stored in debug register 7.  Return the "enabled" status.
- */
-static int decode_dr7(unsigned long dr7, int bpnum, unsigned *len,
-		unsigned *type)
-{
-	int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);
-
-	*len = (bp_info & 0xc) | 0x40;
-	*type = (bp_info & 0x3) | 0x80;
-	return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
-}
-
-static void ptrace_triggered(struct hw_breakpoint *bp, struct pt_regs *regs)
+static void ptrace_triggered(struct perf_event *bp, void *data)
 {
-	struct thread_struct *thread = &(current->thread);
 	int i;
+	struct thread_struct *thread = &(current->thread);
 
 	/*
 	 * Store in the virtual DR6 register the fact that the breakpoint
 	 * was hit so the thread's debugger will see it.
 	 */
-	for (i = 0; i < hbp_kernel_pos; i++)
-		/*
-		 * We will check bp->info.address against the address stored in
-		 * thread's hbp structure and not debugreg[i]. This is to ensure
-		 * that the corresponding bit for 'i' in DR7 register is enabled
-		 */
-		if (bp->info.address == thread->hbp[i]->info.address)
+	for (i = 0; i < HBP_NUM; i++) {
+		if (thread->ptrace_bps[i] == bp)
 			break;
+	}
 
 	thread->debugreg6 |= (DR_TRAP0 << i);
 }
 
+/*
+ * Walk through every ptrace breakpoints for this thread and
+ * build the dr7 value on top of their attributes.
+ *
+ */
+static unsigned long ptrace_get_dr7(struct perf_event *bp[])
+{
+	int i;
+	int dr7 = 0;
+	struct arch_hw_breakpoint *info;
+
+	for (i = 0; i < HBP_NUM; i++) {
+		if (bp[i] && !bp[i]->attr.disabled) {
+			info = counter_arch_bp(bp[i]);
+			dr7 |= encode_dr7(i, info->len, info->type);
+		}
+	}
+
+	return dr7;
+}
+
 /*
  * Handle ptrace writes to debug register 7.
  */
 static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
 {
 	struct thread_struct *thread = &(tsk->thread);
-	unsigned long old_dr7 = thread->debugreg7;
+	unsigned long old_dr7;
 	int i, orig_ret = 0, rc = 0;
 	int enabled, second_pass = 0;
 	unsigned len, type;
-	struct hw_breakpoint *bp;
+	int gen_len, gen_type;
+	struct perf_event *bp;
 
 	data &= ~DR_CONTROL_RESERVED;
+	old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
 restore:
 	/*
 	 * Loop through all the hardware breakpoints, making the
@@ -496,11 +503,12 @@ restore:
 	 */
 	for (i = 0; i < HBP_NUM; i++) {
 		enabled = decode_dr7(data, i, &len, &type);
-		bp = thread->hbp[i];
+		bp = thread->ptrace_bps[i];
 
 		if (!enabled) {
 			if (bp) {
-				/* Don't unregister the breakpoints right-away,
+				/*
+				 * Don't unregister the breakpoints right-away,
 				 * unless all register_user_hw_breakpoint()
 				 * requests have succeeded. This prevents
 				 * any window of opportunity for debug
@@ -508,27 +516,45 @@ restore:
 				 */
 				if (!second_pass)
 					continue;
-				unregister_user_hw_breakpoint(tsk, bp);
-				kfree(bp);
+				thread->ptrace_bps[i] = NULL;
+				unregister_hw_breakpoint(bp);
 			}
 			continue;
 		}
+
+		/*
+		 * We shoud have at least an inactive breakpoint at this
+		 * slot. It means the user is writing dr7 without having
+		 * written the address register first
+		 */
 		if (!bp) {
-			rc = -ENOMEM;
-			bp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL);
-			if (bp) {
-				bp->info.address = thread->debugreg[i];
-				bp->triggered = ptrace_triggered;
-				bp->info.len = len;
-				bp->info.type = type;
-				rc = register_user_hw_breakpoint(tsk, bp);
-				if (rc)
-					kfree(bp);
-			}
-		} else
-			rc = modify_user_hw_breakpoint(tsk, bp);
+			rc = -EINVAL;
+			break;
+		}
+
+		rc = arch_bp_generic_fields(len, type, &gen_len, &gen_type);
 		if (rc)
 			break;
+
+		/*
+		 * This is a temporary thing as bp is unregistered/registered
+		 * to simulate modification
+		 */
+		bp = modify_user_hw_breakpoint(bp, bp->attr.bp_addr, gen_len,
+					       gen_type, bp->callback,
+					       tsk, true);
+		thread->ptrace_bps[i] = NULL;
+
+		if (!bp) { /* incorrect bp, or we have a bug in bp API */
+			rc = -EINVAL;
+			break;
+		}
+		if (IS_ERR(bp)) {
+			rc = PTR_ERR(bp);
+			bp = NULL;
+			break;
+		}
+		thread->ptrace_bps[i] = bp;
 	}
 	/*
 	 * Make a second pass to free the remaining unused breakpoints
@@ -553,15 +579,63 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
 	struct thread_struct *thread = &(tsk->thread);
 	unsigned long val = 0;
 
-	if (n < HBP_NUM)
-		val = thread->debugreg[n];
-	else if (n == 6)
+	if (n < HBP_NUM) {
+		struct perf_event *bp;
+		bp = thread->ptrace_bps[n];
+		if (!bp)
+			return 0;
+		val = bp->hw.info.address;
+	} else if (n == 6) {
 		val = thread->debugreg6;
-	else if (n == 7)
-		val = thread->debugreg7;
+	 } else if (n == 7) {
+		val = ptrace_get_dr7(thread->ptrace_bps);
+	}
 	return val;
 }
 
+static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
+				      unsigned long addr)
+{
+	struct perf_event *bp;
+	struct thread_struct *t = &tsk->thread;
+
+	if (!t->ptrace_bps[nr]) {
+		/*
+		 * Put stub len and type to register (reserve) an inactive but
+		 * correct bp
+		 */
+		bp = register_user_hw_breakpoint(addr, HW_BREAKPOINT_LEN_1,
+						 HW_BREAKPOINT_W,
+						 ptrace_triggered, tsk,
+						 false);
+	} else {
+		bp = t->ptrace_bps[nr];
+		t->ptrace_bps[nr] = NULL;
+		bp = modify_user_hw_breakpoint(bp, addr, bp->attr.bp_len,
+					       bp->attr.bp_type,
+					       bp->callback,
+					       tsk,
+					       bp->attr.disabled);
+	}
+
+	if (!bp)
+		return -EIO;
+	/*
+	 * CHECKME: the previous code returned -EIO if the addr wasn't a
+	 * valid task virtual addr. The new one will return -EINVAL in this
+	 * case.
+	 * -EINVAL may be what we want for in-kernel breakpoints users, but
+	 * -EIO looks better for ptrace, since we refuse a register writing
+	 * for the user. And anyway this is the previous behaviour.
+	 */
+	if (IS_ERR(bp))
+		return PTR_ERR(bp);
+
+	t->ptrace_bps[nr] = bp;
+
+	return 0;
+}
+
 /*
  * Handle PTRACE_POKEUSR calls for the debug register area.
  */
@@ -575,19 +649,13 @@ int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val)
 		return -EIO;
 
 	if (n == 6) {
-		tsk->thread.debugreg6 = val;
+		thread->debugreg6 = val;
 		goto ret_path;
 	}
 	if (n < HBP_NUM) {
-		if (thread->hbp[n]) {
-			if (arch_check_va_in_userspace(val,
-					thread->hbp[n]->info.len) == 0) {
-				rc = -EIO;
-				goto ret_path;
-			}
-			thread->hbp[n]->info.address = val;
-		}
-		thread->debugreg[n] = val;
+		rc = ptrace_set_breakpoint_addr(tsk, n, val);
+		if (rc)
+			return rc;
 	}
 	/* All that's left is DR7 */
 	if (n == 7)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 213a7a3e4562..565ebc65920e 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -64,7 +64,6 @@
 #include <asm/apic.h>
 #include <asm/setup.h>
 #include <asm/uv/uv.h>
-#include <asm/debugreg.h>
 #include <linux/mc146818rtc.h>
 
 #include <asm/smpboot_hooks.h>
@@ -328,7 +327,6 @@ notrace static void __cpuinit start_secondary(void *unused)
 	x86_cpuinit.setup_percpu_clockev();
 
 	wmb();
-	load_debug_registers();
 	cpu_idle();
 }
 
@@ -1269,7 +1267,6 @@ void cpu_disable_common(void)
 	remove_cpu_from_maps(cpu);
 	unlock_vector_lock();
 	fixup_irqs();
-	hw_breakpoint_disable();
 }
 
 int native_cpu_disable(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fc2974adf9b6..22dee7aa7813 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -42,6 +42,7 @@
 #define CREATE_TRACE_POINTS
 #include "trace.h"
 
+#include <asm/debugreg.h>
 #include <asm/uaccess.h>
 #include <asm/msr.h>
 #include <asm/desc.h>
@@ -3643,14 +3644,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	trace_kvm_entry(vcpu->vcpu_id);
 	kvm_x86_ops->run(vcpu, kvm_run);
 
-	if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) {
-		set_debugreg(current->thread.debugreg[0], 0);
-		set_debugreg(current->thread.debugreg[1], 1);
-		set_debugreg(current->thread.debugreg[2], 2);
-		set_debugreg(current->thread.debugreg[3], 3);
-		set_debugreg(current->thread.debugreg6, 6);
-		set_debugreg(current->thread.debugreg7, 7);
-	}
+	/*
+	 * If the guest has used debug registers, at least dr7
+	 * will be disabled while returning to the host.
+	 * If we don't have active breakpoints in the host, we don't
+	 * care about the messed up debug address registers. But if
+	 * we have some of them active, restore the old state.
+	 */
+	if (__get_cpu_var(dr7) & DR_GLOBAL_ENABLE_MASK)
+		hw_breakpoint_restore();
 
 	set_bit(KVM_REQ_KICK, &vcpu->requests);
 	local_irq_enable();
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index e09a44fc4664..0a979f3e5b8a 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -105,7 +105,6 @@ static void __save_processor_state(struct saved_context *ctxt)
 	ctxt->cr4 = read_cr4();
 	ctxt->cr8 = read_cr8();
 #endif
-	hw_breakpoint_disable();
 }
 
 /* Needed by apm.c */
@@ -144,11 +143,6 @@ static void fix_processor_context(void)
 #endif
 	load_TR_desc();				/* This does ltr */
 	load_LDT(&current->active_mm->context);	/* This does lldt */
-
-	/*
-	 * Now maybe reload the debug registers
-	 */
-	load_debug_registers();
 }
 
 /**
diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index 61ccc8f17eac..7eba9b92e5f3 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -1,136 +1,131 @@
 #ifndef _LINUX_HW_BREAKPOINT_H
 #define _LINUX_HW_BREAKPOINT_H
 
+#include <linux/perf_event.h>
 
-#ifdef	__KERNEL__
-#include <linux/list.h>
-#include <linux/types.h>
-#include <linux/kallsyms.h>
-
-/**
- * struct hw_breakpoint - unified kernel/user-space hardware breakpoint
- * @triggered: callback invoked after target address access
- * @info: arch-specific breakpoint info (address, length, and type)
- *
- * %hw_breakpoint structures are the kernel's way of representing
- * hardware breakpoints.  These are data breakpoints
- * (also known as "watchpoints", triggered on data access), and the breakpoint's
- * target address can be located in either kernel space or user space.
- *
- * The breakpoint's address, length, and type are highly
- * architecture-specific.  The values are encoded in the @info field; you
- * specify them when registering the breakpoint.  To examine the encoded
- * values use hw_breakpoint_get_{kaddress,uaddress,len,type}(), declared
- * below.
- *
- * The address is specified as a regular kernel pointer (for kernel-space
- * breakponts) or as an %__user pointer (for user-space breakpoints).
- * With register_user_hw_breakpoint(), the address must refer to a
- * location in user space.  The breakpoint will be active only while the
- * requested task is running.  Conversely with
- * register_kernel_hw_breakpoint(), the address must refer to a location
- * in kernel space, and the breakpoint will be active on all CPUs
- * regardless of the current task.
- *
- * The length is the breakpoint's extent in bytes, which is subject to
- * certain limitations.  include/asm/hw_breakpoint.h contains macros
- * defining the available lengths for a specific architecture.  Note that
- * the address's alignment must match the length.  The breakpoint will
- * catch accesses to any byte in the range from address to address +
- * (length - 1).
- *
- * The breakpoint's type indicates the sort of access that will cause it
- * to trigger.  Possible values may include:
- *
- * 	%HW_BREAKPOINT_RW (triggered on read or write access),
- * 	%HW_BREAKPOINT_WRITE (triggered on write access), and
- * 	%HW_BREAKPOINT_READ (triggered on read access).
- *
- * Appropriate macros are defined in include/asm/hw_breakpoint.h; not all
- * possibilities are available on all architectures.  Execute breakpoints
- * must have length equal to the special value %HW_BREAKPOINT_LEN_EXECUTE.
- *
- * When a breakpoint gets hit, the @triggered callback is
- * invoked in_interrupt with a pointer to the %hw_breakpoint structure and the
- * processor registers.
- * Data breakpoints occur after the memory access has taken place.
- * Breakpoints are disabled during execution @triggered, to avoid
- * recursive traps and allow unhindered access to breakpointed memory.
- *
- * This sample code sets a breakpoint on pid_max and registers a callback
- * function for writes to that variable.  Note that it is not portable
- * as written, because not all architectures support HW_BREAKPOINT_LEN_4.
- *
- * ----------------------------------------------------------------------
- *
- * #include <asm/hw_breakpoint.h>
- *
- * struct hw_breakpoint my_bp;
- *
- * static void my_triggered(struct hw_breakpoint *bp, struct pt_regs *regs)
- * {
- * 	printk(KERN_DEBUG "Inside triggered routine of breakpoint exception\n");
- * 	dump_stack();
- *  	.......<more debugging output>........
- * }
- *
- * static struct hw_breakpoint my_bp;
- *
- * static int init_module(void)
- * {
- *	..........<do anything>............
- *	my_bp.info.type = HW_BREAKPOINT_WRITE;
- *	my_bp.info.len = HW_BREAKPOINT_LEN_4;
- *
- *	my_bp.installed = (void *)my_bp_installed;
- *
- *	rc = register_kernel_hw_breakpoint(&my_bp);
- *	..........<do anything>............
- * }
- *
- * static void cleanup_module(void)
- * {
- *	..........<do anything>............
- *	unregister_kernel_hw_breakpoint(&my_bp);
- *	..........<do anything>............
- * }
- *
- * ----------------------------------------------------------------------
- */
-struct hw_breakpoint {
-	void (*triggered)(struct hw_breakpoint *, struct pt_regs *);
-	struct arch_hw_breakpoint info;
+enum {
+	HW_BREAKPOINT_LEN_1 = 1,
+	HW_BREAKPOINT_LEN_2 = 2,
+	HW_BREAKPOINT_LEN_4 = 4,
+	HW_BREAKPOINT_LEN_8 = 8,
 };
 
-/*
- * len and type values are defined in include/asm/hw_breakpoint.h.
- * Available values vary according to the architecture.  On i386 the
- * possibilities are:
- *
- *	HW_BREAKPOINT_LEN_1
- *	HW_BREAKPOINT_LEN_2
- *	HW_BREAKPOINT_LEN_4
- *	HW_BREAKPOINT_RW
- *	HW_BREAKPOINT_READ
- *
- * On other architectures HW_BREAKPOINT_LEN_8 may be available, and the
- * 1-, 2-, and 4-byte lengths may be unavailable.  There also may be
- * HW_BREAKPOINT_WRITE.  You can use #ifdef to check at compile time.
- */
+enum {
+	HW_BREAKPOINT_R = 1,
+	HW_BREAKPOINT_W = 2,
+	HW_BREAKPOINT_X = 4,
+};
+
+static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp)
+{
+	return &bp->hw.info;
+}
+
+static inline unsigned long hw_breakpoint_addr(struct perf_event *bp)
+{
+	return bp->attr.bp_addr;
+}
+
+static inline int hw_breakpoint_type(struct perf_event *bp)
+{
+	return bp->attr.bp_type;
+}
+
+static inline int hw_breakpoint_len(struct perf_event *bp)
+{
+	return bp->attr.bp_len;
+}
+
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+extern struct perf_event *
+register_user_hw_breakpoint(unsigned long addr,
+			    int len,
+			    int type,
+			    perf_callback_t triggered,
+			    struct task_struct *tsk,
+			    bool active);
+
+/* FIXME: only change from the attr, and don't unregister */
+extern struct perf_event *
+modify_user_hw_breakpoint(struct perf_event *bp,
+			  unsigned long addr,
+			  int len,
+			  int type,
+			  perf_callback_t triggered,
+			  struct task_struct *tsk,
+			  bool active);
 
-extern int register_user_hw_breakpoint(struct task_struct *tsk,
-					struct hw_breakpoint *bp);
-extern int modify_user_hw_breakpoint(struct task_struct *tsk,
-					struct hw_breakpoint *bp);
-extern void unregister_user_hw_breakpoint(struct task_struct *tsk,
-						struct hw_breakpoint *bp);
 /*
  * Kernel breakpoints are not associated with any particular thread.
  */
-extern int register_kernel_hw_breakpoint(struct hw_breakpoint *bp);
-extern void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp);
+extern struct perf_event *
+register_wide_hw_breakpoint_cpu(unsigned long addr,
+				int len,
+				int type,
+				perf_callback_t triggered,
+				int cpu,
+				bool active);
+
+extern struct perf_event **
+register_wide_hw_breakpoint(unsigned long addr,
+			    int len,
+			    int type,
+			    perf_callback_t triggered,
+			    bool active);
+
+extern int register_perf_hw_breakpoint(struct perf_event *bp);
+extern int __register_perf_hw_breakpoint(struct perf_event *bp);
+extern void unregister_hw_breakpoint(struct perf_event *bp);
+extern void unregister_wide_hw_breakpoint(struct perf_event **cpu_events);
+
+extern int reserve_bp_slot(struct perf_event *bp);
+extern void release_bp_slot(struct perf_event *bp);
+
+extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk);
+
+#else /* !CONFIG_HAVE_HW_BREAKPOINT */
+
+static inline struct perf_event *
+register_user_hw_breakpoint(unsigned long addr,
+			    int len,
+			    int type,
+			    perf_callback_t triggered,
+			    struct task_struct *tsk,
+			    bool active)		{ return NULL; }
+static inline struct perf_event *
+modify_user_hw_breakpoint(struct perf_event *bp,
+			  unsigned long addr,
+			  int len,
+			  int type,
+			  perf_callback_t triggered,
+			  struct task_struct *tsk,
+			  bool active)			{ return NULL; }
+static inline struct perf_event *
+register_wide_hw_breakpoint_cpu(unsigned long addr,
+				int len,
+				int type,
+				perf_callback_t triggered,
+				int cpu,
+				bool active)		{ return NULL; }
+static inline struct perf_event **
+register_wide_hw_breakpoint(unsigned long addr,
+			    int len,
+			    int type,
+			    perf_callback_t triggered,
+			    bool active)		{ return NULL; }
+static inline int
+register_perf_hw_breakpoint(struct perf_event *bp)	{ return -ENOSYS; }
+static inline int
+__register_perf_hw_breakpoint(struct perf_event *bp) 	{ return -ENOSYS; }
+static inline void unregister_hw_breakpoint(struct perf_event *bp)	{ }
+static inline void
+unregister_wide_hw_breakpoint(struct perf_event **cpu_events)		{ }
+static inline int
+reserve_bp_slot(struct perf_event *bp)			{return -ENOSYS; }
+static inline void release_bp_slot(struct perf_event *bp) 		{ }
+
+static inline void flush_ptrace_hw_breakpoint(struct task_struct *tsk)	{ }
 
-extern unsigned int hbp_kernel_pos;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
 
-#endif	/* __KERNEL__ */
-#endif	/* _LINUX_HW_BREAKPOINT_H */
+#endif /* _LINUX_HW_BREAKPOINT_H */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 8d54e6d25eeb..cead64ea6c15 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -18,6 +18,10 @@
 #include <linux/ioctl.h>
 #include <asm/byteorder.h>
 
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+#include <asm/hw_breakpoint.h>
+#endif
+
 /*
  * User-space ABI bits:
  */
@@ -31,6 +35,7 @@ enum perf_type_id {
 	PERF_TYPE_TRACEPOINT			= 2,
 	PERF_TYPE_HW_CACHE			= 3,
 	PERF_TYPE_RAW				= 4,
+	PERF_TYPE_BREAKPOINT			= 5,
 
 	PERF_TYPE_MAX,				/* non-ABI */
 };
@@ -207,6 +212,15 @@ struct perf_event_attr {
 		__u32		wakeup_events;	  /* wakeup every n events */
 		__u32		wakeup_watermark; /* bytes before wakeup   */
 	};
+
+	union {
+		struct { /* Hardware breakpoint info */
+			__u64		bp_addr;
+			__u32		bp_type;
+			__u32		bp_len;
+		};
+	};
+
 	__u32			__reserved_2;
 
 	__u64			__reserved_3;
@@ -476,6 +490,11 @@ struct hw_perf_event {
 			atomic64_t	count;
 			struct hrtimer	hrtimer;
 		};
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+		union { /* breakpoint */
+			struct arch_hw_breakpoint	info;
+		};
+#endif
 	};
 	atomic64_t			prev_count;
 	u64				sample_period;
@@ -588,7 +607,7 @@ struct perf_event {
 	u64				tstamp_running;
 	u64				tstamp_stopped;
 
-	struct perf_event_attr	attr;
+	struct perf_event_attr		attr;
 	struct hw_perf_event		hw;
 
 	struct perf_event_context	*ctx;
@@ -643,6 +662,8 @@ struct perf_event {
 
 	perf_callback_t			callback;
 
+	perf_callback_t			event_callback;
+
 #endif /* CONFIG_PERF_EVENTS */
 };
 
@@ -831,6 +852,7 @@ extern int sysctl_perf_event_sample_rate;
 extern void perf_event_init(void);
 extern void perf_tp_event(int event_id, u64 addr, u64 count,
 				 void *record, int entry_size);
+extern void perf_bp_event(struct perf_event *event, void *data);
 
 #ifndef perf_misc_flags
 #define perf_misc_flags(regs)	(user_mode(regs) ? PERF_RECORD_MISC_USER : \
@@ -865,6 +887,8 @@ static inline int perf_event_task_enable(void)				{ return -EINVAL; }
 static inline void
 perf_sw_event(u32 event_id, u64 nr, int nmi,
 		     struct pt_regs *regs, u64 addr)			{ }
+static inline void
+perf_bp_event(struct perf_event *event, void *data)		{ }
 
 static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
 static inline void perf_event_comm(struct task_struct *tsk)		{ }
diff --git a/kernel/exit.c b/kernel/exit.c
index e61891f80123..266f8920628a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -49,6 +49,7 @@
 #include <linux/init_task.h>
 #include <linux/perf_event.h>
 #include <trace/events/sched.h>
+#include <linux/hw_breakpoint.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -979,6 +980,10 @@ NORET_TYPE void do_exit(long code)
 
 	proc_exit_connector(tsk);
 
+	/*
+	 * FIXME: do that only when needed, using sched_exit tracepoint
+	 */
+	flush_ptrace_hw_breakpoint(tsk);
 	/*
 	 * Flush inherited counters to the parent - before the parent
 	 * gets woken up by child-exit notifications.
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index c1f64e65a9f3..08f6d0163201 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -15,6 +15,7 @@
  *
  * Copyright (C) 2007 Alan Stern
  * Copyright (C) IBM Corporation, 2009
+ * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com>
  */
 
 /*
@@ -35,334 +36,242 @@
 #include <linux/init.h>
 #include <linux/smp.h>
 
-#include <asm/hw_breakpoint.h>
+#include <linux/hw_breakpoint.h>
+
 #include <asm/processor.h>
 
 #ifdef CONFIG_X86
 #include <asm/debugreg.h>
 #endif
-/*
- * Spinlock that protects all (un)register operations over kernel/user-space
- * breakpoint requests
- */
-static DEFINE_SPINLOCK(hw_breakpoint_lock);
-
-/* Array of kernel-space breakpoint structures */
-struct hw_breakpoint *hbp_kernel[HBP_NUM];
-
-/*
- * Per-processor copy of hbp_kernel[]. Used only when hbp_kernel is being
- * modified but we need the older copy to handle any hbp exceptions. It will
- * sync with hbp_kernel[] value after updation is done through IPIs.
- */
-DEFINE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HBP_NUM]);
-
-/*
- * Kernel breakpoints grow downwards, starting from HBP_NUM
- * 'hbp_kernel_pos' denotes lowest numbered breakpoint register occupied for
- * kernel-space request. We will initialise it here and not in an __init
- * routine because load_debug_registers(), which uses this variable can be
- * called very early during CPU initialisation.
- */
-unsigned int hbp_kernel_pos = HBP_NUM;
 
-/*
- * An array containing refcount of threads using a given bkpt register
- * Accesses are synchronised by acquiring hw_breakpoint_lock
- */
-unsigned int hbp_user_refcount[HBP_NUM];
+static atomic_t bp_slot;
 
-/*
- * Load the debug registers during startup of a CPU.
- */
-void load_debug_registers(void)
+int reserve_bp_slot(struct perf_event *bp)
 {
-	unsigned long flags;
-	struct task_struct *tsk = current;
-
-	spin_lock_bh(&hw_breakpoint_lock);
-
-	/* Prevent IPIs for new kernel breakpoint updates */
-	local_irq_save(flags);
-	arch_update_kernel_hw_breakpoint(NULL);
-	local_irq_restore(flags);
-
-	if (test_tsk_thread_flag(tsk, TIF_DEBUG))
-		arch_install_thread_hw_breakpoint(tsk);
-
-	spin_unlock_bh(&hw_breakpoint_lock);
-}
+	if (atomic_inc_return(&bp_slot) == HBP_NUM) {
+		atomic_dec(&bp_slot);
 
-/*
- * Erase all the hardware breakpoint info associated with a thread.
- *
- * If tsk != current then tsk must not be usable (for example, a
- * child being cleaned up from a failed fork).
- */
-void flush_thread_hw_breakpoint(struct task_struct *tsk)
-{
-	int i;
-	struct thread_struct *thread = &(tsk->thread);
-
-	spin_lock_bh(&hw_breakpoint_lock);
-
-	/* The thread no longer has any breakpoints associated with it */
-	clear_tsk_thread_flag(tsk, TIF_DEBUG);
-	for (i = 0; i < HBP_NUM; i++) {
-		if (thread->hbp[i]) {
-			hbp_user_refcount[i]--;
-			kfree(thread->hbp[i]);
-			thread->hbp[i] = NULL;
-		}
+		return -ENOSPC;
 	}
 
-	arch_flush_thread_hw_breakpoint(tsk);
-
-	/* Actually uninstall the breakpoints if necessary */
-	if (tsk == current)
-		arch_uninstall_thread_hw_breakpoint();
-	spin_unlock_bh(&hw_breakpoint_lock);
+	return 0;
 }
 
-/*
- * Copy the hardware breakpoint info from a thread to its cloned child.
- */
-int copy_thread_hw_breakpoint(struct task_struct *tsk,
-		struct task_struct *child, unsigned long clone_flags)
+void release_bp_slot(struct perf_event *bp)
 {
-	/*
-	 * We will assume that breakpoint settings are not inherited
-	 * and the child starts out with no debug registers set.
-	 * But what about CLONE_PTRACE?
-	 */
-	clear_tsk_thread_flag(child, TIF_DEBUG);
-
-	/* We will call flush routine since the debugregs are not inherited */
-	arch_flush_thread_hw_breakpoint(child);
-
-	return 0;
+	atomic_dec(&bp_slot);
 }
 
-static int __register_user_hw_breakpoint(int pos, struct task_struct *tsk,
-					struct hw_breakpoint *bp)
+int __register_perf_hw_breakpoint(struct perf_event *bp)
 {
-	struct thread_struct *thread = &(tsk->thread);
-	int rc;
+	int ret;
 
-	/* Do not overcommit. Fail if kernel has used the hbp registers */
-	if (pos >= hbp_kernel_pos)
-		return -ENOSPC;
+	ret = reserve_bp_slot(bp);
+	if (ret)
+		return ret;
 
-	rc = arch_validate_hwbkpt_settings(bp, tsk);
-	if (rc)
-		return rc;
+	if (!bp->attr.disabled)
+		ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
 
-	thread->hbp[pos] = bp;
-	hbp_user_refcount[pos]++;
+	return ret;
+}
 
-	arch_update_user_hw_breakpoint(pos, tsk);
-	/*
-	 * Does it need to be installed right now?
-	 * Otherwise it will get installed the next time tsk runs
-	 */
-	if (tsk == current)
-		arch_install_thread_hw_breakpoint(tsk);
+int register_perf_hw_breakpoint(struct perf_event *bp)
+{
+	bp->callback = perf_bp_event;
 
-	return rc;
+	return __register_perf_hw_breakpoint(bp);
 }
 
 /*
- * Modify the address of a hbp register already in use by the task
- * Do not invoke this in-lieu of a __unregister_user_hw_breakpoint()
+ * Register a breakpoint bound to a task and a given cpu.
+ * If cpu is -1, the breakpoint is active for the task in every cpu
+ * If the task is -1, the breakpoint is active for every tasks in the given
+ * cpu.
  */
-static int __modify_user_hw_breakpoint(int pos, struct task_struct *tsk,
-					struct hw_breakpoint *bp)
+static struct perf_event *
+register_user_hw_breakpoint_cpu(unsigned long addr,
+				int len,
+				int type,
+				perf_callback_t triggered,
+				pid_t pid,
+				int cpu,
+				bool active)
 {
-	struct thread_struct *thread = &(tsk->thread);
-
-	if ((pos >= hbp_kernel_pos) || (arch_validate_hwbkpt_settings(bp, tsk)))
-		return -EINVAL;
-
-	if (thread->hbp[pos] == NULL)
-		return -EINVAL;
-
-	thread->hbp[pos] = bp;
+	struct perf_event_attr *attr;
+	struct perf_event *bp;
+
+	attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+	if (!attr)
+		return ERR_PTR(-ENOMEM);
+
+	attr->type = PERF_TYPE_BREAKPOINT;
+	attr->size = sizeof(*attr);
+	attr->bp_addr = addr;
+	attr->bp_len = len;
+	attr->bp_type = type;
 	/*
-	 * 'pos' must be that of a hbp register already used by 'tsk'
-	 * Otherwise arch_modify_user_hw_breakpoint() will fail
+	 * Such breakpoints are used by debuggers to trigger signals when
+	 * we hit the excepted memory op. We can't miss such events, they
+	 * must be pinned.
 	 */
-	arch_update_user_hw_breakpoint(pos, tsk);
+	attr->pinned = 1;
 
-	if (tsk == current)
-		arch_install_thread_hw_breakpoint(tsk);
+	if (!active)
+		attr->disabled = 1;
 
-	return 0;
-}
-
-static void __unregister_user_hw_breakpoint(int pos, struct task_struct *tsk)
-{
-	hbp_user_refcount[pos]--;
-	tsk->thread.hbp[pos] = NULL;
+	bp = perf_event_create_kernel_counter(attr, cpu, pid, triggered);
+	kfree(attr);
 
-	arch_update_user_hw_breakpoint(pos, tsk);
-
-	if (tsk == current)
-		arch_install_thread_hw_breakpoint(tsk);
+	return bp;
 }
 
 /**
  * register_user_hw_breakpoint - register a hardware breakpoint for user space
+ * @addr: is the memory address that triggers the breakpoint
+ * @len: the length of the access to the memory (1 byte, 2 bytes etc...)
+ * @type: the type of the access to the memory (read/write/exec)
+ * @triggered: callback to trigger when we hit the breakpoint
  * @tsk: pointer to 'task_struct' of the process to which the address belongs
- * @bp: the breakpoint structure to register
- *
- * @bp.info->name or @bp.info->address, @bp.info->len, @bp.info->type and
- * @bp->triggered must be set properly before invocation
+ * @active: should we activate it while registering it
  *
  */
-int register_user_hw_breakpoint(struct task_struct *tsk,
-					struct hw_breakpoint *bp)
+struct perf_event *
+register_user_hw_breakpoint(unsigned long addr,
+			    int len,
+			    int type,
+			    perf_callback_t triggered,
+			    struct task_struct *tsk,
+			    bool active)
 {
-	struct thread_struct *thread = &(tsk->thread);
-	int i, rc = -ENOSPC;
-
-	spin_lock_bh(&hw_breakpoint_lock);
-
-	for (i = 0; i < hbp_kernel_pos; i++) {
-		if (!thread->hbp[i]) {
-			rc = __register_user_hw_breakpoint(i, tsk, bp);
-			break;
-		}
-	}
-	if (!rc)
-		set_tsk_thread_flag(tsk, TIF_DEBUG);
-
-	spin_unlock_bh(&hw_breakpoint_lock);
-	return rc;
+	return register_user_hw_breakpoint_cpu(addr, len, type, triggered,
+					       tsk->pid, -1, active);
 }
 EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
 
 /**
  * modify_user_hw_breakpoint - modify a user-space hardware breakpoint
+ * @bp: the breakpoint structure to modify
+ * @addr: is the memory address that triggers the breakpoint
+ * @len: the length of the access to the memory (1 byte, 2 bytes etc...)
+ * @type: the type of the access to the memory (read/write/exec)
+ * @triggered: callback to trigger when we hit the breakpoint
  * @tsk: pointer to 'task_struct' of the process to which the address belongs
- * @bp: the breakpoint structure to unregister
- *
+ * @active: should we activate it while registering it
  */
-int modify_user_hw_breakpoint(struct task_struct *tsk, struct hw_breakpoint *bp)
+struct perf_event *
+modify_user_hw_breakpoint(struct perf_event *bp,
+			  unsigned long addr,
+			  int len,
+			  int type,
+			  perf_callback_t triggered,
+			  struct task_struct *tsk,
+			  bool active)
 {
-	struct thread_struct *thread = &(tsk->thread);
-	int i, ret = -ENOENT;
+	/*
+	 * FIXME: do it without unregistering
+	 * - We don't want to lose our slot
+	 * - If the new bp is incorrect, don't lose the older one
+	 */
+	unregister_hw_breakpoint(bp);
 
-	spin_lock_bh(&hw_breakpoint_lock);
-	for (i = 0; i < hbp_kernel_pos; i++) {
-		if (bp == thread->hbp[i]) {
-			ret = __modify_user_hw_breakpoint(i, tsk, bp);
-			break;
-		}
-	}
-	spin_unlock_bh(&hw_breakpoint_lock);
-	return ret;
+	return register_user_hw_breakpoint(addr, len, type, triggered,
+					   tsk, active);
 }
 EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
 
 /**
- * unregister_user_hw_breakpoint - unregister a user-space hardware breakpoint
- * @tsk: pointer to 'task_struct' of the process to which the address belongs
+ * unregister_hw_breakpoint - unregister a user-space hardware breakpoint
  * @bp: the breakpoint structure to unregister
- *
  */
-void unregister_user_hw_breakpoint(struct task_struct *tsk,
-						struct hw_breakpoint *bp)
+void unregister_hw_breakpoint(struct perf_event *bp)
 {
-	struct thread_struct *thread = &(tsk->thread);
-	int i, pos = -1, hbp_counter = 0;
-
-	spin_lock_bh(&hw_breakpoint_lock);
-	for (i = 0; i < hbp_kernel_pos; i++) {
-		if (thread->hbp[i])
-			hbp_counter++;
-		if (bp == thread->hbp[i])
-			pos = i;
-	}
-	if (pos >= 0) {
-		__unregister_user_hw_breakpoint(pos, tsk);
-		hbp_counter--;
-	}
-	if (!hbp_counter)
-		clear_tsk_thread_flag(tsk, TIF_DEBUG);
-
-	spin_unlock_bh(&hw_breakpoint_lock);
+	if (!bp)
+		return;
+	perf_event_release_kernel(bp);
+}
+EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
+
+static struct perf_event *
+register_kernel_hw_breakpoint_cpu(unsigned long addr,
+				  int len,
+				  int type,
+				  perf_callback_t triggered,
+				  int cpu,
+				  bool active)
+{
+	return register_user_hw_breakpoint_cpu(addr, len, type, triggered,
+					       -1, cpu, active);
 }
-EXPORT_SYMBOL_GPL(unregister_user_hw_breakpoint);
 
 /**
- * register_kernel_hw_breakpoint - register a hardware breakpoint for kernel space
- * @bp: the breakpoint structure to register
- *
- * @bp.info->name or @bp.info->address, @bp.info->len, @bp.info->type and
- * @bp->triggered must be set properly before invocation
+ * register_wide_hw_breakpoint - register a wide breakpoint in the kernel
+ * @addr: is the memory address that triggers the breakpoint
+ * @len: the length of the access to the memory (1 byte, 2 bytes etc...)
+ * @type: the type of the access to the memory (read/write/exec)
+ * @triggered: callback to trigger when we hit the breakpoint
+ * @active: should we activate it while registering it
  *
+ * @return a set of per_cpu pointers to perf events
  */
-int register_kernel_hw_breakpoint(struct hw_breakpoint *bp)
+struct perf_event **
+register_wide_hw_breakpoint(unsigned long addr,
+			    int len,
+			    int type,
+			    perf_callback_t triggered,
+			    bool active)
 {
-	int rc;
+	struct perf_event **cpu_events, **pevent, *bp;
+	long err;
+	int cpu;
+
+	cpu_events = alloc_percpu(typeof(*cpu_events));
+	if (!cpu_events)
+		return ERR_PTR(-ENOMEM);
 
-	rc = arch_validate_hwbkpt_settings(bp, NULL);
-	if (rc)
-		return rc;
+	for_each_possible_cpu(cpu) {
+		pevent = per_cpu_ptr(cpu_events, cpu);
+		bp = register_kernel_hw_breakpoint_cpu(addr, len, type,
+					triggered, cpu, active);
 
-	spin_lock_bh(&hw_breakpoint_lock);
+		*pevent = bp;
 
-	rc = -ENOSPC;
-	/* Check if we are over-committing */
-	if ((hbp_kernel_pos > 0) && (!hbp_user_refcount[hbp_kernel_pos-1])) {
-		hbp_kernel_pos--;
-		hbp_kernel[hbp_kernel_pos] = bp;
-		on_each_cpu(arch_update_kernel_hw_breakpoint, NULL, 1);
-		rc = 0;
+		if (IS_ERR(bp) || !bp) {
+			err = PTR_ERR(bp);
+			goto fail;
+		}
 	}
 
-	spin_unlock_bh(&hw_breakpoint_lock);
-	return rc;
+	return cpu_events;
+
+fail:
+	for_each_possible_cpu(cpu) {
+		pevent = per_cpu_ptr(cpu_events, cpu);
+		if (IS_ERR(*pevent) || !*pevent)
+			break;
+		unregister_hw_breakpoint(*pevent);
+	}
+	free_percpu(cpu_events);
+	/* return the error if any */
+	return ERR_PTR(err);
 }
-EXPORT_SYMBOL_GPL(register_kernel_hw_breakpoint);
 
 /**
- * unregister_kernel_hw_breakpoint - unregister a HW breakpoint for kernel space
- * @bp: the breakpoint structure to unregister
- *
- * Uninstalls and unregisters @bp.
+ * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
+ * @cpu_events: the per cpu set of events to unregister
  */
-void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp)
+void unregister_wide_hw_breakpoint(struct perf_event **cpu_events)
 {
-	int i, j;
-
-	spin_lock_bh(&hw_breakpoint_lock);
-
-	/* Find the 'bp' in our list of breakpoints for kernel */
-	for (i = hbp_kernel_pos; i < HBP_NUM; i++)
-		if (bp == hbp_kernel[i])
-			break;
+	int cpu;
+	struct perf_event **pevent;
 
-	/* Check if we did not find a match for 'bp'. If so return early */
-	if (i == HBP_NUM) {
-		spin_unlock_bh(&hw_breakpoint_lock);
-		return;
+	for_each_possible_cpu(cpu) {
+		pevent = per_cpu_ptr(cpu_events, cpu);
+		unregister_hw_breakpoint(*pevent);
 	}
-
-	/*
-	 * We'll shift the breakpoints one-level above to compact if
-	 * unregistration creates a hole
-	 */
-	for (j = i; j > hbp_kernel_pos; j--)
-		hbp_kernel[j] = hbp_kernel[j-1];
-
-	hbp_kernel[hbp_kernel_pos] = NULL;
-	on_each_cpu(arch_update_kernel_hw_breakpoint, NULL, 1);
-	hbp_kernel_pos++;
-
-	spin_unlock_bh(&hw_breakpoint_lock);
+	free_percpu(cpu_events);
 }
-EXPORT_SYMBOL_GPL(unregister_kernel_hw_breakpoint);
+
 
 static struct notifier_block hw_breakpoint_exceptions_nb = {
 	.notifier_call = hw_breakpoint_exceptions_notify,
@@ -374,5 +283,12 @@ static int __init init_hw_breakpoint(void)
 {
 	return register_die_notifier(&hw_breakpoint_exceptions_nb);
 }
-
 core_initcall(init_hw_breakpoint);
+
+
+struct pmu perf_ops_bp = {
+	.enable		= arch_install_hw_breakpoint,
+	.disable	= arch_uninstall_hw_breakpoint,
+	.read		= hw_breakpoint_pmu_read,
+	.unthrottle	= hw_breakpoint_pmu_unthrottle
+};
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 5087125e2a00..98dc56b2ebe4 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -29,6 +29,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/perf_event.h>
 #include <linux/ftrace_event.h>
+#include <linux/hw_breakpoint.h>
 
 #include <asm/irq_regs.h>
 
@@ -4229,6 +4230,51 @@ static void perf_event_free_filter(struct perf_event *event)
 
 #endif /* CONFIG_EVENT_PROFILE */
 
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+static void bp_perf_event_destroy(struct perf_event *event)
+{
+	release_bp_slot(event);
+}
+
+static const struct pmu *bp_perf_event_init(struct perf_event *bp)
+{
+	int err;
+	/*
+	 * The breakpoint is already filled if we haven't created the counter
+	 * through perf syscall
+	 * FIXME: manage to get trigerred to NULL if it comes from syscalls
+	 */
+	if (!bp->callback)
+		err = register_perf_hw_breakpoint(bp);
+	else
+		err = __register_perf_hw_breakpoint(bp);
+	if (err)
+		return ERR_PTR(err);
+
+	bp->destroy = bp_perf_event_destroy;
+
+	return &perf_ops_bp;
+}
+
+void perf_bp_event(struct perf_event *bp, void *regs)
+{
+	/* TODO */
+}
+#else
+static void bp_perf_event_destroy(struct perf_event *event)
+{
+}
+
+static const struct pmu *bp_perf_event_init(struct perf_event *bp)
+{
+	return NULL;
+}
+
+void perf_bp_event(struct perf_event *bp, void *regs)
+{
+}
+#endif
+
 atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
 
 static void sw_perf_event_destroy(struct perf_event *event)
@@ -4375,6 +4421,11 @@ perf_event_alloc(struct perf_event_attr *attr,
 		pmu = tp_perf_event_init(event);
 		break;
 
+	case PERF_TYPE_BREAKPOINT:
+		pmu = bp_perf_event_init(event);
+		break;
+
+
 	default:
 		break;
 	}
@@ -4686,7 +4737,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 
 	ctx = find_get_context(pid, cpu);
 	if (IS_ERR(ctx))
-		return NULL ;
+		return NULL;
 
 	event = perf_event_alloc(attr, cpu, ctx, NULL,
 				     NULL, callback, GFP_KERNEL);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 91c3d0e9a5a1..d72f06ff263f 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -11,14 +11,11 @@
 #include <linux/ftrace.h>
 #include <trace/boot.h>
 #include <linux/kmemtrace.h>
+#include <linux/hw_breakpoint.h>
 
 #include <linux/trace_seq.h>
 #include <linux/ftrace_event.h>
 
-#ifdef CONFIG_KSYM_TRACER
-#include <asm/hw_breakpoint.h>
-#endif
-
 enum trace_type {
 	__TRACE_FIRST_TYPE = 0,
 
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index e19747d4f860..c16a08f399df 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -372,11 +372,11 @@ FTRACE_ENTRY(ksym_trace, ksym_trace_entry,
 	F_STRUCT(
 		__field(	unsigned long,	ip			  )
 		__field(	unsigned char,	type			  )
-		__array(	char	     ,	ksym_name, KSYM_NAME_LEN  )
 		__array(	char	     ,	cmd,	   TASK_COMM_LEN  )
+		__field(	unsigned long,  addr			  )
 	),
 
-	F_printk("ip: %pF type: %d ksym_name: %s cmd: %s",
+	F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s",
 		(void *)__entry->ip, (unsigned int)__entry->type,
-		__entry->ksym_name, __entry->cmd)
+		(void *)__entry->addr,  __entry->cmd)
 );
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
index 6d5609c67378..fea83eeeef09 100644
--- a/kernel/trace/trace_ksym.c
+++ b/kernel/trace/trace_ksym.c
@@ -29,7 +29,11 @@
 #include "trace_stat.h"
 #include "trace.h"
 
-/* For now, let us restrict the no. of symbols traced simultaneously to number
+#include <linux/hw_breakpoint.h>
+#include <asm/hw_breakpoint.h>
+
+/*
+ * For now, let us restrict the no. of symbols traced simultaneously to number
  * of available hardware breakpoint registers.
  */
 #define KSYM_TRACER_MAX HBP_NUM
@@ -37,8 +41,10 @@
 #define KSYM_TRACER_OP_LEN 3 /* rw- */
 
 struct trace_ksym {
-	struct hw_breakpoint	*ksym_hbp;
+	struct perf_event	**ksym_hbp;
 	unsigned long		ksym_addr;
+	int			type;
+	int			len;
 #ifdef CONFIG_PROFILE_KSYM_TRACER
 	unsigned long		counter;
 #endif
@@ -75,10 +81,11 @@ void ksym_collect_stats(unsigned long hbp_hit_addr)
 }
 #endif /* CONFIG_PROFILE_KSYM_TRACER */
 
-void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs)
+void ksym_hbp_handler(struct perf_event *hbp, void *data)
 {
 	struct ring_buffer_event *event;
 	struct ksym_trace_entry *entry;
+	struct pt_regs *regs = data;
 	struct ring_buffer *buffer;
 	int pc;
 
@@ -96,12 +103,12 @@ void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs)
 
 	entry		= ring_buffer_event_data(event);
 	entry->ip	= instruction_pointer(regs);
-	entry->type	= hbp->info.type;
-	strlcpy(entry->ksym_name, hbp->info.name, KSYM_SYMBOL_LEN);
+	entry->type	= hw_breakpoint_type(hbp);
+	entry->addr	= hw_breakpoint_addr(hbp);
 	strlcpy(entry->cmd, current->comm, TASK_COMM_LEN);
 
 #ifdef CONFIG_PROFILE_KSYM_TRACER
-	ksym_collect_stats(hbp->info.address);
+	ksym_collect_stats(hw_breakpoint_addr(hbp));
 #endif /* CONFIG_PROFILE_KSYM_TRACER */
 
 	trace_buffer_unlock_commit(buffer, event, 0, pc);
@@ -120,31 +127,21 @@ static int ksym_trace_get_access_type(char *str)
 	int access = 0;
 
 	if (str[0] == 'r')
-		access += 4;
-	else if (str[0] != '-')
-		return -EINVAL;
+		access |= HW_BREAKPOINT_R;
 
 	if (str[1] == 'w')
-		access += 2;
-	else if (str[1] != '-')
-		return -EINVAL;
+		access |= HW_BREAKPOINT_W;
 
-	if (str[2] != '-')
-		return -EINVAL;
+	if (str[2] == 'x')
+		access |= HW_BREAKPOINT_X;
 
 	switch (access) {
-	case 6:
-		access = HW_BREAKPOINT_RW;
-		break;
-	case 4:
-		access = -EINVAL;
-		break;
-	case 2:
-		access = HW_BREAKPOINT_WRITE;
-		break;
+	case HW_BREAKPOINT_W:
+	case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
+		return access;
+	default:
+		return -EINVAL;
 	}
-
-	return access;
 }
 
 /*
@@ -194,36 +191,33 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
 	if (!entry)
 		return -ENOMEM;
 
-	entry->ksym_hbp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL);
-	if (!entry->ksym_hbp)
-		goto err;
-
-	entry->ksym_hbp->info.name = kstrdup(ksymname, GFP_KERNEL);
-	if (!entry->ksym_hbp->info.name)
-		goto err;
-
-	entry->ksym_hbp->info.type = op;
-	entry->ksym_addr = entry->ksym_hbp->info.address = addr;
-#ifdef CONFIG_X86
-	entry->ksym_hbp->info.len = HW_BREAKPOINT_LEN_4;
-#endif
-	entry->ksym_hbp->triggered = (void *)ksym_hbp_handler;
+	entry->type = op;
+	entry->ksym_addr = addr;
+	entry->len = HW_BREAKPOINT_LEN_4;
+
+	ret = -EAGAIN;
+	entry->ksym_hbp = register_wide_hw_breakpoint(entry->ksym_addr,
+					entry->len, entry->type,
+					ksym_hbp_handler, true);
+	if (IS_ERR(entry->ksym_hbp)) {
+		entry->ksym_hbp = NULL;
+		ret = PTR_ERR(entry->ksym_hbp);
+	}
 
-	ret = register_kernel_hw_breakpoint(entry->ksym_hbp);
-	if (ret < 0) {
+	if (!entry->ksym_hbp) {
 		printk(KERN_INFO "ksym_tracer request failed. Try again"
 					" later!!\n");
-		ret = -EAGAIN;
 		goto err;
 	}
+
 	hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
 	ksym_filter_entry_count++;
+
 	return 0;
+
 err:
-	if (entry->ksym_hbp)
-		kfree(entry->ksym_hbp->info.name);
-	kfree(entry->ksym_hbp);
 	kfree(entry);
+
 	return ret;
 }
 
@@ -244,10 +238,10 @@ static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
 	mutex_lock(&ksym_tracer_mutex);
 
 	hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
-		ret = trace_seq_printf(s, "%s:", entry->ksym_hbp->info.name);
-		if (entry->ksym_hbp->info.type == HW_BREAKPOINT_WRITE)
+		ret = trace_seq_printf(s, "%pS:", (void *)entry->ksym_addr);
+		if (entry->type == HW_BREAKPOINT_W)
 			ret = trace_seq_puts(s, "-w-\n");
-		else if (entry->ksym_hbp->info.type == HW_BREAKPOINT_RW)
+		else if (entry->type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R))
 			ret = trace_seq_puts(s, "rw-\n");
 		WARN_ON_ONCE(!ret);
 	}
@@ -269,12 +263,10 @@ static void __ksym_trace_reset(void)
 	mutex_lock(&ksym_tracer_mutex);
 	hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
 								ksym_hlist) {
-		unregister_kernel_hw_breakpoint(entry->ksym_hbp);
+		unregister_wide_hw_breakpoint(entry->ksym_hbp);
 		ksym_filter_entry_count--;
 		hlist_del_rcu(&(entry->ksym_hlist));
 		synchronize_rcu();
-		kfree(entry->ksym_hbp->info.name);
-		kfree(entry->ksym_hbp);
 		kfree(entry);
 	}
 	mutex_unlock(&ksym_tracer_mutex);
@@ -327,7 +319,7 @@ static ssize_t ksym_trace_filter_write(struct file *file,
 	hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
 		if (entry->ksym_addr == ksym_addr) {
 			/* Check for malformed request: (6) */
-			if (entry->ksym_hbp->info.type != op)
+			if (entry->type != op)
 				changed = 1;
 			else
 				goto out;
@@ -335,18 +327,21 @@ static ssize_t ksym_trace_filter_write(struct file *file,
 		}
 	}
 	if (changed) {
-		unregister_kernel_hw_breakpoint(entry->ksym_hbp);
-		entry->ksym_hbp->info.type = op;
+		unregister_wide_hw_breakpoint(entry->ksym_hbp);
+		entry->type = op;
 		if (op > 0) {
-			ret = register_kernel_hw_breakpoint(entry->ksym_hbp);
-			if (ret == 0)
+			entry->ksym_hbp =
+				register_wide_hw_breakpoint(entry->ksym_addr,
+					entry->len, entry->type,
+					ksym_hbp_handler, true);
+			if (IS_ERR(entry->ksym_hbp))
+				entry->ksym_hbp = NULL;
+			if (!entry->ksym_hbp)
 				goto out;
 		}
 		ksym_filter_entry_count--;
 		hlist_del_rcu(&(entry->ksym_hlist));
 		synchronize_rcu();
-		kfree(entry->ksym_hbp->info.name);
-		kfree(entry->ksym_hbp);
 		kfree(entry);
 		ret = 0;
 		goto out;
@@ -413,16 +408,16 @@ static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
 
 	trace_assign_type(field, entry);
 
-	ret = trace_seq_printf(s, "%11s-%-5d [%03d] %-30s ", field->cmd,
-				entry->pid, iter->cpu, field->ksym_name);
+	ret = trace_seq_printf(s, "%11s-%-5d [%03d] %pS", field->cmd,
+				entry->pid, iter->cpu, (char *)field->addr);
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
 
 	switch (field->type) {
-	case HW_BREAKPOINT_WRITE:
+	case HW_BREAKPOINT_W:
 		ret = trace_seq_printf(s, " W  ");
 		break;
-	case HW_BREAKPOINT_RW:
+	case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
 		ret = trace_seq_printf(s, " RW ");
 		break;
 	default:
@@ -490,14 +485,13 @@ static int ksym_tracer_stat_show(struct seq_file *m, void *v)
 
 	entry = hlist_entry(stat, struct trace_ksym, ksym_hlist);
 
-	if (entry->ksym_hbp)
-		access_type = entry->ksym_hbp->info.type;
+	access_type = entry->type;
 
 	switch (access_type) {
-	case HW_BREAKPOINT_WRITE:
+	case HW_BREAKPOINT_W:
 		seq_puts(m, "  W           ");
 		break;
-	case HW_BREAKPOINT_RW:
+	case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
 		seq_puts(m, "  RW          ");
 		break;
 	default:
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 7179c12e4f0f..27c5072c2e6b 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -828,7 +828,8 @@ trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
 
 	ksym_selftest_dummy = 0;
 	/* Register the read-write tracing request */
-	ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY, HW_BREAKPOINT_RW,
+	ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY,
+				     HW_BREAKPOINT_R | HW_BREAKPOINT_W,
 					(unsigned long)(&ksym_selftest_dummy));
 
 	if (ret < 0) {
-- 
cgit v1.2.3-70-g09d2


From 91284224da5b15ec6c2b45e10fa5eccd1c92a204 Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Sun, 18 Oct 2009 23:32:33 +0200
Subject: pcmcia: add new CIS access helpers

As a replacement to pcmcia_get_{first,next}_tuple() and
pcmcia_get_tuple_data(), three new -- and easier to use --
functions are added:

- pcmcia_get_tuple() to get the very first CIS entry of one
  type.

- pcmcia_loop_tuple() to loop over all CIS entries of one type.

- pcmcia_get_mac_from_cis() to read out the hardware MAC address
  from CISTPL_FUNCE.

Only a handful of drivers need these functions anyway, as most
CIS access is already handled by pcmcia_loop_config(), which
now shares the same backed (pccard_loop_tuple()) with
pcmcia_loop_tuple().

A pcmcia_get_mac_from_cis() bug noted by Komuro
<komurojun-mbn@nifty.com> has been fixed in this revision.

Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
 Documentation/pcmcia/driver-changes.txt |   7 ++
 drivers/pcmcia/cistpl.c                 |  61 +++++++++
 drivers/pcmcia/cs_internal.h            |   7 ++
 drivers/pcmcia/pcmcia_resource.c        | 217 +++++++++++++++++++++++++++-----
 drivers/pcmcia/rsrc_mgr.c               |   1 +
 include/pcmcia/ds.h                     |  57 ++++++---
 6 files changed, 304 insertions(+), 46 deletions(-)

(limited to 'include')

diff --git a/Documentation/pcmcia/driver-changes.txt b/Documentation/pcmcia/driver-changes.txt
index 059934363caf..adfb83e58675 100644
--- a/Documentation/pcmcia/driver-changes.txt
+++ b/Documentation/pcmcia/driver-changes.txt
@@ -1,5 +1,12 @@
 This file details changes in 2.6 which affect PCMCIA card driver authors:
 
+* New CIS tuple access (as of 2.6.33)
+   Instead of pcmcia_get_{first,next}_tuple(), pcmcia_get_tuple_data() and
+   pcmcia_parse_tuple(), a driver shall use "pcmcia_get_tuple()" if it is
+   only interested in one (raw) tuple, or "pcmcia_loop_tuple()" if it is
+   interested in all tuples of one type. To decode the MAC from CISTPL_FUNCE,
+   a new helper "pcmcia_get_mac_from_cis()" was added.
+
 * New configuration loop helper (as of 2.6.28)
    By calling pcmcia_loop_config(), a driver can iterate over all available
    configuration options. During a driver's probe() phase, one doesn't need
diff --git a/drivers/pcmcia/cistpl.c b/drivers/pcmcia/cistpl.c
index 6c4a4fc83630..24dd3c1eac0b 100644
--- a/drivers/pcmcia/cistpl.c
+++ b/drivers/pcmcia/cistpl.c
@@ -1482,6 +1482,67 @@ done:
 }
 EXPORT_SYMBOL(pccard_read_tuple);
 
+
+/**
+ * pccard_loop_tuple() - loop over tuples in the CIS
+ * @s:		the struct pcmcia_socket where the card is inserted
+ * @function:	the device function we loop for
+ * @code:	which CIS code shall we look for?
+ * @parse:	buffer where the tuple shall be parsed (or NULL, if no parse)
+ * @priv_data:	private data to be passed to the loop_tuple function.
+ * @loop_tuple:	function to call for each CIS entry of type @function. IT
+ *		gets passed the raw tuple, the paresed tuple (if @parse is
+ *		set) and @priv_data.
+ *
+ * pccard_loop_tuple() loops over all CIS entries of type @function, and
+ * calls the @loop_tuple function for each entry. If the call to @loop_tuple
+ * returns 0, the loop exits. Returns 0 on success or errorcode otherwise.
+ */
+int pccard_loop_tuple(struct pcmcia_socket *s, unsigned int function,
+		      cisdata_t code, cisparse_t *parse, void *priv_data,
+		      int (*loop_tuple) (tuple_t *tuple,
+					 cisparse_t *parse,
+					 void *priv_data))
+{
+	tuple_t tuple;
+	cisdata_t *buf;
+	int ret;
+
+	buf = kzalloc(256, GFP_KERNEL);
+	if (buf == NULL) {
+		dev_printk(KERN_WARNING, &s->dev, "no memory to read tuple\n");
+		return -ENOMEM;
+	}
+
+	tuple.TupleData = buf;
+	tuple.TupleDataMax = 255;
+	tuple.TupleOffset = 0;
+	tuple.DesiredTuple = code;
+	tuple.Attributes = 0;
+
+	ret = pccard_get_first_tuple(s, function, &tuple);
+	while (!ret) {
+		if (pccard_get_tuple_data(s, &tuple))
+			goto next_entry;
+
+		if (parse)
+			if (pcmcia_parse_tuple(&tuple, parse))
+				goto next_entry;
+
+		ret = loop_tuple(&tuple, parse, priv_data);
+		if (!ret)
+			break;
+
+next_entry:
+		ret = pccard_get_next_tuple(s, function, &tuple);
+	}
+
+	kfree(buf);
+	return ret;
+}
+EXPORT_SYMBOL(pccard_loop_tuple);
+
+
 /*======================================================================
 
     This tries to determine if a card has a sensible CIS.  It returns
diff --git a/drivers/pcmcia/cs_internal.h b/drivers/pcmcia/cs_internal.h
index 1f4098f1354d..06a14c951e92 100644
--- a/drivers/pcmcia/cs_internal.h
+++ b/drivers/pcmcia/cs_internal.h
@@ -199,6 +199,13 @@ int pcmcia_replace_cis(struct pcmcia_socket *s,
 		       const u8 *data, const size_t len);
 int pccard_validate_cis(struct pcmcia_socket *s, unsigned int *count);
 
+/* loop over CIS entries */
+int pccard_loop_tuple(struct pcmcia_socket *s, unsigned int function,
+		      cisdata_t code, cisparse_t *parse, void *priv_data,
+		      int (*loop_tuple) (tuple_t *tuple,
+					 cisparse_t *parse,
+					 void *priv_data));
+
 /* rsrc_mgr.c */
 int pcmcia_validate_mem(struct pcmcia_socket *s);
 struct resource *pcmcia_find_io_region(unsigned long base,
diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c
index d919e96c0afd..0bfb05aa8f85 100644
--- a/drivers/pcmcia/pcmcia_resource.c
+++ b/drivers/pcmcia/pcmcia_resource.c
@@ -20,6 +20,7 @@
 #include <linux/delay.h>
 #include <linux/pci.h>
 #include <linux/device.h>
+#include <linux/netdevice.h>
 
 #include <pcmcia/cs_types.h>
 #include <pcmcia/ss.h>
@@ -885,12 +886,39 @@ EXPORT_SYMBOL(pcmcia_disable_device);
 
 
 struct pcmcia_cfg_mem {
-	tuple_t tuple;
+	struct pcmcia_device *p_dev;
+	void *priv_data;
+	int (*conf_check) (struct pcmcia_device *p_dev,
+			   cistpl_cftable_entry_t *cfg,
+			   cistpl_cftable_entry_t *dflt,
+			   unsigned int vcc,
+			   void *priv_data);
 	cisparse_t parse;
-	u8 buf[256];
 	cistpl_cftable_entry_t dflt;
 };
 
+/**
+ * pcmcia_do_loop_config() - internal helper for pcmcia_loop_config()
+ *
+ * pcmcia_do_loop_config() is the internal callback for the call from
+ * pcmcia_loop_config() to pccard_loop_tuple(). Data is transferred
+ * by a struct pcmcia_cfg_mem.
+ */
+static int pcmcia_do_loop_config(tuple_t *tuple, cisparse_t *parse, void *priv)
+{
+	cistpl_cftable_entry_t *cfg = &parse->cftable_entry;
+	struct pcmcia_cfg_mem *cfg_mem = priv;
+
+	/* default values */
+	cfg_mem->p_dev->conf.ConfigIndex = cfg->index;
+	if (cfg->flags & CISTPL_CFTABLE_DEFAULT)
+		cfg_mem->dflt = *cfg;
+
+	return cfg_mem->conf_check(cfg_mem->p_dev, cfg, &cfg_mem->dflt,
+				   cfg_mem->p_dev->socket->socket.Vcc,
+				   cfg_mem->priv_data);
+}
+
 /**
  * pcmcia_loop_config() - loop over configuration options
  * @p_dev:	the struct pcmcia_device which we need to loop for.
@@ -913,48 +941,173 @@ int pcmcia_loop_config(struct pcmcia_device *p_dev,
 		       void *priv_data)
 {
 	struct pcmcia_cfg_mem *cfg_mem;
-
-	tuple_t *tuple;
 	int ret;
-	unsigned int vcc;
 
 	cfg_mem = kzalloc(sizeof(struct pcmcia_cfg_mem), GFP_KERNEL);
 	if (cfg_mem == NULL)
 		return -ENOMEM;
 
-	/* get the current Vcc setting */
-	vcc = p_dev->socket->socket.Vcc;
+	cfg_mem->p_dev = p_dev;
+	cfg_mem->conf_check = conf_check;
+	cfg_mem->priv_data = priv_data;
 
-	tuple = &cfg_mem->tuple;
-	tuple->TupleData = cfg_mem->buf;
-	tuple->TupleDataMax = 255;
-	tuple->TupleOffset = 0;
-	tuple->DesiredTuple = CISTPL_CFTABLE_ENTRY;
-	tuple->Attributes = 0;
+	ret = pccard_loop_tuple(p_dev->socket, p_dev->func,
+				CISTPL_CFTABLE_ENTRY, &cfg_mem->parse,
+				cfg_mem, pcmcia_do_loop_config);
 
-	ret = pcmcia_get_first_tuple(p_dev, tuple);
-	while (!ret) {
-		cistpl_cftable_entry_t *cfg = &cfg_mem->parse.cftable_entry;
+	kfree(cfg_mem);
+	return ret;
+}
+EXPORT_SYMBOL(pcmcia_loop_config);
 
-		if (pcmcia_get_tuple_data(p_dev, tuple))
-			goto next_entry;
 
-		if (pcmcia_parse_tuple(tuple, &cfg_mem->parse))
-			goto next_entry;
+struct pcmcia_loop_mem {
+	struct pcmcia_device *p_dev;
+	void *priv_data;
+	int (*loop_tuple) (struct pcmcia_device *p_dev,
+			   tuple_t *tuple,
+			   void *priv_data);
+};
 
-		/* default values */
-		p_dev->conf.ConfigIndex = cfg->index;
-		if (cfg->flags & CISTPL_CFTABLE_DEFAULT)
-			cfg_mem->dflt = *cfg;
+/**
+ * pcmcia_do_loop_tuple() - internal helper for pcmcia_loop_config()
+ *
+ * pcmcia_do_loop_tuple() is the internal callback for the call from
+ * pcmcia_loop_tuple() to pccard_loop_tuple(). Data is transferred
+ * by a struct pcmcia_cfg_mem.
+ */
+static int pcmcia_do_loop_tuple(tuple_t *tuple, cisparse_t *parse, void *priv)
+{
+	struct pcmcia_loop_mem *loop = priv;
 
-		ret = conf_check(p_dev, cfg, &cfg_mem->dflt, vcc, priv_data);
-		if (!ret)
-			break;
+	return loop->loop_tuple(loop->p_dev, tuple, loop->priv_data);
+};
+
+/**
+ * pcmcia_loop_tuple() - loop over tuples in the CIS
+ * @p_dev:	the struct pcmcia_device which we need to loop for.
+ * @code:	which CIS code shall we look for?
+ * @priv_data:	private data to be passed to the loop_tuple function.
+ * @loop_tuple:	function to call for each CIS entry of type @function. IT
+ *		gets passed the raw tuple and @priv_data.
+ *
+ * pcmcia_loop_tuple() loops over all CIS entries of type @function, and
+ * calls the @loop_tuple function for each entry. If the call to @loop_tuple
+ * returns 0, the loop exits. Returns 0 on success or errorcode otherwise.
+ */
+int pcmcia_loop_tuple(struct pcmcia_device *p_dev, cisdata_t code,
+		      int (*loop_tuple) (struct pcmcia_device *p_dev,
+					 tuple_t *tuple,
+					 void *priv_data),
+		      void *priv_data)
+{
+	struct pcmcia_loop_mem loop = {
+		.p_dev = p_dev,
+		.loop_tuple = loop_tuple,
+		.priv_data = priv_data};
+
+	return pccard_loop_tuple(p_dev->socket, p_dev->func, code, NULL,
+				 &loop, pcmcia_do_loop_tuple);
+};
+EXPORT_SYMBOL(pcmcia_loop_tuple);
 
-next_entry:
-		ret = pcmcia_get_next_tuple(p_dev, tuple);
+
+struct pcmcia_loop_get {
+	size_t len;
+	cisdata_t **buf;
+};
+
+/**
+ * pcmcia_do_get_tuple() - internal helper for pcmcia_get_tuple()
+ *
+ * pcmcia_do_get_tuple() is the internal callback for the call from
+ * pcmcia_get_tuple() to pcmcia_loop_tuple(). As we're only interested in
+ * the first tuple, return 0 unconditionally. Create a memory buffer large
+ * enough to hold the content of the tuple, and fill it with the tuple data.
+ * The caller is responsible to free the buffer.
+ */
+static int pcmcia_do_get_tuple(struct pcmcia_device *p_dev, tuple_t *tuple,
+			       void *priv)
+{
+	struct pcmcia_loop_get *get = priv;
+
+	*get->buf = kzalloc(tuple->TupleDataLen, GFP_KERNEL);
+	if (*get->buf) {
+		get->len = tuple->TupleDataLen;
+		memcpy(*get->buf, tuple->TupleData, tuple->TupleDataLen);
 	}
+	return 0;
+};
+
+/**
+ * pcmcia_get_tuple() - get first tuple from CIS
+ * @p_dev:	the struct pcmcia_device which we need to loop for.
+ * @code:	which CIS code shall we look for?
+ * @buf:        pointer to store the buffer to.
+ *
+ * pcmcia_get_tuple() gets the content of the first CIS entry of type @code.
+ * It returns the buffer length (or zero). The caller is responsible to free
+ * the buffer passed in @buf.
+ */
+size_t pcmcia_get_tuple(struct pcmcia_device *p_dev, cisdata_t code,
+			unsigned char **buf)
+{
+	struct pcmcia_loop_get get = {
+		.len = 0,
+		.buf = buf,
+	};
+
+	*get.buf = NULL;
+	pcmcia_loop_tuple(p_dev, code, pcmcia_do_get_tuple, &get);
+
+	return get.len;
+};
+EXPORT_SYMBOL(pcmcia_get_tuple);
+
+
+/**
+ * pcmcia_do_get_mac() - internal helper for pcmcia_get_mac_from_cis()
+ *
+ * pcmcia_do_get_mac() is the internal callback for the call from
+ * pcmcia_get_mac_from_cis() to pcmcia_loop_tuple(). We check whether the
+ * tuple contains a proper LAN_NODE_ID of length 6, and copy the data
+ * to struct net_device->dev_addr[i].
+ */
+static int pcmcia_do_get_mac(struct pcmcia_device *p_dev, tuple_t *tuple,
+			     void *priv)
+{
+	struct net_device *dev = priv;
+	int i;
+
+	if (tuple->TupleData[0] != CISTPL_FUNCE_LAN_NODE_ID)
+		return -EINVAL;
+	if (tuple->TupleDataLen < ETH_ALEN + 2) {
+		dev_warn(&p_dev->dev, "Invalid CIS tuple length for "
+			"LAN_NODE_ID\n");
+		return -EINVAL;
+	}
+
+	if (tuple->TupleData[1] != ETH_ALEN) {
+		dev_warn(&p_dev->dev, "Invalid header for LAN_NODE_ID\n");
+		return -EINVAL;
+	}
+	for (i = 0; i < 6; i++)
+		dev->dev_addr[i] = tuple->TupleData[i+2];
+	return 0;
+};
+
+/**
+ * pcmcia_get_mac_from_cis() - read out MAC address from CISTPL_FUNCE
+ * @p_dev:	the struct pcmcia_device for which we want the address.
+ * @dev:	a properly prepared struct net_device to store the info to.
+ *
+ * pcmcia_get_mac_from_cis() reads out the hardware MAC address from
+ * CISTPL_FUNCE and stores it into struct net_device *dev->dev_addr which
+ * must be set up properly by the driver (see examples!).
+ */
+int pcmcia_get_mac_from_cis(struct pcmcia_device *p_dev, struct net_device *dev)
+{
+	return pcmcia_loop_tuple(p_dev, CISTPL_FUNCE, pcmcia_do_get_mac, dev);
+};
+EXPORT_SYMBOL(pcmcia_get_mac_from_cis);
 
-	return ret;
-}
-EXPORT_SYMBOL(pcmcia_loop_config);
diff --git a/drivers/pcmcia/rsrc_mgr.c b/drivers/pcmcia/rsrc_mgr.c
index e592e0e0d7ed..de0e770ce6a3 100644
--- a/drivers/pcmcia/rsrc_mgr.c
+++ b/drivers/pcmcia/rsrc_mgr.c
@@ -18,6 +18,7 @@
 #include <pcmcia/cs_types.h>
 #include <pcmcia/ss.h>
 #include <pcmcia/cs.h>
+#include <pcmcia/cistpl.h>
 #include "cs_internal.h"
 
 
diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h
index a2be80b9a095..2eb6e24d1a6b 100644
--- a/include/pcmcia/ds.h
+++ b/include/pcmcia/ds.h
@@ -34,6 +34,7 @@
 struct pcmcia_socket;
 struct pcmcia_device;
 struct config_t;
+struct net_device;
 
 /* dynamic device IDs for PCMCIA device drivers. See
  * Documentation/pcmcia/driver.txt for details.
@@ -176,26 +177,39 @@ const char *pcmcia_error_ret(int ret);
 			   pcmcia_error_ret(ret));	\
 	}
 
-/* CIS access.
- * Use the pcmcia_* versions in PCMCIA drivers
+
+/*
+ * CIS access.
+ *
+ * Please use the following functions to access CIS tuples:
+ * - pcmcia_get_tuple()
+ * - pcmcia_loop_tuple()
+ * - pcmcia_get_mac_from_cis()
+ *
+ * To parse a tuple_t, pcmcia_parse_tuple() exists. Its interface
+ * might change in future.
  */
-int pcmcia_parse_tuple(tuple_t *tuple, cisparse_t *parse);
 
-int pccard_get_first_tuple(struct pcmcia_socket *s, unsigned int function,
-			   tuple_t *tuple);
-#define pcmcia_get_first_tuple(p_dev, tuple) \
-		pccard_get_first_tuple(p_dev->socket, p_dev->func, tuple)
+/* get the very first CIS entry of type @code. Note that buf is pointer
+ * to u8 *buf; and that you need to kfree(buf) afterwards. */
+size_t pcmcia_get_tuple(struct pcmcia_device *p_dev, cisdata_t code,
+			u8 **buf);
 
-int pccard_get_next_tuple(struct pcmcia_socket *s, unsigned int function,
-			  tuple_t *tuple);
-#define pcmcia_get_next_tuple(p_dev, tuple) \
-		pccard_get_next_tuple(p_dev->socket, p_dev->func, tuple)
+/* loop over CIS entries */
+int pcmcia_loop_tuple(struct pcmcia_device *p_dev, cisdata_t code,
+		      int (*loop_tuple) (struct pcmcia_device *p_dev,
+					 tuple_t *tuple,
+					 void *priv_data),
+		      void *priv_data);
 
-int pccard_get_tuple_data(struct pcmcia_socket *s, tuple_t *tuple);
-#define pcmcia_get_tuple_data(p_dev, tuple) \
-		pccard_get_tuple_data(p_dev->socket, tuple)
+/* get the MAC address from CISTPL_FUNCE */
+int pcmcia_get_mac_from_cis(struct pcmcia_device *p_dev,
+			    struct net_device *dev);
 
 
+/* parse a tuple_t */
+int pcmcia_parse_tuple(tuple_t *tuple, cisparse_t *parse);
+
 /* loop CIS entries for valid configuration */
 int pcmcia_loop_config(struct pcmcia_device *p_dev,
 		       int	(*conf_check)	(struct pcmcia_device *p_dev,
@@ -215,6 +229,21 @@ int pcmcia_reset_card(struct pcmcia_socket *skt);
 int pcmcia_access_configuration_register(struct pcmcia_device *p_dev,
 					 conf_reg_t *reg);
 
+/* deprecated -- do not use in drivers. */
+int pccard_get_first_tuple(struct pcmcia_socket *s, unsigned int function,
+			tuple_t *tuple);
+#define pcmcia_get_first_tuple(p_dev, tuple) \
+	pccard_get_first_tuple(p_dev->socket, p_dev->func, tuple)
+
+int pccard_get_next_tuple(struct pcmcia_socket *s, unsigned int function,
+			tuple_t *tuple);
+#define pcmcia_get_next_tuple(p_dev, tuple) \
+	pccard_get_next_tuple(p_dev->socket, p_dev->func, tuple)
+
+int pccard_get_tuple_data(struct pcmcia_socket *s, tuple_t *tuple);
+#define pcmcia_get_tuple_data(p_dev, tuple) \
+	pccard_get_tuple_data(p_dev->socket, tuple)
+
 /* device configuration */
 int pcmcia_request_io(struct pcmcia_device *p_dev, io_req_t *req);
 int pcmcia_request_irq(struct pcmcia_device *p_dev, irq_req_t *req);
-- 
cgit v1.2.3-70-g09d2


From 18a7a19b37838789452e0bd2855a51475628b971 Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Mon, 19 Oct 2009 00:07:39 +0200
Subject: pcmcia: remove pcmcia_get_{first,next}_tuple()

Remove the pcmcia_get_{first,next}_tuple() calls no longer needed by
(current) pcmcia device drivers.

Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
 drivers/pcmcia/cs_internal.h |  9 +++++++++
 include/pcmcia/ds.h          | 15 ---------------
 2 files changed, 9 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/drivers/pcmcia/cs_internal.h b/drivers/pcmcia/cs_internal.h
index 06a14c951e92..70432cae76eb 100644
--- a/drivers/pcmcia/cs_internal.h
+++ b/drivers/pcmcia/cs_internal.h
@@ -206,6 +206,15 @@ int pccard_loop_tuple(struct pcmcia_socket *s, unsigned int function,
 					 cisparse_t *parse,
 					 void *priv_data));
 
+int pccard_get_first_tuple(struct pcmcia_socket *s, unsigned int function,
+			tuple_t *tuple);
+
+int pccard_get_next_tuple(struct pcmcia_socket *s, unsigned int function,
+			tuple_t *tuple);
+
+int pccard_get_tuple_data(struct pcmcia_socket *s, tuple_t *tuple);
+
+
 /* rsrc_mgr.c */
 int pcmcia_validate_mem(struct pcmcia_socket *s);
 struct resource *pcmcia_find_io_region(unsigned long base,
diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h
index 2eb6e24d1a6b..6c37d4ed7832 100644
--- a/include/pcmcia/ds.h
+++ b/include/pcmcia/ds.h
@@ -229,21 +229,6 @@ int pcmcia_reset_card(struct pcmcia_socket *skt);
 int pcmcia_access_configuration_register(struct pcmcia_device *p_dev,
 					 conf_reg_t *reg);
 
-/* deprecated -- do not use in drivers. */
-int pccard_get_first_tuple(struct pcmcia_socket *s, unsigned int function,
-			tuple_t *tuple);
-#define pcmcia_get_first_tuple(p_dev, tuple) \
-	pccard_get_first_tuple(p_dev->socket, p_dev->func, tuple)
-
-int pccard_get_next_tuple(struct pcmcia_socket *s, unsigned int function,
-			tuple_t *tuple);
-#define pcmcia_get_next_tuple(p_dev, tuple) \
-	pccard_get_next_tuple(p_dev->socket, p_dev->func, tuple)
-
-int pccard_get_tuple_data(struct pcmcia_socket *s, tuple_t *tuple);
-#define pcmcia_get_tuple_data(p_dev, tuple) \
-	pccard_get_tuple_data(p_dev->socket, tuple)
-
 /* device configuration */
 int pcmcia_request_io(struct pcmcia_device *p_dev, io_req_t *req);
 int pcmcia_request_irq(struct pcmcia_device *p_dev, irq_req_t *req);
-- 
cgit v1.2.3-70-g09d2


From 9cb495bb41f07a3ebfc60d3b9d26017a1fd7050c Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Sat, 24 Oct 2009 15:57:22 +0200
Subject: pcmcia: remove now-defunct cs_error, pcmcia_error_{func,ret}

As all in-tree drivers have been converted to not use cs_error() any more,
drop these functions and definitions, and update the Documentation.

Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
 Documentation/pcmcia/driver-changes.txt |   5 ++
 drivers/pcmcia/ds.c                     | 101 --------------------------------
 include/pcmcia/ds.h                     |  36 ------------
 3 files changed, 5 insertions(+), 137 deletions(-)

(limited to 'include')

diff --git a/Documentation/pcmcia/driver-changes.txt b/Documentation/pcmcia/driver-changes.txt
index adfb83e58675..446f43b309df 100644
--- a/Documentation/pcmcia/driver-changes.txt
+++ b/Documentation/pcmcia/driver-changes.txt
@@ -1,5 +1,10 @@
 This file details changes in 2.6 which affect PCMCIA card driver authors:
 
+* no cs_error / CS_CHECK / CONFIG_PCMCIA_DEBUG (as of 2.6.33)
+   Instead of the cs_error() callback or the CS_CHECK() macro, please use
+   Linux-style checking of return values, and -- if necessary -- debug
+   messages using "dev_dbg()" or "pr_debug()".
+
 * New CIS tuple access (as of 2.6.33)
    Instead of pcmcia_get_{first,next}_tuple(), pcmcia_get_tuple_data() and
    pcmcia_parse_tuple(), a driver shall use "pcmcia_get_tuple()" if it is
diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c
index 5b069aeaf17a..05893d41dd41 100644
--- a/drivers/pcmcia/ds.c
+++ b/drivers/pcmcia/ds.c
@@ -46,107 +46,6 @@ spinlock_t pcmcia_dev_list_lock;
 
 /*====================================================================*/
 
-/* code which was in cs.c before */
-
-/* String tables for error messages */
-
-typedef struct lookup_t {
-    const int key;
-    const char *msg;
-} lookup_t;
-
-static const lookup_t error_table[] = {
-    { 0,			"Operation succeeded" },
-    { -EIO,			"Input/Output error" },
-    { -ENODEV,			"No card present" },
-    { -EINVAL,			"Bad parameter" },
-    { -EACCES,			"Configuration locked" },
-    { -EBUSY,			"Resource in use" },
-    { -ENOSPC,			"No more items" },
-    { -ENOMEM,			"Out of resource" },
-};
-
-
-static const lookup_t service_table[] = {
-    { AccessConfigurationRegister,	"AccessConfigurationRegister" },
-    { AddSocketServices,		"AddSocketServices" },
-    { AdjustResourceInfo,		"AdjustResourceInfo" },
-    { CheckEraseQueue,			"CheckEraseQueue" },
-    { CloseMemory,			"CloseMemory" },
-    { DeregisterClient,			"DeregisterClient" },
-    { DeregisterEraseQueue,		"DeregisterEraseQueue" },
-    { GetCardServicesInfo,		"GetCardServicesInfo" },
-    { GetClientInfo,			"GetClientInfo" },
-    { GetConfigurationInfo,		"GetConfigurationInfo" },
-    { GetEventMask,			"GetEventMask" },
-    { GetFirstClient,			"GetFirstClient" },
-    { GetFirstRegion,			"GetFirstRegion" },
-    { GetFirstTuple,			"GetFirstTuple" },
-    { GetNextClient,			"GetNextClient" },
-    { GetNextRegion,			"GetNextRegion" },
-    { GetNextTuple,			"GetNextTuple" },
-    { GetStatus,			"GetStatus" },
-    { GetTupleData,			"GetTupleData" },
-    { MapMemPage,			"MapMemPage" },
-    { ModifyConfiguration,		"ModifyConfiguration" },
-    { ModifyWindow,			"ModifyWindow" },
-    { OpenMemory,			"OpenMemory" },
-    { ParseTuple,			"ParseTuple" },
-    { ReadMemory,			"ReadMemory" },
-    { RegisterClient,			"RegisterClient" },
-    { RegisterEraseQueue,		"RegisterEraseQueue" },
-    { RegisterMTD,			"RegisterMTD" },
-    { ReleaseConfiguration,		"ReleaseConfiguration" },
-    { ReleaseIO,			"ReleaseIO" },
-    { ReleaseIRQ,			"ReleaseIRQ" },
-    { ReleaseWindow,			"ReleaseWindow" },
-    { RequestConfiguration,		"RequestConfiguration" },
-    { RequestIO,			"RequestIO" },
-    { RequestIRQ,			"RequestIRQ" },
-    { RequestSocketMask,		"RequestSocketMask" },
-    { RequestWindow,			"RequestWindow" },
-    { ResetCard,			"ResetCard" },
-    { SetEventMask,			"SetEventMask" },
-    { ValidateCIS,			"ValidateCIS" },
-    { WriteMemory,			"WriteMemory" },
-    { BindDevice,			"BindDevice" },
-    { BindMTD,				"BindMTD" },
-    { ReportError,			"ReportError" },
-    { SuspendCard,			"SuspendCard" },
-    { ResumeCard,			"ResumeCard" },
-    { EjectCard,			"EjectCard" },
-    { InsertCard,			"InsertCard" },
-    { ReplaceCIS,			"ReplaceCIS" }
-};
-
-const char *pcmcia_error_func(int func)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(service_table); i++)
-		if (service_table[i].key == func)
-			return service_table[i].msg;
-
-	return "Unknown service number";
-}
-EXPORT_SYMBOL(pcmcia_error_func);
-
-const char *pcmcia_error_ret(int ret)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(error_table); i++)
-		if (error_table[i].key == ret)
-			return error_table[i].msg;
-
-	return "unknown";
-}
-EXPORT_SYMBOL(pcmcia_error_ret);
-
-/*======================================================================*/
-
-
-
 static void pcmcia_check_driver(struct pcmcia_driver *p_drv)
 {
 	struct pcmcia_device_id *did = p_drv->id_table;
diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h
index 6c37d4ed7832..d82392de4e92 100644
--- a/include/pcmcia/ds.h
+++ b/include/pcmcia/ds.h
@@ -142,42 +142,6 @@ struct pcmcia_device {
 #define handle_to_dev(handle) (handle->dev)
 
 
-/* (deprecated) error reporting by PCMCIA devices. Use dev_printk()
- * or dev_dbg() directly in the driver, without referring to pcmcia_error_func()
- * and/or pcmcia_error_ret() for those functions will go away soon.
- */
-enum service {
-    AccessConfigurationRegister, AddSocketServices,
-    AdjustResourceInfo, CheckEraseQueue, CloseMemory, CopyMemory,
-    DeregisterClient, DeregisterEraseQueue, GetCardServicesInfo,
-    GetClientInfo, GetConfigurationInfo, GetEventMask,
-    GetFirstClient, GetFirstPartion, GetFirstRegion, GetFirstTuple,
-    GetNextClient, GetNextPartition, GetNextRegion, GetNextTuple,
-    GetStatus, GetTupleData, MapLogSocket, MapLogWindow, MapMemPage,
-    MapPhySocket, MapPhyWindow, ModifyConfiguration, ModifyWindow,
-    OpenMemory, ParseTuple, ReadMemory, RegisterClient,
-    RegisterEraseQueue, RegisterMTD, RegisterTimer,
-    ReleaseConfiguration, ReleaseExclusive, ReleaseIO, ReleaseIRQ,
-    ReleaseSocketMask, ReleaseWindow, ReplaceSocketServices,
-    RequestConfiguration, RequestExclusive, RequestIO, RequestIRQ,
-    RequestSocketMask, RequestWindow, ResetCard, ReturnSSEntry,
-    SetEventMask, SetRegion, ValidateCIS, VendorSpecific,
-    WriteMemory, BindDevice, BindMTD, ReportError,
-    SuspendCard, ResumeCard, EjectCard, InsertCard, ReplaceCIS,
-    GetFirstWindow, GetNextWindow, GetMemPage
-};
-const char *pcmcia_error_func(int func);
-const char *pcmcia_error_ret(int ret);
-
-#define cs_error(p_dev, func, ret)			\
-	{						\
-		dev_printk(KERN_NOTICE, &p_dev->dev,	\
-			   "%s : %s\n",			\
-			   pcmcia_error_func(func),	\
-			   pcmcia_error_ret(ret));	\
-	}
-
-
 /*
  * CIS access.
  *
-- 
cgit v1.2.3-70-g09d2


From 1689164a272a962572a1f31af715dfe462cf7910 Mon Sep 17 00:00:00 2001
From: Russell King - ARM Linux <linux@arm.linux.org.uk>
Date: Sun, 29 Mar 2009 22:43:43 +0100
Subject: PCMCIA: ss: allow PCI IRQs > 255

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
 include/pcmcia/ss.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/pcmcia/ss.h b/include/pcmcia/ss.h
index d696a692d94a..753da9b087d3 100644
--- a/include/pcmcia/ss.h
+++ b/include/pcmcia/ss.h
@@ -172,7 +172,7 @@ struct pcmcia_socket {
 	u_int				irq_mask;
 	u_int				map_size;
 	u_int				io_offset;
-	u_char				pci_irq;
+	u_int				pci_irq;
 	struct pci_dev *		cb_dev;
 
 
-- 
cgit v1.2.3-70-g09d2


From dd8dbf2e6880e30c00b18600c962d0cb5a03c555 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Nov 2009 16:35:32 +1100
Subject: security: report the module name to security_module_request

For SELinux to do better filtering in userspace we send the name of the
module along with the AVC denial when a program is denied module_request.

Example output:

type=SYSCALL msg=audit(11/03/2009 10:59:43.510:9) : arch=x86_64 syscall=write success=yes exit=2 a0=3 a1=7fc28c0d56c0 a2=2 a3=7fffca0d7440 items=0 ppid=1727 pid=1729 auid=unset uid=root gid=root euid=root suid=root fsuid=root egid=root sgid=root fsgid=root tty=(none) ses=unset comm=rpc.nfsd exe=/usr/sbin/rpc.nfsd subj=system_u:system_r:nfsd_t:s0 key=(null)
type=AVC msg=audit(11/03/2009 10:59:43.510:9) : avc:  denied  { module_request } for  pid=1729 comm=rpc.nfsd kmod="net-pf-10" scontext=system_u:system_r:nfsd_t:s0 tcontext=system_u:system_r:kernel_t:s0 tclass=system

Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/lsm_audit.h | 18 ++++++++++--------
 include/linux/security.h  |  7 ++++---
 kernel/kmod.c             |  8 ++++----
 security/capability.c     |  2 +-
 security/lsm_audit.c      |  4 ++++
 security/security.c       |  4 ++--
 security/selinux/hooks.c  | 13 +++++++++++--
 7 files changed, 36 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h
index 190c37854870..f78f83d7663f 100644
--- a/include/linux/lsm_audit.h
+++ b/include/linux/lsm_audit.h
@@ -26,14 +26,15 @@
 
 /* Auxiliary data to use in generating the audit record. */
 struct common_audit_data {
-	char    type;
-#define LSM_AUDIT_DATA_FS      1
-#define LSM_AUDIT_DATA_NET     2
-#define LSM_AUDIT_DATA_CAP     3
-#define LSM_AUDIT_DATA_IPC     4
-#define LSM_AUDIT_DATA_TASK    5
-#define LSM_AUDIT_DATA_KEY     6
-#define LSM_AUDIT_NO_AUDIT     7
+	char type;
+#define LSM_AUDIT_DATA_FS	1
+#define LSM_AUDIT_DATA_NET	2
+#define LSM_AUDIT_DATA_CAP	3
+#define LSM_AUDIT_DATA_IPC	4
+#define LSM_AUDIT_DATA_TASK	5
+#define LSM_AUDIT_DATA_KEY	6
+#define LSM_AUDIT_NO_AUDIT	7
+#define LSM_AUDIT_DATA_KMOD	8
 	struct task_struct *tsk;
 	union 	{
 		struct {
@@ -66,6 +67,7 @@ struct common_audit_data {
 			char *key_desc;
 		} key_struct;
 #endif
+		char *kmod_name;
 	} u;
 	/* this union contains LSM specific data */
 	union {
diff --git a/include/linux/security.h b/include/linux/security.h
index ed0faea60b82..466cbadbd1ef 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -706,6 +706,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  * @kernel_module_request:
  *	Ability to trigger the kernel to automatically upcall to userspace for
  *	userspace to load a kernel module with the given name.
+ *	@kmod_name name of the module requested by the kernel
  *	Return 0 if successful.
  * @task_setuid:
  *	Check permission before setting one or more of the user identity
@@ -1577,7 +1578,7 @@ struct security_operations {
 	void (*cred_transfer)(struct cred *new, const struct cred *old);
 	int (*kernel_act_as)(struct cred *new, u32 secid);
 	int (*kernel_create_files_as)(struct cred *new, struct inode *inode);
-	int (*kernel_module_request)(void);
+	int (*kernel_module_request)(char *kmod_name);
 	int (*task_setuid) (uid_t id0, uid_t id1, uid_t id2, int flags);
 	int (*task_fix_setuid) (struct cred *new, const struct cred *old,
 				int flags);
@@ -1842,7 +1843,7 @@ void security_commit_creds(struct cred *new, const struct cred *old);
 void security_transfer_creds(struct cred *new, const struct cred *old);
 int security_kernel_act_as(struct cred *new, u32 secid);
 int security_kernel_create_files_as(struct cred *new, struct inode *inode);
-int security_kernel_module_request(void);
+int security_kernel_module_request(char *kmod_name);
 int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags);
 int security_task_fix_setuid(struct cred *new, const struct cred *old,
 			     int flags);
@@ -2407,7 +2408,7 @@ static inline int security_kernel_create_files_as(struct cred *cred,
 	return 0;
 }
 
-static inline int security_kernel_module_request(void)
+static inline int security_kernel_module_request(char *kmod_name)
 {
 	return 0;
 }
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 9fcb53a11f87..25b103190364 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -80,16 +80,16 @@ int __request_module(bool wait, const char *fmt, ...)
 #define MAX_KMOD_CONCURRENT 50	/* Completely arbitrary value - KAO */
 	static int kmod_loop_msg;
 
-	ret = security_kernel_module_request();
-	if (ret)
-		return ret;
-
 	va_start(args, fmt);
 	ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args);
 	va_end(args);
 	if (ret >= MODULE_NAME_LEN)
 		return -ENAMETOOLONG;
 
+	ret = security_kernel_module_request(module_name);
+	if (ret)
+		return ret;
+
 	/* If modprobe needs a service that is in a module, we get a recursive
 	 * loop.  Limit the number of running kmod threads to max_threads/2 or
 	 * MAX_KMOD_CONCURRENT, whichever is the smaller.  A cleaner method
diff --git a/security/capability.c b/security/capability.c
index 4f3ab476937f..5c700e1a4fd3 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -421,7 +421,7 @@ static int cap_kernel_create_files_as(struct cred *new, struct inode *inode)
 	return 0;
 }
 
-static int cap_kernel_module_request(void)
+static int cap_kernel_module_request(char *kmod_name)
 {
 	return 0;
 }
diff --git a/security/lsm_audit.c b/security/lsm_audit.c
index 3bb90b6f1dd3..51bd0fd9c9f0 100644
--- a/security/lsm_audit.c
+++ b/security/lsm_audit.c
@@ -354,6 +354,10 @@ static void dump_common_audit_data(struct audit_buffer *ab,
 		}
 		break;
 #endif
+	case LSM_AUDIT_DATA_KMOD:
+		audit_log_format(ab, " kmod=");
+		audit_log_untrustedstring(ab, a->u.kmod_name);
+		break;
 	} /* switch (a->type) */
 }
 
diff --git a/security/security.c b/security/security.c
index aad71b2ca195..24e060be9fa5 100644
--- a/security/security.c
+++ b/security/security.c
@@ -764,9 +764,9 @@ int security_kernel_create_files_as(struct cred *new, struct inode *inode)
 	return security_ops->kernel_create_files_as(new, inode);
 }
 
-int security_kernel_module_request(void)
+int security_kernel_module_request(char *kmod_name)
 {
-	return security_ops->kernel_module_request();
+	return security_ops->kernel_module_request(kmod_name);
 }
 
 int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags)
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index a29d6612a328..c96d63ec4753 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3337,9 +3337,18 @@ static int selinux_kernel_create_files_as(struct cred *new, struct inode *inode)
 	return 0;
 }
 
-static int selinux_kernel_module_request(void)
+static int selinux_kernel_module_request(char *kmod_name)
 {
-	return task_has_system(current, SYSTEM__MODULE_REQUEST);
+	u32 sid;
+	struct common_audit_data ad;
+
+	sid = task_sid(current);
+
+	COMMON_AUDIT_DATA_INIT(&ad, KMOD);
+	ad.u.kmod_name = kmod_name;
+
+	return avc_has_perm(sid, SECINITSID_KERNEL, SECCLASS_SYSTEM,
+			    SYSTEM__MODULE_REQUEST, &ad);
 }
 
 static int selinux_task_setpgid(struct task_struct *p, pid_t pgid)
-- 
cgit v1.2.3-70-g09d2


From 9d5ce73a64be2be8112147a3e0b551ad9cd1247b Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Tue, 10 Nov 2009 19:46:16 +0900
Subject: x86: intel-iommu: Convert detect_intel_iommu to use iommu_init hook

This changes detect_intel_iommu() to set intel_iommu_init() to
iommu_init hook if detect_intel_iommu() finds the IOMMU.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: chrisw@sous-sol.org
Cc: dwmw2@infradead.org
Cc: joerg.roedel@amd.com
Cc: muli@il.ibm.com
LKML-Reference: <1257849980-22640-6-git-send-email-fujita.tomonori@lab.ntt.co.jp>
[ -v2: build fix for the !CONFIG_DMAR case ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/pci-dma.c |  2 --
 drivers/pci/dmar.c        |  4 ++++
 include/linux/dmar.h      | 15 ++++-----------
 3 files changed, 8 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 5ca44a9301a0..bed05e2e5890 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -294,8 +294,6 @@ static int __init pci_iommu_init(void)
 
 	x86_init.iommu.iommu_init();
 
-	intel_iommu_init();
-
 	no_iommu_init();
 	return 0;
 }
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 22b02c6df854..bce9cd7c755a 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -616,6 +616,10 @@ void __init detect_intel_iommu(void)
 		if (ret && !no_iommu && !iommu_detected && !swiotlb &&
 		    !dmar_disabled)
 			iommu_detected = 1;
+#endif
+#ifdef CONFIG_X86
+		if (ret)
+			x86_init.iommu.iommu_init = intel_iommu_init;
 #endif
 	}
 	early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index 4a2b162c256a..5de4c9e5856d 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -208,16 +208,9 @@ struct dmar_atsr_unit {
 	u8 include_all:1;		/* include all ports */
 };
 
-/* Intel DMAR  initialization functions */
 extern int intel_iommu_init(void);
-#else
-static inline int intel_iommu_init(void)
-{
-#ifdef CONFIG_INTR_REMAP
-	return dmar_dev_scope_init();
-#else
-	return -ENODEV;
-#endif
-}
-#endif /* !CONFIG_DMAR */
+#else /* !CONFIG_DMAR: */
+static inline int intel_iommu_init(void) { return -ENODEV; }
+#endif /* CONFIG_DMAR */
+
 #endif /* __DMAR_H__ */
-- 
cgit v1.2.3-70-g09d2


From 9f993ac3f708b661207ed7de521f245586217a68 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Tue, 10 Nov 2009 19:46:17 +0900
Subject: bootmem: Add free_bootmem_late()

Add a new function for freeing bootmem after the bootmem
allocator has been released and the unreserved pages given to
the page allocator.

This allows us to reserve bootmem and then release it if we
later discover it was not needed.

( This new API will be used by the swiotlb code to recover
  a significant amount of RAM (64MB). )

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: chrisw@sous-sol.org
Cc: dwmw2@infradead.org
Cc: joerg.roedel@amd.com
Cc: muli@il.ibm.com
Cc: hannes@cmpxchg.org
Cc: tj@kernel.org
Cc: akpm@linux-foundation.org
Cc: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <1257849980-22640-7-git-send-email-fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/bootmem.h |  1 +
 mm/bootmem.c            | 24 ++++++++++++++++++++++++
 2 files changed, 25 insertions(+)

(limited to 'include')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index dd97fb8408a8..b10ec49ee2dd 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -53,6 +53,7 @@ extern void free_bootmem_node(pg_data_t *pgdat,
 			      unsigned long addr,
 			      unsigned long size);
 extern void free_bootmem(unsigned long addr, unsigned long size);
+extern void free_bootmem_late(unsigned long addr, unsigned long size);
 
 /*
  * Flags for reserve_bootmem (also if CONFIG_HAVE_ARCH_BOOTMEM_NODE,
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 555d5d2731c6..d1dc23cc7f10 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -143,6 +143,30 @@ unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
 	return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages);
 }
 
+/*
+ * free_bootmem_late - free bootmem pages directly to page allocator
+ * @addr: starting address of the range
+ * @size: size of the range in bytes
+ *
+ * This is only useful when the bootmem allocator has already been torn
+ * down, but we are still initializing the system.  Pages are given directly
+ * to the page allocator, no bootmem metadata is updated because it is gone.
+ */
+void __init free_bootmem_late(unsigned long addr, unsigned long size)
+{
+	unsigned long cursor, end;
+
+	kmemleak_free_part(__va(addr), size);
+
+	cursor = PFN_UP(addr);
+	end = PFN_DOWN(addr + size);
+
+	for (; cursor < end; cursor++) {
+		__free_pages_bootmem(pfn_to_page(cursor), 0);
+		totalram_pages++;
+	}
+}
+
 static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 {
 	int aligned;
-- 
cgit v1.2.3-70-g09d2


From 5740afdb68abadc473fd5392df733558a58c1254 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Tue, 10 Nov 2009 19:46:18 +0900
Subject: swiotlb: Add swiotlb_free() function

swiotlb_free() function frees all allocated memory for swiotlb.

We need to initialize swiotlb before IOMMU initialization (x86
and powerpc needs to allocate memory from bootmem allocator). If
IOMMU initialization is successful, we need to free swiotlb
resource (don't want to waste 64MB).

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: chrisw@sous-sol.org
Cc: dwmw2@infradead.org
Cc: joerg.roedel@amd.com
Cc: muli@il.ibm.com
LKML-Reference: <1257849980-22640-8-git-send-email-fujita.tomonori@lab.ntt.co.jp>
[ -v2: build fix for the !CONFIG_SWIOTLB case ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/swiotlb.h |  6 ++++++
 lib/swiotlb.c           | 30 ++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)

(limited to 'include')

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 73b1f1cec423..59bafa690290 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -88,4 +88,10 @@ swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr);
 extern int
 swiotlb_dma_supported(struct device *hwdev, u64 mask);
 
+#ifdef CONFIG_SWIOTLB
+extern void __init swiotlb_free(void);
+#else
+static inline void swiotlb_free(void) { }
+#endif
+
 #endif /* __LINUX_SWIOTLB_H */
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index ac25cd28e807..eee512b63f17 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -97,6 +97,8 @@ static phys_addr_t *io_tlb_orig_addr;
  */
 static DEFINE_SPINLOCK(io_tlb_lock);
 
+static int late_alloc;
+
 static int __init
 setup_io_tlb_npages(char *str)
 {
@@ -262,6 +264,8 @@ swiotlb_late_init_with_default_size(size_t default_size)
 
 	swiotlb_print_info(bytes);
 
+	late_alloc = 1;
+
 	return 0;
 
 cleanup4:
@@ -281,6 +285,32 @@ cleanup1:
 	return -ENOMEM;
 }
 
+void __init swiotlb_free(void)
+{
+	if (!io_tlb_overflow_buffer)
+		return;
+
+	if (late_alloc) {
+		free_pages((unsigned long)io_tlb_overflow_buffer,
+			   get_order(io_tlb_overflow));
+		free_pages((unsigned long)io_tlb_orig_addr,
+			   get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
+		free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
+								 sizeof(int)));
+		free_pages((unsigned long)io_tlb_start,
+			   get_order(io_tlb_nslabs << IO_TLB_SHIFT));
+	} else {
+		free_bootmem_late(__pa(io_tlb_overflow_buffer),
+				  io_tlb_overflow);
+		free_bootmem_late(__pa(io_tlb_orig_addr),
+				  io_tlb_nslabs * sizeof(phys_addr_t));
+		free_bootmem_late(__pa(io_tlb_list),
+				  io_tlb_nslabs * sizeof(int));
+		free_bootmem_late(__pa(io_tlb_start),
+				  io_tlb_nslabs << IO_TLB_SHIFT);
+	}
+}
+
 static int is_swiotlb_buffer(phys_addr_t paddr)
 {
 	return paddr >= virt_to_phys(io_tlb_start) &&
-- 
cgit v1.2.3-70-g09d2


From ad32e8cb86e7894aac51c8963eaa9f36bb8a4e14 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Tue, 10 Nov 2009 19:46:19 +0900
Subject: swiotlb: Defer swiotlb init printing, export swiotlb_print_info()

This enables us to avoid printing swiotlb memory info when we
initialize swiotlb. After swiotlb initialization, we could find
that we don't need swiotlb.

This patch removes the code to print swiotlb memory info in
swiotlb_init() and exports the function to do that.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: chrisw@sous-sol.org
Cc: dwmw2@infradead.org
Cc: joerg.roedel@amd.com
Cc: muli@il.ibm.com
Cc: tony.luck@intel.com
Cc: benh@kernel.crashing.org
LKML-Reference: <1257849980-22640-9-git-send-email-fujita.tomonori@lab.ntt.co.jp>
[ -v2: merge up conflict ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/ia64/kernel/pci-swiotlb.c |  4 ++--
 arch/powerpc/kernel/setup_32.c |  2 +-
 arch/powerpc/kernel/setup_64.c |  2 +-
 arch/x86/kernel/pci-swiotlb.c  |  3 +--
 include/linux/swiotlb.h        |  4 ++--
 lib/swiotlb.c                  | 15 ++++++++-------
 6 files changed, 15 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/kernel/pci-swiotlb.c b/arch/ia64/kernel/pci-swiotlb.c
index 285aae8431c6..53292abf846c 100644
--- a/arch/ia64/kernel/pci-swiotlb.c
+++ b/arch/ia64/kernel/pci-swiotlb.c
@@ -41,7 +41,7 @@ struct dma_map_ops swiotlb_dma_ops = {
 void __init swiotlb_dma_init(void)
 {
 	dma_ops = &swiotlb_dma_ops;
-	swiotlb_init();
+	swiotlb_init(1);
 }
 
 void __init pci_swiotlb_init(void)
@@ -51,7 +51,7 @@ void __init pci_swiotlb_init(void)
 		swiotlb = 1;
 		printk(KERN_INFO "PCI-DMA: Re-initialize machine vector.\n");
 		machvec_init("dig");
-		swiotlb_init();
+		swiotlb_init(1);
 		dma_ops = &swiotlb_dma_ops;
 #else
 		panic("Unable to find Intel IOMMU");
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 53bcf3d792db..b152de3e64d4 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -345,7 +345,7 @@ void __init setup_arch(char **cmdline_p)
 
 #ifdef CONFIG_SWIOTLB
 	if (ppc_swiotlb_enable)
-		swiotlb_init();
+		swiotlb_init(1);
 #endif
 
 	paging_init();
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 04f638d82fb3..df2c9e932b37 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -550,7 +550,7 @@ void __init setup_arch(char **cmdline_p)
 
 #ifdef CONFIG_SWIOTLB
 	if (ppc_swiotlb_enable)
-		swiotlb_init();
+		swiotlb_init(1);
 #endif
 
 	paging_init();
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index aaa6b7839f1e..ea20ef7ca523 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -52,8 +52,7 @@ void __init pci_swiotlb_init(void)
 	if (swiotlb_force)
 		swiotlb = 1;
 	if (swiotlb) {
-		printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n");
-		swiotlb_init();
+		swiotlb_init(0);
 		dma_ops = &swiotlb_dma_ops;
 	}
 }
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 59bafa690290..eb9bdb4d4854 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -20,8 +20,7 @@ struct scatterlist;
  */
 #define IO_TLB_SHIFT 11
 
-extern void
-swiotlb_init(void);
+extern void swiotlb_init(int verbose);
 
 extern void
 *swiotlb_alloc_coherent(struct device *hwdev, size_t size,
@@ -94,4 +93,5 @@ extern void __init swiotlb_free(void);
 static inline void swiotlb_free(void) { }
 #endif
 
+extern void swiotlb_print_info(void);
 #endif /* __LINUX_SWIOTLB_H */
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index eee512b63f17..0c12d7cce300 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -123,8 +123,9 @@ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
 	return phys_to_dma(hwdev, virt_to_phys(address));
 }
 
-static void swiotlb_print_info(unsigned long bytes)
+void swiotlb_print_info(void)
 {
+	unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT;
 	phys_addr_t pstart, pend;
 
 	pstart = virt_to_phys(io_tlb_start);
@@ -142,7 +143,7 @@ static void swiotlb_print_info(unsigned long bytes)
  * structures for the software IO TLB used to implement the DMA API.
  */
 void __init
-swiotlb_init_with_default_size(size_t default_size)
+swiotlb_init_with_default_size(size_t default_size, int verbose)
 {
 	unsigned long i, bytes;
 
@@ -178,14 +179,14 @@ swiotlb_init_with_default_size(size_t default_size)
 	io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow);
 	if (!io_tlb_overflow_buffer)
 		panic("Cannot allocate SWIOTLB overflow buffer!\n");
-
-	swiotlb_print_info(bytes);
+	if (verbose)
+		swiotlb_print_info();
 }
 
 void __init
-swiotlb_init(void)
+swiotlb_init(int verbose)
 {
-	swiotlb_init_with_default_size(64 * (1<<20));	/* default to 64MB */
+	swiotlb_init_with_default_size(64 * (1<<20), verbose);	/* default to 64MB */
 }
 
 /*
@@ -262,7 +263,7 @@ swiotlb_late_init_with_default_size(size_t default_size)
 	if (!io_tlb_overflow_buffer)
 		goto cleanup4;
 
-	swiotlb_print_info(bytes);
+	swiotlb_print_info();
 
 	late_alloc = 1;
 
-- 
cgit v1.2.3-70-g09d2


From 67178767b936fb47a3a5e88097cff41ccbda7acb Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 13 Nov 2009 10:06:34 +0100
Subject: tracing: Rename 'lockdep' event subsystem into 'lock'

Lockdep events subsystem gathers various locking related events
such as a request, release, contention or acquisition of a lock.

The name of this event subsystem is a bit of a misnomer since
these events are not quite related to lockdep but more generally
to locking, ie: these events are not reporting lock dependencies
or possible deadlock scenario but pure locking events.

Hence this rename.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
LKML-Reference: <1258103194-843-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/trace/events/lock.h    | 96 ++++++++++++++++++++++++++++++++++++++++++
 include/trace/events/lockdep.h | 96 ------------------------------------------
 kernel/lockdep.c               |  2 +-
 3 files changed, 97 insertions(+), 97 deletions(-)
 create mode 100644 include/trace/events/lock.h
 delete mode 100644 include/trace/events/lockdep.h

(limited to 'include')

diff --git a/include/trace/events/lock.h b/include/trace/events/lock.h
new file mode 100644
index 000000000000..a870ba125aa8
--- /dev/null
+++ b/include/trace/events/lock.h
@@ -0,0 +1,96 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM lock
+
+#if !defined(_TRACE_LOCK_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_LOCK_H
+
+#include <linux/lockdep.h>
+#include <linux/tracepoint.h>
+
+#ifdef CONFIG_LOCKDEP
+
+TRACE_EVENT(lock_acquire,
+
+	TP_PROTO(struct lockdep_map *lock, unsigned int subclass,
+		int trylock, int read, int check,
+		struct lockdep_map *next_lock, unsigned long ip),
+
+	TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip),
+
+	TP_STRUCT__entry(
+		__field(unsigned int, flags)
+		__string(name, lock->name)
+	),
+
+	TP_fast_assign(
+		__entry->flags = (trylock ? 1 : 0) | (read ? 2 : 0);
+		__assign_str(name, lock->name);
+	),
+
+	TP_printk("%s%s%s", (__entry->flags & 1) ? "try " : "",
+		  (__entry->flags & 2) ? "read " : "",
+		  __get_str(name))
+);
+
+TRACE_EVENT(lock_release,
+
+	TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip),
+
+	TP_ARGS(lock, nested, ip),
+
+	TP_STRUCT__entry(
+		__string(name, lock->name)
+	),
+
+	TP_fast_assign(
+		__assign_str(name, lock->name);
+	),
+
+	TP_printk("%s", __get_str(name))
+);
+
+#ifdef CONFIG_LOCK_STAT
+
+TRACE_EVENT(lock_contended,
+
+	TP_PROTO(struct lockdep_map *lock, unsigned long ip),
+
+	TP_ARGS(lock, ip),
+
+	TP_STRUCT__entry(
+		__string(name, lock->name)
+	),
+
+	TP_fast_assign(
+		__assign_str(name, lock->name);
+	),
+
+	TP_printk("%s", __get_str(name))
+);
+
+TRACE_EVENT(lock_acquired,
+	TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime),
+
+	TP_ARGS(lock, ip, waittime),
+
+	TP_STRUCT__entry(
+		__string(name, lock->name)
+		__field(unsigned long, wait_usec)
+		__field(unsigned long, wait_nsec_rem)
+	),
+	TP_fast_assign(
+		__assign_str(name, lock->name);
+		__entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC);
+		__entry->wait_usec = (unsigned long) waittime;
+	),
+	TP_printk("%s (%lu.%03lu us)", __get_str(name), __entry->wait_usec,
+				       __entry->wait_nsec_rem)
+);
+
+#endif
+#endif
+
+#endif /* _TRACE_LOCK_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/lockdep.h b/include/trace/events/lockdep.h
deleted file mode 100644
index bcf1d209a00d..000000000000
--- a/include/trace/events/lockdep.h
+++ /dev/null
@@ -1,96 +0,0 @@
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM lockdep
-
-#if !defined(_TRACE_LOCKDEP_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_LOCKDEP_H
-
-#include <linux/lockdep.h>
-#include <linux/tracepoint.h>
-
-#ifdef CONFIG_LOCKDEP
-
-TRACE_EVENT(lock_acquire,
-
-	TP_PROTO(struct lockdep_map *lock, unsigned int subclass,
-		int trylock, int read, int check,
-		struct lockdep_map *next_lock, unsigned long ip),
-
-	TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip),
-
-	TP_STRUCT__entry(
-		__field(unsigned int, flags)
-		__string(name, lock->name)
-	),
-
-	TP_fast_assign(
-		__entry->flags = (trylock ? 1 : 0) | (read ? 2 : 0);
-		__assign_str(name, lock->name);
-	),
-
-	TP_printk("%s%s%s", (__entry->flags & 1) ? "try " : "",
-		  (__entry->flags & 2) ? "read " : "",
-		  __get_str(name))
-);
-
-TRACE_EVENT(lock_release,
-
-	TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip),
-
-	TP_ARGS(lock, nested, ip),
-
-	TP_STRUCT__entry(
-		__string(name, lock->name)
-	),
-
-	TP_fast_assign(
-		__assign_str(name, lock->name);
-	),
-
-	TP_printk("%s", __get_str(name))
-);
-
-#ifdef CONFIG_LOCK_STAT
-
-TRACE_EVENT(lock_contended,
-
-	TP_PROTO(struct lockdep_map *lock, unsigned long ip),
-
-	TP_ARGS(lock, ip),
-
-	TP_STRUCT__entry(
-		__string(name, lock->name)
-	),
-
-	TP_fast_assign(
-		__assign_str(name, lock->name);
-	),
-
-	TP_printk("%s", __get_str(name))
-);
-
-TRACE_EVENT(lock_acquired,
-	TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime),
-
-	TP_ARGS(lock, ip, waittime),
-
-	TP_STRUCT__entry(
-		__string(name, lock->name)
-		__field(unsigned long, wait_usec)
-		__field(unsigned long, wait_nsec_rem)
-	),
-	TP_fast_assign(
-		__assign_str(name, lock->name);
-		__entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC);
-		__entry->wait_usec = (unsigned long) waittime;
-	),
-	TP_printk("%s (%lu.%03lu us)", __get_str(name), __entry->wait_usec,
-				       __entry->wait_nsec_rem)
-);
-
-#endif
-#endif
-
-#endif /* _TRACE_LOCKDEP_H */
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 9af56723c096..f5dcd36d3151 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -49,7 +49,7 @@
 #include "lockdep_internals.h"
 
 #define CREATE_TRACE_POINTS
-#include <trace/events/lockdep.h>
+#include <trace/events/lock.h>
 
 #ifdef CONFIG_PROVE_LOCKING
 int prove_locking = 1;
-- 
cgit v1.2.3-70-g09d2


From 6beb000923882f6204ea2cfcd932e568e900803f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 9 Nov 2009 15:21:34 +0000
Subject: locking: Make inlining decision Kconfig based

commit 892a7c67 (locking: Allow arch-inlined spinlocks) implements the
selection of which lock functions are inlined based on defines in
arch/.../spinlock.h: #define __always_inline__LOCK_FUNCTION

Despite of the name __always_inline__* the lock functions can be built
out of line depending on config options. Also if the arch does not set
some inline defines the generic code might set them; again depending on
config options.

This makes it unnecessary hard to figure out when and which lock
functions are inlined. Aside of that it makes it way harder and
messier for -rt to manipulate the lock functions.

Convert the inlining decision to CONFIG switches. Each lock function
is inlined depending on CONFIG_INLINE_*. The configs implement the
existing dependencies. The architecture code can select ARCH_INLINE_*
to signal that it wants the corresponding lock function inlined.
ARCH_INLINE_* is necessary as Kconfig ignores "depends on"
restrictions when a config element is selected.

No functional change.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <20091109151428.504477141@linutronix.de>
Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 arch/s390/Kconfig                |  28 ++++++
 arch/s390/include/asm/spinlock.h |  29 ------
 include/linux/spinlock_api_smp.h |  75 ++++++---------
 init/Kconfig                     |   1 +
 kernel/Kconfig.locks             | 199 +++++++++++++++++++++++++++++++++++++++
 kernel/spinlock.c                |  56 +++++------
 6 files changed, 284 insertions(+), 104 deletions(-)
 create mode 100644 kernel/Kconfig.locks

(limited to 'include')

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 43c0acad7160..16c673096a22 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -95,6 +95,34 @@ config S390
 	select HAVE_ARCH_TRACEHOOK
 	select INIT_ALL_POSSIBLE
 	select HAVE_PERF_EVENTS
+	select ARCH_INLINE_SPIN_TRYLOCK
+	select ARCH_INLINE_SPIN_TRYLOCK_BH
+	select ARCH_INLINE_SPIN_LOCK
+	select ARCH_INLINE_SPIN_LOCK_BH
+	select ARCH_INLINE_SPIN_LOCK_IRQ
+	select ARCH_INLINE_SPIN_LOCK_IRQSAVE
+	select ARCH_INLINE_SPIN_UNLOCK
+	select ARCH_INLINE_SPIN_UNLOCK_BH
+	select ARCH_INLINE_SPIN_UNLOCK_IRQ
+	select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE
+	select ARCH_INLINE_READ_TRYLOCK
+	select ARCH_INLINE_READ_LOCK
+	select ARCH_INLINE_READ_LOCK_BH
+	select ARCH_INLINE_READ_LOCK_IRQ
+	select ARCH_INLINE_READ_LOCK_IRQSAVE
+	select ARCH_INLINE_READ_UNLOCK
+	select ARCH_INLINE_READ_UNLOCK_BH
+	select ARCH_INLINE_READ_UNLOCK_IRQ
+	select ARCH_INLINE_READ_UNLOCK_IRQRESTORE
+	select ARCH_INLINE_WRITE_TRYLOCK
+	select ARCH_INLINE_WRITE_LOCK
+	select ARCH_INLINE_WRITE_LOCK_BH
+	select ARCH_INLINE_WRITE_LOCK_IRQ
+	select ARCH_INLINE_WRITE_LOCK_IRQSAVE
+	select ARCH_INLINE_WRITE_UNLOCK
+	select ARCH_INLINE_WRITE_UNLOCK_BH
+	select ARCH_INLINE_WRITE_UNLOCK_IRQ
+	select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
 
 config SCHED_OMIT_FRAME_POINTER
 	bool
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index 41ce6861174e..c9af0d19c7ab 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -191,33 +191,4 @@ static inline int __raw_write_trylock(raw_rwlock_t *rw)
 #define _raw_read_relax(lock)	cpu_relax()
 #define _raw_write_relax(lock)	cpu_relax()
 
-#define __always_inline__spin_lock
-#define __always_inline__read_lock
-#define __always_inline__write_lock
-#define __always_inline__spin_lock_bh
-#define __always_inline__read_lock_bh
-#define __always_inline__write_lock_bh
-#define __always_inline__spin_lock_irq
-#define __always_inline__read_lock_irq
-#define __always_inline__write_lock_irq
-#define __always_inline__spin_lock_irqsave
-#define __always_inline__read_lock_irqsave
-#define __always_inline__write_lock_irqsave
-#define __always_inline__spin_trylock
-#define __always_inline__read_trylock
-#define __always_inline__write_trylock
-#define __always_inline__spin_trylock_bh
-#define __always_inline__spin_unlock
-#define __always_inline__read_unlock
-#define __always_inline__write_unlock
-#define __always_inline__spin_unlock_bh
-#define __always_inline__read_unlock_bh
-#define __always_inline__write_unlock_bh
-#define __always_inline__spin_unlock_irq
-#define __always_inline__read_unlock_irq
-#define __always_inline__write_unlock_irq
-#define __always_inline__spin_unlock_irqrestore
-#define __always_inline__read_unlock_irqrestore
-#define __always_inline__write_unlock_irqrestore
-
 #endif /* __ASM_SPINLOCK_H */
diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h
index 7a7e18fc2415..8264a7f459bc 100644
--- a/include/linux/spinlock_api_smp.h
+++ b/include/linux/spinlock_api_smp.h
@@ -60,137 +60,118 @@ void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 							__releases(lock);
 
-/*
- * We inline the unlock functions in the nondebug case:
- */
-#if !defined(CONFIG_DEBUG_SPINLOCK) && !defined(CONFIG_PREEMPT)
-#define __always_inline__spin_unlock
-#define __always_inline__read_unlock
-#define __always_inline__write_unlock
-#define __always_inline__spin_unlock_irq
-#define __always_inline__read_unlock_irq
-#define __always_inline__write_unlock_irq
-#endif
-
-#ifndef CONFIG_DEBUG_SPINLOCK
-#ifndef CONFIG_GENERIC_LOCKBREAK
-
-#ifdef __always_inline__spin_lock
+#ifdef CONFIG_INLINE_SPIN_LOCK
 #define _spin_lock(lock) __spin_lock(lock)
 #endif
 
-#ifdef __always_inline__read_lock
+#ifdef CONFIG_INLINE_READ_LOCK
 #define _read_lock(lock) __read_lock(lock)
 #endif
 
-#ifdef __always_inline__write_lock
+#ifdef CONFIG_INLINE_WRITE_LOCK
 #define _write_lock(lock) __write_lock(lock)
 #endif
 
-#ifdef __always_inline__spin_lock_bh
+#ifdef CONFIG_INLINE_SPIN_LOCK_BH
 #define _spin_lock_bh(lock) __spin_lock_bh(lock)
 #endif
 
-#ifdef __always_inline__read_lock_bh
+#ifdef CONFIG_INLINE_READ_LOCK_BH
 #define _read_lock_bh(lock) __read_lock_bh(lock)
 #endif
 
-#ifdef __always_inline__write_lock_bh
+#ifdef CONFIG_INLINE_WRITE_LOCK_BH
 #define _write_lock_bh(lock) __write_lock_bh(lock)
 #endif
 
-#ifdef __always_inline__spin_lock_irq
+#ifdef CONFIG_INLINE_SPIN_LOCK_IRQ
 #define _spin_lock_irq(lock) __spin_lock_irq(lock)
 #endif
 
-#ifdef __always_inline__read_lock_irq
+#ifdef CONFIG_INLINE_READ_LOCK_IRQ
 #define _read_lock_irq(lock) __read_lock_irq(lock)
 #endif
 
-#ifdef __always_inline__write_lock_irq
+#ifdef CONFIG_INLINE_WRITE_LOCK_IRQ
 #define _write_lock_irq(lock) __write_lock_irq(lock)
 #endif
 
-#ifdef __always_inline__spin_lock_irqsave
+#ifdef CONFIG_INLINE_SPIN_LOCK_IRQSAVE
 #define _spin_lock_irqsave(lock) __spin_lock_irqsave(lock)
 #endif
 
-#ifdef __always_inline__read_lock_irqsave
+#ifdef CONFIG_INLINE_READ_LOCK_IRQSAVE
 #define _read_lock_irqsave(lock) __read_lock_irqsave(lock)
 #endif
 
-#ifdef __always_inline__write_lock_irqsave
+#ifdef CONFIG_INLINE_WRITE_LOCK_IRQSAVE
 #define _write_lock_irqsave(lock) __write_lock_irqsave(lock)
 #endif
 
-#endif /* !CONFIG_GENERIC_LOCKBREAK */
-
-#ifdef __always_inline__spin_trylock
+#ifdef CONFIG_INLINE_SPIN_TRYLOCK
 #define _spin_trylock(lock) __spin_trylock(lock)
 #endif
 
-#ifdef __always_inline__read_trylock
+#ifdef CONFIG_INLINE_READ_TRYLOCK
 #define _read_trylock(lock) __read_trylock(lock)
 #endif
 
-#ifdef __always_inline__write_trylock
+#ifdef CONFIG_INLINE_WRITE_TRYLOCK
 #define _write_trylock(lock) __write_trylock(lock)
 #endif
 
-#ifdef __always_inline__spin_trylock_bh
+#ifdef CONFIG_INLINE_SPIN_TRYLOCK_BH
 #define _spin_trylock_bh(lock) __spin_trylock_bh(lock)
 #endif
 
-#ifdef __always_inline__spin_unlock
+#ifdef CONFIG_INLINE_SPIN_UNLOCK
 #define _spin_unlock(lock) __spin_unlock(lock)
 #endif
 
-#ifdef __always_inline__read_unlock
+#ifdef CONFIG_INLINE_READ_UNLOCK
 #define _read_unlock(lock) __read_unlock(lock)
 #endif
 
-#ifdef __always_inline__write_unlock
+#ifdef CONFIG_INLINE_WRITE_UNLOCK
 #define _write_unlock(lock) __write_unlock(lock)
 #endif
 
-#ifdef __always_inline__spin_unlock_bh
+#ifdef CONFIG_INLINE_SPIN_UNLOCK_BH
 #define _spin_unlock_bh(lock) __spin_unlock_bh(lock)
 #endif
 
-#ifdef __always_inline__read_unlock_bh
+#ifdef CONFIG_INLINE_READ_UNLOCK_BH
 #define _read_unlock_bh(lock) __read_unlock_bh(lock)
 #endif
 
-#ifdef __always_inline__write_unlock_bh
+#ifdef CONFIG_INLINE_WRITE_UNLOCK_BH
 #define _write_unlock_bh(lock) __write_unlock_bh(lock)
 #endif
 
-#ifdef __always_inline__spin_unlock_irq
+#ifdef CONFIG_INLINE_SPIN_UNLOCK_IRQ
 #define _spin_unlock_irq(lock) __spin_unlock_irq(lock)
 #endif
 
-#ifdef __always_inline__read_unlock_irq
+#ifdef CONFIG_INLINE_READ_UNLOCK_IRQ
 #define _read_unlock_irq(lock) __read_unlock_irq(lock)
 #endif
 
-#ifdef __always_inline__write_unlock_irq
+#ifdef CONFIG_INLINE_WRITE_UNLOCK_IRQ
 #define _write_unlock_irq(lock) __write_unlock_irq(lock)
 #endif
 
-#ifdef __always_inline__spin_unlock_irqrestore
+#ifdef CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE
 #define _spin_unlock_irqrestore(lock, flags) __spin_unlock_irqrestore(lock, flags)
 #endif
 
-#ifdef __always_inline__read_unlock_irqrestore
+#ifdef CONFIG_INLINE_READ_UNLOCK_IRQRESTORE
 #define _read_unlock_irqrestore(lock, flags) __read_unlock_irqrestore(lock, flags)
 #endif
 
-#ifdef __always_inline__write_unlock_irqrestore
+#ifdef CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE
 #define _write_unlock_irqrestore(lock, flags) __write_unlock_irqrestore(lock, flags)
 #endif
 
-#endif /* CONFIG_DEBUG_SPINLOCK */
-
 static inline int __spin_trylock(spinlock_t *lock)
 {
 	preempt_disable();
diff --git a/init/Kconfig b/init/Kconfig
index 9e03ef8b311e..06863dd7bf49 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1210,3 +1210,4 @@ source "block/Kconfig"
 config PREEMPT_NOTIFIERS
 	bool
 
+source "kernel/Kconfig.locks"
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
new file mode 100644
index 000000000000..d1f86db71451
--- /dev/null
+++ b/kernel/Kconfig.locks
@@ -0,0 +1,199 @@
+#
+# The ARCH_INLINE foo is necessary because select ignores "depends on"
+#
+config ARCH_INLINE_SPIN_TRYLOCK
+	bool
+
+config ARCH_INLINE_SPIN_TRYLOCK_BH
+	bool
+
+config ARCH_INLINE_SPIN_LOCK
+	bool
+
+config ARCH_INLINE_SPIN_LOCK_BH
+	bool
+
+config ARCH_INLINE_SPIN_LOCK_IRQ
+	bool
+
+config ARCH_INLINE_SPIN_LOCK_IRQSAVE
+	bool
+
+config ARCH_INLINE_SPIN_UNLOCK
+	bool
+
+config ARCH_INLINE_SPIN_UNLOCK_BH
+	bool
+
+config ARCH_INLINE_SPIN_UNLOCK_IRQ
+	bool
+
+config ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE
+	bool
+
+
+config ARCH_INLINE_READ_TRYLOCK
+	bool
+
+config ARCH_INLINE_READ_LOCK
+	bool
+
+config ARCH_INLINE_READ_LOCK_BH
+	bool
+
+config ARCH_INLINE_READ_LOCK_IRQ
+	bool
+
+config ARCH_INLINE_READ_LOCK_IRQSAVE
+	bool
+
+config ARCH_INLINE_READ_UNLOCK
+	bool
+
+config ARCH_INLINE_READ_UNLOCK_BH
+	bool
+
+config ARCH_INLINE_READ_UNLOCK_IRQ
+	bool
+
+config ARCH_INLINE_READ_UNLOCK_IRQRESTORE
+	bool
+
+
+config ARCH_INLINE_WRITE_TRYLOCK
+	bool
+
+config ARCH_INLINE_WRITE_LOCK
+	bool
+
+config ARCH_INLINE_WRITE_LOCK_BH
+	bool
+
+config ARCH_INLINE_WRITE_LOCK_IRQ
+	bool
+
+config ARCH_INLINE_WRITE_LOCK_IRQSAVE
+	bool
+
+config ARCH_INLINE_WRITE_UNLOCK
+	bool
+
+config ARCH_INLINE_WRITE_UNLOCK_BH
+	bool
+
+config ARCH_INLINE_WRITE_UNLOCK_IRQ
+	bool
+
+config ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
+	bool
+
+#
+# lock_* functions are inlined when:
+#   - DEBUG_SPINLOCK=n and GENERIC_LOCKBREAK=n and ARCH_INLINE_*LOCK=y
+#
+# trylock_* functions are inlined when:
+#   - DEBUG_SPINLOCK=n and ARCH_INLINE_*LOCK=y
+#
+# unlock and unlock_irq functions are inlined when:
+#   - DEBUG_SPINLOCK=n and ARCH_INLINE_*LOCK=y
+#  or
+#   - DEBUG_SPINLOCK=n and PREEMPT=n
+#
+# unlock_bh and unlock_irqrestore functions are inlined when:
+#   - DEBUG_SPINLOCK=n and ARCH_INLINE_*LOCK=y
+#
+
+config INLINE_SPIN_TRYLOCK
+	def_bool !DEBUG_SPINLOCK && ARCH_INLINE_SPIN_TRYLOCK
+
+config INLINE_SPIN_TRYLOCK_BH
+	def_bool !DEBUG_SPINLOCK && ARCH_INLINE_SPIN_TRYLOCK_BH
+
+config INLINE_SPIN_LOCK
+	def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && ARCH_INLINE_SPIN_LOCK
+
+config INLINE_SPIN_LOCK_BH
+	def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \
+		 ARCH_INLINE_SPIN_LOCK_BH
+
+config INLINE_SPIN_LOCK_IRQ
+	def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \
+		 ARCH_INLINE_SPIN_LOCK_IRQ
+
+config INLINE_SPIN_LOCK_IRQSAVE
+	def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \
+		 ARCH_INLINE_SPIN_LOCK_IRQSAVE
+
+config INLINE_SPIN_UNLOCK
+	def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_SPIN_UNLOCK)
+
+config INLINE_SPIN_UNLOCK_BH
+	def_bool !DEBUG_SPINLOCK && ARCH_INLINE_SPIN_UNLOCK_BH
+
+config INLINE_SPIN_UNLOCK_IRQ
+	def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_SPIN_UNLOCK_BH)
+
+config INLINE_SPIN_UNLOCK_IRQRESTORE
+	def_bool !DEBUG_SPINLOCK && ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE
+
+
+config INLINE_READ_TRYLOCK
+	def_bool !DEBUG_SPINLOCK && ARCH_INLINE_READ_TRYLOCK
+
+config INLINE_READ_LOCK
+	def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && ARCH_INLINE_READ_LOCK
+
+config INLINE_READ_LOCK_BH
+	def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \
+		 ARCH_INLINE_READ_LOCK_BH
+
+config INLINE_READ_LOCK_IRQ
+	def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \
+		 ARCH_INLINE_READ_LOCK_IRQ
+
+config INLINE_READ_LOCK_IRQSAVE
+	def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \
+		 ARCH_INLINE_READ_LOCK_IRQSAVE
+
+config INLINE_READ_UNLOCK
+	def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_READ_UNLOCK)
+
+config INLINE_READ_UNLOCK_BH
+	def_bool !DEBUG_SPINLOCK && ARCH_INLINE_READ_UNLOCK_BH
+
+config INLINE_READ_UNLOCK_IRQ
+	def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_READ_UNLOCK_BH)
+
+config INLINE_READ_UNLOCK_IRQRESTORE
+	def_bool !DEBUG_SPINLOCK && ARCH_INLINE_READ_UNLOCK_IRQRESTORE
+
+
+config INLINE_WRITE_TRYLOCK
+	def_bool !DEBUG_SPINLOCK && ARCH_INLINE_WRITE_TRYLOCK
+
+config INLINE_WRITE_LOCK
+	def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && ARCH_INLINE_WRITE_LOCK
+
+config INLINE_WRITE_LOCK_BH
+	def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \
+		 ARCH_INLINE_WRITE_LOCK_BH
+
+config INLINE_WRITE_LOCK_IRQ
+	def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \
+		 ARCH_INLINE_WRITE_LOCK_IRQ
+
+config INLINE_WRITE_LOCK_IRQSAVE
+	def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \
+		 ARCH_INLINE_WRITE_LOCK_IRQSAVE
+
+config INLINE_WRITE_UNLOCK
+	def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_WRITE_UNLOCK)
+
+config INLINE_WRITE_UNLOCK_BH
+	def_bool !DEBUG_SPINLOCK && ARCH_INLINE_WRITE_UNLOCK_BH
+
+config INLINE_WRITE_UNLOCK_IRQ
+	def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_WRITE_UNLOCK_BH)
+
+config INLINE_WRITE_UNLOCK_IRQRESTORE
+	def_bool !DEBUG_SPINLOCK && ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index 5ddab730cb2f..235a9579a875 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -21,7 +21,7 @@
 #include <linux/debug_locks.h>
 #include <linux/module.h>
 
-#ifndef _spin_trylock
+#ifndef CONFIG_INLINE_SPIN_TRYLOCK
 int __lockfunc _spin_trylock(spinlock_t *lock)
 {
 	return __spin_trylock(lock);
@@ -29,7 +29,7 @@ int __lockfunc _spin_trylock(spinlock_t *lock)
 EXPORT_SYMBOL(_spin_trylock);
 #endif
 
-#ifndef _read_trylock
+#ifndef CONFIG_INLINE_READ_TRYLOCK
 int __lockfunc _read_trylock(rwlock_t *lock)
 {
 	return __read_trylock(lock);
@@ -37,7 +37,7 @@ int __lockfunc _read_trylock(rwlock_t *lock)
 EXPORT_SYMBOL(_read_trylock);
 #endif
 
-#ifndef _write_trylock
+#ifndef CONFIG_INLINE_WRITE_TRYLOCK
 int __lockfunc _write_trylock(rwlock_t *lock)
 {
 	return __write_trylock(lock);
@@ -52,7 +52,7 @@ EXPORT_SYMBOL(_write_trylock);
  */
 #if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC)
 
-#ifndef _read_lock
+#ifndef CONFIG_INLINE_READ_LOCK
 void __lockfunc _read_lock(rwlock_t *lock)
 {
 	__read_lock(lock);
@@ -60,7 +60,7 @@ void __lockfunc _read_lock(rwlock_t *lock)
 EXPORT_SYMBOL(_read_lock);
 #endif
 
-#ifndef _spin_lock_irqsave
+#ifndef CONFIG_INLINE_SPIN_LOCK_IRQSAVE
 unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock)
 {
 	return __spin_lock_irqsave(lock);
@@ -68,7 +68,7 @@ unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock)
 EXPORT_SYMBOL(_spin_lock_irqsave);
 #endif
 
-#ifndef _spin_lock_irq
+#ifndef CONFIG_INLINE_SPIN_LOCK_IRQ
 void __lockfunc _spin_lock_irq(spinlock_t *lock)
 {
 	__spin_lock_irq(lock);
@@ -76,7 +76,7 @@ void __lockfunc _spin_lock_irq(spinlock_t *lock)
 EXPORT_SYMBOL(_spin_lock_irq);
 #endif
 
-#ifndef _spin_lock_bh
+#ifndef CONFIG_INLINE_SPIN_LOCK_BH
 void __lockfunc _spin_lock_bh(spinlock_t *lock)
 {
 	__spin_lock_bh(lock);
@@ -84,7 +84,7 @@ void __lockfunc _spin_lock_bh(spinlock_t *lock)
 EXPORT_SYMBOL(_spin_lock_bh);
 #endif
 
-#ifndef _read_lock_irqsave
+#ifndef CONFIG_INLINE_READ_LOCK_IRQSAVE
 unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock)
 {
 	return __read_lock_irqsave(lock);
@@ -92,7 +92,7 @@ unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock)
 EXPORT_SYMBOL(_read_lock_irqsave);
 #endif
 
-#ifndef _read_lock_irq
+#ifndef CONFIG_INLINE_READ_LOCK_IRQ
 void __lockfunc _read_lock_irq(rwlock_t *lock)
 {
 	__read_lock_irq(lock);
@@ -100,7 +100,7 @@ void __lockfunc _read_lock_irq(rwlock_t *lock)
 EXPORT_SYMBOL(_read_lock_irq);
 #endif
 
-#ifndef _read_lock_bh
+#ifndef CONFIG_INLINE_READ_LOCK_BH
 void __lockfunc _read_lock_bh(rwlock_t *lock)
 {
 	__read_lock_bh(lock);
@@ -108,7 +108,7 @@ void __lockfunc _read_lock_bh(rwlock_t *lock)
 EXPORT_SYMBOL(_read_lock_bh);
 #endif
 
-#ifndef _write_lock_irqsave
+#ifndef CONFIG_INLINE_WRITE_LOCK_IRQSAVE
 unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock)
 {
 	return __write_lock_irqsave(lock);
@@ -116,7 +116,7 @@ unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock)
 EXPORT_SYMBOL(_write_lock_irqsave);
 #endif
 
-#ifndef _write_lock_irq
+#ifndef CONFIG_INLINE_WRITE_LOCK_IRQ
 void __lockfunc _write_lock_irq(rwlock_t *lock)
 {
 	__write_lock_irq(lock);
@@ -124,7 +124,7 @@ void __lockfunc _write_lock_irq(rwlock_t *lock)
 EXPORT_SYMBOL(_write_lock_irq);
 #endif
 
-#ifndef _write_lock_bh
+#ifndef CONFIG_INLINE_WRITE_LOCK_BH
 void __lockfunc _write_lock_bh(rwlock_t *lock)
 {
 	__write_lock_bh(lock);
@@ -132,7 +132,7 @@ void __lockfunc _write_lock_bh(rwlock_t *lock)
 EXPORT_SYMBOL(_write_lock_bh);
 #endif
 
-#ifndef _spin_lock
+#ifndef CONFIG_INLINE_SPIN_LOCK
 void __lockfunc _spin_lock(spinlock_t *lock)
 {
 	__spin_lock(lock);
@@ -140,7 +140,7 @@ void __lockfunc _spin_lock(spinlock_t *lock)
 EXPORT_SYMBOL(_spin_lock);
 #endif
 
-#ifndef _write_lock
+#ifndef CONFIG_INLINE_WRITE_LOCK
 void __lockfunc _write_lock(rwlock_t *lock)
 {
 	__write_lock(lock);
@@ -272,7 +272,7 @@ EXPORT_SYMBOL(_spin_lock_nest_lock);
 
 #endif
 
-#ifndef _spin_unlock
+#ifndef CONFIG_INLINE_SPIN_UNLOCK
 void __lockfunc _spin_unlock(spinlock_t *lock)
 {
 	__spin_unlock(lock);
@@ -280,7 +280,7 @@ void __lockfunc _spin_unlock(spinlock_t *lock)
 EXPORT_SYMBOL(_spin_unlock);
 #endif
 
-#ifndef _write_unlock
+#ifndef CONFIG_INLINE_WRITE_UNLOCK
 void __lockfunc _write_unlock(rwlock_t *lock)
 {
 	__write_unlock(lock);
@@ -288,7 +288,7 @@ void __lockfunc _write_unlock(rwlock_t *lock)
 EXPORT_SYMBOL(_write_unlock);
 #endif
 
-#ifndef _read_unlock
+#ifndef CONFIG_INLINE_READ_UNLOCK
 void __lockfunc _read_unlock(rwlock_t *lock)
 {
 	__read_unlock(lock);
@@ -296,7 +296,7 @@ void __lockfunc _read_unlock(rwlock_t *lock)
 EXPORT_SYMBOL(_read_unlock);
 #endif
 
-#ifndef _spin_unlock_irqrestore
+#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE
 void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
 {
 	__spin_unlock_irqrestore(lock, flags);
@@ -304,7 +304,7 @@ void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
 EXPORT_SYMBOL(_spin_unlock_irqrestore);
 #endif
 
-#ifndef _spin_unlock_irq
+#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQ
 void __lockfunc _spin_unlock_irq(spinlock_t *lock)
 {
 	__spin_unlock_irq(lock);
@@ -312,7 +312,7 @@ void __lockfunc _spin_unlock_irq(spinlock_t *lock)
 EXPORT_SYMBOL(_spin_unlock_irq);
 #endif
 
-#ifndef _spin_unlock_bh
+#ifndef CONFIG_INLINE_SPIN_UNLOCK_BH
 void __lockfunc _spin_unlock_bh(spinlock_t *lock)
 {
 	__spin_unlock_bh(lock);
@@ -320,7 +320,7 @@ void __lockfunc _spin_unlock_bh(spinlock_t *lock)
 EXPORT_SYMBOL(_spin_unlock_bh);
 #endif
 
-#ifndef _read_unlock_irqrestore
+#ifndef CONFIG_INLINE_READ_UNLOCK_IRQRESTORE
 void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 {
 	__read_unlock_irqrestore(lock, flags);
@@ -328,7 +328,7 @@ void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 EXPORT_SYMBOL(_read_unlock_irqrestore);
 #endif
 
-#ifndef _read_unlock_irq
+#ifndef CONFIG_INLINE_READ_UNLOCK_IRQ
 void __lockfunc _read_unlock_irq(rwlock_t *lock)
 {
 	__read_unlock_irq(lock);
@@ -336,7 +336,7 @@ void __lockfunc _read_unlock_irq(rwlock_t *lock)
 EXPORT_SYMBOL(_read_unlock_irq);
 #endif
 
-#ifndef _read_unlock_bh
+#ifndef CONFIG_INLINE_READ_UNLOCK_BH
 void __lockfunc _read_unlock_bh(rwlock_t *lock)
 {
 	__read_unlock_bh(lock);
@@ -344,7 +344,7 @@ void __lockfunc _read_unlock_bh(rwlock_t *lock)
 EXPORT_SYMBOL(_read_unlock_bh);
 #endif
 
-#ifndef _write_unlock_irqrestore
+#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE
 void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 {
 	__write_unlock_irqrestore(lock, flags);
@@ -352,7 +352,7 @@ void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 EXPORT_SYMBOL(_write_unlock_irqrestore);
 #endif
 
-#ifndef _write_unlock_irq
+#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQ
 void __lockfunc _write_unlock_irq(rwlock_t *lock)
 {
 	__write_unlock_irq(lock);
@@ -360,7 +360,7 @@ void __lockfunc _write_unlock_irq(rwlock_t *lock)
 EXPORT_SYMBOL(_write_unlock_irq);
 #endif
 
-#ifndef _write_unlock_bh
+#ifndef CONFIG_INLINE_WRITE_UNLOCK_BH
 void __lockfunc _write_unlock_bh(rwlock_t *lock)
 {
 	__write_unlock_bh(lock);
@@ -368,7 +368,7 @@ void __lockfunc _write_unlock_bh(rwlock_t *lock)
 EXPORT_SYMBOL(_write_unlock_bh);
 #endif
 
-#ifndef _spin_trylock_bh
+#ifndef CONFIG_INLINE_SPIN_TRYLOCK_BH
 int __lockfunc _spin_trylock_bh(spinlock_t *lock)
 {
 	return __spin_trylock_bh(lock);
-- 
cgit v1.2.3-70-g09d2


From 687b16fb617bd446439425a368ad7c7bbd202c73 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 13 Nov 2009 13:16:15 +0100
Subject: hw-breakpoints: Provide an off-case for counter_arch_bp()

If an arch doesn't support the hw breakpoints, counter_arch_bp()
has no off case to cover the missing breakpoint info structure
from the perf event. The result is a build error in non-x86
configs.

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Prasad <prasad@linux.vnet.ibm.com>
LKML-Reference: <1258114575-32655-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Prasad <prasad@linux.vnet.ibm.com>
---
 include/linux/hw_breakpoint.h | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index 7eba9b92e5f3..18710e0c84bd 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -16,11 +16,6 @@ enum {
 	HW_BREAKPOINT_X = 4,
 };
 
-static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp)
-{
-	return &bp->hw.info;
-}
-
 static inline unsigned long hw_breakpoint_addr(struct perf_event *bp)
 {
 	return bp->attr.bp_addr;
@@ -83,6 +78,11 @@ extern void release_bp_slot(struct perf_event *bp);
 
 extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk);
 
+static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp)
+{
+	return &bp->hw.info;
+}
+
 #else /* !CONFIG_HAVE_HW_BREAKPOINT */
 
 static inline struct perf_event *
@@ -126,6 +126,11 @@ static inline void release_bp_slot(struct perf_event *bp) 		{ }
 
 static inline void flush_ptrace_hw_breakpoint(struct task_struct *tsk)	{ }
 
+static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp)
+{
+	return NULL;
+}
+
 #endif /* CONFIG_HAVE_HW_BREAKPOINT */
 
 #endif /* _LINUX_HW_BREAKPOINT_H */
-- 
cgit v1.2.3-70-g09d2


From 688bcaff291cf2fe2734e43f2793d4d05b850518 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 14 Nov 2009 01:12:47 +0100
Subject: hw-breakpoints: Fix build on !perf architectures

the arch/alpha build fails with:

 In file included from tip/kernel/exit.c:52:
 tip/include/linux/hw_breakpoint.h: In function 'hw_breakpoint_addr':
 tip/include/linux/hw_breakpoint.h:21: error: 'struct perf_event' has no member named 'attr'
 [...]

Move these helper inlines inside the CONFIG_HAVE_HW_BREAKPOINT ifdef.

Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Prasad <prasad@linux.vnet.ibm.com>
LKML-Reference: <1258114575-32655-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/hw_breakpoint.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index 18710e0c84bd..0b98cbf76da7 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -16,6 +16,8 @@ enum {
 	HW_BREAKPOINT_X = 4,
 };
 
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+
 static inline unsigned long hw_breakpoint_addr(struct perf_event *bp)
 {
 	return bp->attr.bp_addr;
@@ -31,7 +33,6 @@ static inline int hw_breakpoint_len(struct perf_event *bp)
 	return bp->attr.bp_len;
 }
 
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
 extern struct perf_event *
 register_user_hw_breakpoint(unsigned long addr,
 			    int len,
-- 
cgit v1.2.3-70-g09d2


From 811cb50baf63461ce0bdb234927046131fc7fa8b Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Fri, 13 Nov 2009 23:40:09 +0100
Subject: tracing: Fix event format export

For some reason the export of the event print format to userspace
uses '#fmt' which breaks if the format string is anything but a plain
string, for example if it is built with macros then the macro names
are exported instead of their contents.

Use
	"\"%s\"", fmt
instead of
	"%s", #fmt
to export the string and not the way it is built.

For example, in net/mac80211/driver-trace.h for the trace event drv_start
there is:

        TP_printk(
                LOCAL_PR_FMT, LOCAL_PR_ARG
        )

Which use to produce:

 print fmt: LOCAL_PR_FMT, REC->wiphy_name

Now produces:

 print fmt: "%s", REC->wiphy_name

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
LKML-Reference: <20091113224009.GB23942@elte.hu>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/trace/ftrace.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index cc0d9667e182..dacb8ef67000 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -159,7 +159,7 @@
 #undef __get_str
 
 #undef TP_printk
-#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
+#define TP_printk(fmt, args...) "\"%s\", %s\n", fmt, __stringify(args)
 
 #undef TP_fast_assign
 #define TP_fast_assign(args...) args
-- 
cgit v1.2.3-70-g09d2


From 4c49b12853fbb5eff4849b7b6a1e895776f027a1 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Fri, 13 Nov 2009 21:47:33 -0800
Subject: perf_event: Fix invalid type in ioctl definition

u64 is invalid in userspace headers, including ioctl
definitions; use __u64 instead

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: <stable@kernel.org>
LKML-Reference: <20091113214733.7cd76be9@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_event.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 45b56faf5cdc..ec3768a81058 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -219,7 +219,7 @@ struct perf_event_attr {
 #define PERF_EVENT_IOC_DISABLE		_IO ('$', 1)
 #define PERF_EVENT_IOC_REFRESH		_IO ('$', 2)
 #define PERF_EVENT_IOC_RESET		_IO ('$', 3)
-#define PERF_EVENT_IOC_PERIOD		_IOW('$', 4, u64)
+#define PERF_EVENT_IOC_PERIOD		_IOW('$', 4, __u64)
 #define PERF_EVENT_IOC_SET_OUTPUT	_IO ('$', 5)
 #define PERF_EVENT_IOC_SET_FILTER	_IOW('$', 6, char *)
 
-- 
cgit v1.2.3-70-g09d2


From 6959450e567c1f17d3ce8489099fc56c3721d577 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Sat, 14 Nov 2009 20:46:38 +0900
Subject: swiotlb: Remove duplicate swiotlb_force extern declarations

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: tony.luck@intel.com
LKML-Reference: <1258199198-16657-4-git-send-email-fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/ia64/include/asm/swiotlb.h | 2 --
 arch/x86/include/asm/swiotlb.h  | 4 ----
 include/linux/swiotlb.h         | 2 ++
 3 files changed, 2 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/include/asm/swiotlb.h b/arch/ia64/include/asm/swiotlb.h
index dcbaea7ce128..f0acde68aaea 100644
--- a/arch/ia64/include/asm/swiotlb.h
+++ b/arch/ia64/include/asm/swiotlb.h
@@ -4,8 +4,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/swiotlb.h>
 
-extern int swiotlb_force;
-
 #ifdef CONFIG_SWIOTLB
 extern int swiotlb;
 extern void pci_swiotlb_init(void);
diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h
index 940f13a213f8..87ffcb12a1b8 100644
--- a/arch/x86/include/asm/swiotlb.h
+++ b/arch/x86/include/asm/swiotlb.h
@@ -3,10 +3,6 @@
 
 #include <linux/swiotlb.h>
 
-/* SWIOTLB interface */
-
-extern int swiotlb_force;
-
 #ifdef CONFIG_SWIOTLB
 extern int swiotlb;
 extern int pci_swiotlb_init(void);
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index eb9bdb4d4854..febedcf67c7e 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -7,6 +7,8 @@ struct device;
 struct dma_attrs;
 struct scatterlist;
 
+extern int swiotlb_force;
+
 /*
  * Maximum allowable number of contiguous slabs to map,
  * must be a power of 2.  What is the appropriate value ?
-- 
cgit v1.2.3-70-g09d2


From 559fdc3c1b624edb1933a875022fe7e27934d11c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 16 Nov 2009 12:45:14 +0100
Subject: perf_event: Optimize perf_output_lock()

The purpose of perf_output_{un,}lock() is to:

 1) avoid publishing incomplete data
    [ possible when publishing a head that is ahead of an entry
      that is still being written ]

 2) guarantee fwd progress
    [ a simple refcount on pending writers doesn't need to drop to
      0, making it so would end up implementing something like forced
      quiecent states of RCU ]

To satisfy the above without undue complexity it serializes
between CPUs, this means that a pending writer can only be the
same cpu in a nested context, and since (under normal operation)
a cpu always makes progress we're good -- if the head is only
published when the bottom  most writer completes.

Now we don't need to disable IRQs in order to serialize between
CPUs, disabling preemption ought to be sufficient, esp since we
already deal with nesting due to NMIs.

This avoids potentially expensive (and needless) local IRQ
disable/enable ops.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <1258373161.26714.254.camel@laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_event.h |  1 -
 kernel/perf_event.c        | 21 +++++++++++----------
 2 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index df4e73e33774..7f87563c8485 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -714,7 +714,6 @@ struct perf_output_handle {
 	int				nmi;
 	int				sample;
 	int				locked;
-	unsigned long			flags;
 };
 
 #ifdef CONFIG_PERF_EVENTS
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 6f4ed3b4cd73..3256e36ad251 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -2674,20 +2674,21 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
 static void perf_output_lock(struct perf_output_handle *handle)
 {
 	struct perf_mmap_data *data = handle->data;
-	int cpu;
+	int cur, cpu = get_cpu();
 
 	handle->locked = 0;
 
-	local_irq_save(handle->flags);
-	cpu = smp_processor_id();
-
-	if (in_nmi() && atomic_read(&data->lock) == cpu)
-		return;
+	for (;;) {
+		cur = atomic_cmpxchg(&data->lock, -1, cpu);
+		if (cur == -1) {
+			handle->locked = 1;
+			break;
+		}
+		if (cur == cpu)
+			break;
 
-	while (atomic_cmpxchg(&data->lock, -1, cpu) != -1)
 		cpu_relax();
-
-	handle->locked = 1;
+	}
 }
 
 static void perf_output_unlock(struct perf_output_handle *handle)
@@ -2733,7 +2734,7 @@ again:
 	if (atomic_xchg(&data->wakeup, 0))
 		perf_output_wakeup(handle);
 out:
-	local_irq_restore(handle->flags);
+	put_cpu();
 }
 
 void perf_output_copy(struct perf_output_handle *handle,
-- 
cgit v1.2.3-70-g09d2


From 2ea6dec4a22a6f66f6633876212fd4d195cf8277 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 17 Nov 2009 14:27:27 -0800
Subject: generic-ipi: Add smp_call_function_any()

Andrew points out that acpi-cpufreq uses cpumask_any, when it really
would prefer to use the same CPU if possible (to avoid an IPI).  In
general, this seems a good idea to offer.

[ tglx: Documented selection preference and Inlined the UP case to
  	avoid the copy of smp_call_function_single() and the extra
  	EXPORT ]

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Zhao Yakui <yakui.zhao@intel.com>
Cc: Dave Jones <davej@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Mike Galbraith <efault@gmx.de>
Cc: "Zhang, Yanmin" <yanmin_zhang@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/smp.h | 11 ++++++++++-
 kernel/smp.c        | 45 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/smp.h b/include/linux/smp.h
index 39c64bae776d..7a0570e6a596 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -76,6 +76,9 @@ void smp_call_function_many(const struct cpumask *mask,
 void __smp_call_function_single(int cpuid, struct call_single_data *data,
 				int wait);
 
+int smp_call_function_any(const struct cpumask *mask,
+			  void (*func)(void *info), void *info, int wait);
+
 /*
  * Generic and arch helpers
  */
@@ -137,9 +140,15 @@ static inline void smp_send_reschedule(int cpu) { }
 #define smp_prepare_boot_cpu()			do {} while (0)
 #define smp_call_function_many(mask, func, info, wait) \
 			(up_smp_call_function(func, info))
-static inline void init_call_single_data(void)
+static inline void init_call_single_data(void) { }
+
+static inline int
+smp_call_function_any(const struct cpumask *mask, void (*func)(void *info),
+		      void *info, int wait)
 {
+	return smp_call_function_single(0, func, info, wait);
 }
+
 #endif /* !SMP */
 
 /*
diff --git a/kernel/smp.c b/kernel/smp.c
index 8bd618f0364d..a8c76069cf50 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -319,6 +319,51 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
 }
 EXPORT_SYMBOL(smp_call_function_single);
 
+/*
+ * smp_call_function_any - Run a function on any of the given cpus
+ * @mask: The mask of cpus it can run on.
+ * @func: The function to run. This must be fast and non-blocking.
+ * @info: An arbitrary pointer to pass to the function.
+ * @wait: If true, wait until function has completed.
+ *
+ * Returns 0 on success, else a negative status code (if no cpus were online).
+ * Note that @wait will be implicitly turned on in case of allocation failures,
+ * since we fall back to on-stack allocation.
+ *
+ * Selection preference:
+ *	1) current cpu if in @mask
+ *	2) any cpu of current node if in @mask
+ *	3) any other online cpu in @mask
+ */
+int smp_call_function_any(const struct cpumask *mask,
+			  void (*func)(void *info), void *info, int wait)
+{
+	unsigned int cpu;
+	const struct cpumask *nodemask;
+	int ret;
+
+	/* Try for same CPU (cheapest) */
+	cpu = get_cpu();
+	if (cpumask_test_cpu(cpu, mask))
+		goto call;
+
+	/* Try for same node. */
+	nodemask = cpumask_of_node(cpu);
+	for (cpu = cpumask_first_and(nodemask, mask); cpu < nr_cpu_ids;
+	     cpu = cpumask_next_and(cpu, nodemask, mask)) {
+		if (cpu_online(cpu))
+			goto call;
+	}
+
+	/* Any online will do: smp_call_function_single handles nr_cpu_ids. */
+	cpu = cpumask_any_and(mask, cpu_online_mask);
+call:
+	ret = smp_call_function_single(cpu, func, info, wait);
+	put_cpu();
+	return ret;
+}
+EXPORT_SYMBOL_GPL(smp_call_function_any);
+
 /**
  * __smp_call_function_single(): Run a function on another CPU
  * @cpu: The CPU to run on.
-- 
cgit v1.2.3-70-g09d2


From 192dcf1d1775736627280a5dd4cb0f605b21857a Mon Sep 17 00:00:00 2001
From: Josh Stone <jistone@redhat.com>
Date: Wed, 18 Nov 2009 13:06:55 -0800
Subject: tracing: Remove the stale include/trace/power.h

Commit 6161352 moved the power tracing to include/trace/events/,
but left the old header behind.  No one is using the old header,
and its declarations are now incorrect, so it should be removed.

Signed-off-by: Josh Stone <jistone@redhat.com>
Acked-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Frank Ch. Eigler <fche@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <1258578415-14752-1-git-send-email-jistone@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/trace/power.h | 32 --------------------------------
 1 file changed, 32 deletions(-)
 delete mode 100644 include/trace/power.h

(limited to 'include')

diff --git a/include/trace/power.h b/include/trace/power.h
deleted file mode 100644
index ef204666e983..000000000000
--- a/include/trace/power.h
+++ /dev/null
@@ -1,32 +0,0 @@
-#ifndef _TRACE_POWER_H
-#define _TRACE_POWER_H
-
-#include <linux/ktime.h>
-#include <linux/tracepoint.h>
-
-enum {
-	POWER_NONE = 0,
-	POWER_CSTATE = 1,
-	POWER_PSTATE = 2,
-};
-
-struct power_trace {
-	ktime_t			stamp;
-	ktime_t			end;
-	int			type;
-	int			state;
-};
-
-DECLARE_TRACE(power_start,
-	TP_PROTO(struct power_trace *it, unsigned int type, unsigned int state),
-	      TP_ARGS(it, type, state));
-
-DECLARE_TRACE(power_mark,
-	TP_PROTO(struct power_trace *it, unsigned int type, unsigned int state),
-	      TP_ARGS(it, type, state));
-
-DECLARE_TRACE(power_end,
-	TP_PROTO(struct power_trace *it),
-	      TP_ARGS(it));
-
-#endif /* _TRACE_POWER_H */
-- 
cgit v1.2.3-70-g09d2


From 453f19eea7dbad837425e9b07d84568d14898794 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 20 Nov 2009 22:19:43 +0100
Subject: perf: Allow for custom overflow handlers

in-kernel perf users might wish to have custom actions on the
sample interrupt.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <20091120212508.222339539@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_event.h | 6 ++++++
 kernel/perf_event.c        | 8 +++++++-
 2 files changed, 13 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index b5cdac0de370..a430ac3074af 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -567,6 +567,8 @@ struct perf_pending_entry {
 
 typedef void (*perf_callback_t)(struct perf_event *, void *);
 
+struct perf_sample_data;
+
 /**
  * struct perf_event - performance event kernel representation:
  */
@@ -658,6 +660,10 @@ struct perf_event {
 	struct pid_namespace		*ns;
 	u64				id;
 
+	void (*overflow_handler)(struct perf_event *event,
+			int nmi, struct perf_sample_data *data,
+			struct pt_regs *regs);
+
 #ifdef CONFIG_EVENT_PROFILE
 	struct event_filter		*filter;
 #endif
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 3852e2656bb0..1dfb6cc4fdea 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -3710,7 +3710,11 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
 			perf_event_disable(event);
 	}
 
-	perf_event_output(event, nmi, data, regs);
+	if (event->overflow_handler)
+		event->overflow_handler(event, nmi, data, regs);
+	else
+		perf_event_output(event, nmi, data, regs);
+
 	return ret;
 }
 
@@ -4836,6 +4840,8 @@ inherit_event(struct perf_event *parent_event,
 	if (parent_event->attr.freq)
 		child_event->hw.sample_period = parent_event->hw.sample_period;
 
+	child_event->overflow_handler = parent_event->overflow_handler;
+
 	/*
 	 * Link it up in the child's context:
 	 */
-- 
cgit v1.2.3-70-g09d2


From 59ed446f792cc07d37b1536b9c4664d14e25e425 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 20 Nov 2009 22:19:55 +0100
Subject: perf: Fix event scaling for inherited counters

Properly account the full hierarchy of counters for both the
count (we already did so) and the scale times (new).

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <20091120212509.153379276@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_event.h |  3 ++-
 kernel/perf_event.c        | 48 +++++++++++++++++++++++++---------------------
 2 files changed, 28 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index a430ac3074af..36fe89f72641 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -782,7 +782,8 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr,
 				int cpu,
 				pid_t pid,
 				perf_callback_t callback);
-extern u64 perf_event_read_value(struct perf_event *event);
+extern u64 perf_event_read_value(struct perf_event *event,
+				 u64 *enabled, u64 *running);
 
 struct perf_sample_data {
 	u64				type;
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index fdfae888a67c..80f40da9a01e 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1774,14 +1774,25 @@ static int perf_event_read_size(struct perf_event *event)
 	return size;
 }
 
-u64 perf_event_read_value(struct perf_event *event)
+u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 {
 	struct perf_event *child;
 	u64 total = 0;
 
+	*enabled = 0;
+	*running = 0;
+
 	total += perf_event_read(event);
-	list_for_each_entry(child, &event->child_list, child_list)
+	*enabled += event->total_time_enabled +
+			atomic64_read(&event->child_total_time_enabled);
+	*running += event->total_time_running +
+			atomic64_read(&event->child_total_time_running);
+
+	list_for_each_entry(child, &event->child_list, child_list) {
 		total += perf_event_read(child);
+		*enabled += child->total_time_enabled;
+		*running += child->total_time_running;
+	}
 
 	return total;
 }
@@ -1793,19 +1804,15 @@ static int perf_event_read_group(struct perf_event *event,
 	struct perf_event *leader = event->group_leader, *sub;
 	int n = 0, size = 0, ret = 0;
 	u64 values[5];
-	u64 count;
+	u64 count, enabled, running;
 
-	count = perf_event_read_value(leader);
+	count = perf_event_read_value(leader, &enabled, &running);
 
 	values[n++] = 1 + leader->nr_siblings;
-	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
-		values[n++] = leader->total_time_enabled +
-			atomic64_read(&leader->child_total_time_enabled);
-	}
-	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
-		values[n++] = leader->total_time_running +
-			atomic64_read(&leader->child_total_time_running);
-	}
+	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+		values[n++] = enabled;
+	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+		values[n++] = running;
 	values[n++] = count;
 	if (read_format & PERF_FORMAT_ID)
 		values[n++] = primary_event_id(leader);
@@ -1820,7 +1827,7 @@ static int perf_event_read_group(struct perf_event *event,
 	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
 		n = 0;
 
-		values[n++] = perf_event_read_value(sub);
+		values[n++] = perf_event_read_value(sub, &enabled, &running);
 		if (read_format & PERF_FORMAT_ID)
 			values[n++] = primary_event_id(sub);
 
@@ -1838,18 +1845,15 @@ static int perf_event_read_group(struct perf_event *event,
 static int perf_event_read_one(struct perf_event *event,
 				 u64 read_format, char __user *buf)
 {
+	u64 enabled, running;
 	u64 values[4];
 	int n = 0;
 
-	values[n++] = perf_event_read_value(event);
-	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
-		values[n++] = event->total_time_enabled +
-			atomic64_read(&event->child_total_time_enabled);
-	}
-	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
-		values[n++] = event->total_time_running +
-			atomic64_read(&event->child_total_time_running);
-	}
+	values[n++] = perf_event_read_value(event, &enabled, &running);
+	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+		values[n++] = enabled;
+	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+		values[n++] = running;
 	if (read_format & PERF_FORMAT_ID)
 		values[n++] = primary_event_id(event);
 
-- 
cgit v1.2.3-70-g09d2


From ce71b9df8893ec954e56c5979df6da274f20f65e Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sun, 22 Nov 2009 05:26:55 +0100
Subject: tracing: Use the perf recursion protection from trace event

When we commit a trace to perf, we first check if we are
recursing in the same buffer so that we don't mess-up the buffer
with a recursing trace. But later on, we do the same check from
perf to avoid commit recursion. The recursion check is desired
early before we touch the buffer but we want to do this check
only once.

Then export the recursion protection from perf and use it from
the trace events before submitting a trace.

v2: Put appropriate Reported-by tag

Reported-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Jason Baron <jbaron@redhat.com>
LKML-Reference: <1258864015-10579-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace_event.h       |  9 ++---
 include/linux/perf_event.h         |  4 +++
 include/trace/ftrace.h             | 23 +++++++------
 kernel/perf_event.c                | 68 +++++++++++++++++++++++++-------------
 kernel/trace/trace_event_profile.c | 14 ++++----
 kernel/trace/trace_kprobe.c        | 48 ++++++++++-----------------
 kernel/trace/trace_syscalls.c      | 47 ++++++++++----------------
 7 files changed, 106 insertions(+), 107 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 43360c1d8f70..47bbdf9c38d0 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -137,13 +137,8 @@ struct ftrace_event_call {
 
 #define FTRACE_MAX_PROFILE_SIZE	2048
 
-struct perf_trace_buf {
-	char	buf[FTRACE_MAX_PROFILE_SIZE];
-	int	recursion;
-};
-
-extern struct perf_trace_buf	*perf_trace_buf;
-extern struct perf_trace_buf	*perf_trace_buf_nmi;
+extern char *perf_trace_buf;
+extern char *perf_trace_buf_nmi;
 
 #define MAX_FILTER_PRED		32
 #define MAX_FILTER_STR_VAL	256	/* Should handle KSYM_SYMBOL_LEN */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 36fe89f72641..74e98b1d3391 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -874,6 +874,8 @@ extern int perf_output_begin(struct perf_output_handle *handle,
 extern void perf_output_end(struct perf_output_handle *handle);
 extern void perf_output_copy(struct perf_output_handle *handle,
 			     const void *buf, unsigned int len);
+extern int perf_swevent_get_recursion_context(int **recursion);
+extern void perf_swevent_put_recursion_context(int *recursion);
 #else
 static inline void
 perf_event_task_sched_in(struct task_struct *task, int cpu)		{ }
@@ -902,6 +904,8 @@ static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
 static inline void perf_event_comm(struct task_struct *tsk)		{ }
 static inline void perf_event_fork(struct task_struct *tsk)		{ }
 static inline void perf_event_init(void)				{ }
+static int perf_swevent_get_recursion_context(int **recursion)	{ return -1; }
+static void perf_swevent_put_recursion_context(int *recursion)		{ }
 
 #endif
 
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 4945d1c99864..c222ef5238bf 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -724,16 +724,19 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 static void ftrace_profile_##call(proto)				\
 {									\
 	struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
+	extern int perf_swevent_get_recursion_context(int **recursion); \
+	extern void perf_swevent_put_recursion_context(int *recursion); \
 	struct ftrace_event_call *event_call = &event_##call;		\
 	extern void perf_tp_event(int, u64, u64, void *, int);		\
 	struct ftrace_raw_##call *entry;				\
-	struct perf_trace_buf *trace_buf;				\
 	u64 __addr = 0, __count = 1;					\
 	unsigned long irq_flags;					\
 	struct trace_entry *ent;					\
 	int __entry_size;						\
 	int __data_size;						\
+	char *trace_buf;						\
 	char *raw_data;							\
+	int *recursion;							\
 	int __cpu;							\
 	int pc;								\
 									\
@@ -749,6 +752,10 @@ static void ftrace_profile_##call(proto)				\
 		return;							\
 									\
 	local_irq_save(irq_flags);					\
+									\
+	if (perf_swevent_get_recursion_context(&recursion))		\
+		goto end_recursion;						\
+									\
 	__cpu = smp_processor_id();					\
 									\
 	if (in_nmi())							\
@@ -759,13 +766,7 @@ static void ftrace_profile_##call(proto)				\
 	if (!trace_buf)							\
 		goto end;						\
 									\
-	trace_buf = per_cpu_ptr(trace_buf, __cpu);			\
-	if (trace_buf->recursion++)					\
-		goto end_recursion;					\
-									\
-	barrier();							\
-									\
-	raw_data = trace_buf->buf;					\
+	raw_data = per_cpu_ptr(trace_buf, __cpu);			\
 									\
 	*(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;		\
 	entry = (struct ftrace_raw_##call *)raw_data;			\
@@ -780,9 +781,9 @@ static void ftrace_profile_##call(proto)				\
 	perf_tp_event(event_call->id, __addr, __count, entry,		\
 			     __entry_size);				\
 									\
-end_recursion:								\
-	trace_buf->recursion--;						\
-end:									\
+end:								\
+	perf_swevent_put_recursion_context(recursion);			\
+end_recursion:									\
 	local_irq_restore(irq_flags);					\
 									\
 }
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 718fa939b1a7..aba822722300 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -3880,34 +3880,42 @@ static void perf_swevent_ctx_event(struct perf_event_context *ctx,
 	}
 }
 
-static int *perf_swevent_recursion_context(struct perf_cpu_context *cpuctx)
+/*
+ * Must be called with preemption disabled
+ */
+int perf_swevent_get_recursion_context(int **recursion)
 {
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+
 	if (in_nmi())
-		return &cpuctx->recursion[3];
+		*recursion = &cpuctx->recursion[3];
+	else if (in_irq())
+		*recursion = &cpuctx->recursion[2];
+	else if (in_softirq())
+		*recursion = &cpuctx->recursion[1];
+	else
+		*recursion = &cpuctx->recursion[0];
 
-	if (in_irq())
-		return &cpuctx->recursion[2];
+	if (**recursion)
+		return -1;
 
-	if (in_softirq())
-		return &cpuctx->recursion[1];
+	(**recursion)++;
 
-	return &cpuctx->recursion[0];
+	return 0;
 }
 
-static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
-				    u64 nr, int nmi,
-				    struct perf_sample_data *data,
-				    struct pt_regs *regs)
+void perf_swevent_put_recursion_context(int *recursion)
 {
-	struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
-	int *recursion = perf_swevent_recursion_context(cpuctx);
-	struct perf_event_context *ctx;
-
-	if (*recursion)
-		goto out;
+	(*recursion)--;
+}
 
-	(*recursion)++;
-	barrier();
+static void __do_perf_sw_event(enum perf_type_id type, u32 event_id,
+			       u64 nr, int nmi,
+			       struct perf_sample_data *data,
+			       struct pt_regs *regs)
+{
+	struct perf_event_context *ctx;
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 
 	rcu_read_lock();
 	perf_swevent_ctx_event(&cpuctx->ctx, type, event_id,
@@ -3920,12 +3928,25 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
 	if (ctx)
 		perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs);
 	rcu_read_unlock();
+}
 
-	barrier();
-	(*recursion)--;
+static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
+				    u64 nr, int nmi,
+				    struct perf_sample_data *data,
+				    struct pt_regs *regs)
+{
+	int *recursion;
+
+	preempt_disable();
+
+	if (perf_swevent_get_recursion_context(&recursion))
+		goto out;
+
+	__do_perf_sw_event(type, event_id, nr, nmi, data, regs);
 
+	perf_swevent_put_recursion_context(recursion);
 out:
-	put_cpu_var(perf_cpu_context);
+	preempt_enable();
 }
 
 void __perf_sw_event(u32 event_id, u64 nr, int nmi,
@@ -4159,7 +4180,8 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
 	if (!regs)
 		regs = task_pt_regs(current);
 
-	do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1,
+	/* Trace events already protected against recursion */
+	__do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1,
 				&data, regs);
 }
 EXPORT_SYMBOL_GPL(perf_tp_event);
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index e0d351b01f5a..d9c60f80aa0d 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -9,31 +9,33 @@
 #include "trace.h"
 
 
-struct perf_trace_buf *perf_trace_buf;
+char *perf_trace_buf;
 EXPORT_SYMBOL_GPL(perf_trace_buf);
 
-struct perf_trace_buf *perf_trace_buf_nmi;
+char *perf_trace_buf_nmi;
 EXPORT_SYMBOL_GPL(perf_trace_buf_nmi);
 
+typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ;
+
 /* Count the events in use (per event id, not per instance) */
 static int	total_profile_count;
 
 static int ftrace_profile_enable_event(struct ftrace_event_call *event)
 {
-	struct perf_trace_buf *buf;
+	char *buf;
 	int ret = -ENOMEM;
 
 	if (atomic_inc_return(&event->profile_count))
 		return 0;
 
 	if (!total_profile_count) {
-		buf = alloc_percpu(struct perf_trace_buf);
+		buf = (char *)alloc_percpu(perf_trace_t);
 		if (!buf)
 			goto fail_buf;
 
 		rcu_assign_pointer(perf_trace_buf, buf);
 
-		buf = alloc_percpu(struct perf_trace_buf);
+		buf = (char *)alloc_percpu(perf_trace_t);
 		if (!buf)
 			goto fail_buf_nmi;
 
@@ -79,7 +81,7 @@ int ftrace_profile_enable(int event_id)
 
 static void ftrace_profile_disable_event(struct ftrace_event_call *event)
 {
-	struct perf_trace_buf *buf, *nmi_buf;
+	char *buf, *nmi_buf;
 
 	if (!atomic_add_negative(-1, &event->profile_count))
 		return;
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 3696476f307d..22e6f68b05b3 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1208,11 +1208,12 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
 	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
 	struct ftrace_event_call *call = &tp->call;
 	struct kprobe_trace_entry *entry;
-	struct perf_trace_buf *trace_buf;
 	struct trace_entry *ent;
 	int size, __size, i, pc, __cpu;
 	unsigned long irq_flags;
+	char *trace_buf;
 	char *raw_data;
+	int *recursion;
 
 	pc = preempt_count();
 	__size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
@@ -1227,6 +1228,10 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
 	 * This also protects the rcu read side
 	 */
 	local_irq_save(irq_flags);
+
+	if (perf_swevent_get_recursion_context(&recursion))
+		goto end_recursion;
+
 	__cpu = smp_processor_id();
 
 	if (in_nmi())
@@ -1237,18 +1242,7 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
 	if (!trace_buf)
 		goto end;
 
-	trace_buf = per_cpu_ptr(trace_buf, __cpu);
-
-	if (trace_buf->recursion++)
-		goto end_recursion;
-
-	/*
-	 * Make recursion update visible before entering perf_tp_event
-	 * so that we protect from perf recursions.
-	 */
-	barrier();
-
-	raw_data = trace_buf->buf;
+	raw_data = per_cpu_ptr(trace_buf, __cpu);
 
 	/* Zero dead bytes from alignment to avoid buffer leak to userspace */
 	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
@@ -1263,9 +1257,9 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
 		entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
 	perf_tp_event(call->id, entry->ip, 1, entry, size);
 
-end_recursion:
-	trace_buf->recursion--;
 end:
+	perf_swevent_put_recursion_context(recursion);
+end_recursion:
 	local_irq_restore(irq_flags);
 
 	return 0;
@@ -1278,10 +1272,11 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
 	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
 	struct ftrace_event_call *call = &tp->call;
 	struct kretprobe_trace_entry *entry;
-	struct perf_trace_buf *trace_buf;
 	struct trace_entry *ent;
 	int size, __size, i, pc, __cpu;
 	unsigned long irq_flags;
+	char *trace_buf;
+	int *recursion;
 	char *raw_data;
 
 	pc = preempt_count();
@@ -1297,6 +1292,10 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
 	 * This also protects the rcu read side
 	 */
 	local_irq_save(irq_flags);
+
+	if (perf_swevent_get_recursion_context(&recursion))
+		goto end_recursion;
+
 	__cpu = smp_processor_id();
 
 	if (in_nmi())
@@ -1307,18 +1306,7 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
 	if (!trace_buf)
 		goto end;
 
-	trace_buf = per_cpu_ptr(trace_buf, __cpu);
-
-	if (trace_buf->recursion++)
-		goto end_recursion;
-
-	/*
-	 * Make recursion update visible before entering perf_tp_event
-	 * so that we protect from perf recursions.
-	 */
-	barrier();
-
-	raw_data = trace_buf->buf;
+	raw_data = per_cpu_ptr(trace_buf, __cpu);
 
 	/* Zero dead bytes from alignment to avoid buffer leak to userspace */
 	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
@@ -1334,9 +1322,9 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
 		entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
 	perf_tp_event(call->id, entry->ret_ip, 1, entry, size);
 
-end_recursion:
-	trace_buf->recursion--;
 end:
+	perf_swevent_put_recursion_context(recursion);
+end_recursion:
 	local_irq_restore(irq_flags);
 
 	return 0;
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 51213b0aa81b..0bb934875263 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -477,10 +477,11 @@ static int sys_prof_refcount_exit;
 static void prof_syscall_enter(struct pt_regs *regs, long id)
 {
 	struct syscall_metadata *sys_data;
-	struct perf_trace_buf *trace_buf;
 	struct syscall_trace_enter *rec;
 	unsigned long flags;
+	char *trace_buf;
 	char *raw_data;
+	int *recursion;
 	int syscall_nr;
 	int size;
 	int cpu;
@@ -505,6 +506,9 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
 	/* Protect the per cpu buffer, begin the rcu read side */
 	local_irq_save(flags);
 
+	if (perf_swevent_get_recursion_context(&recursion))
+		goto end_recursion;
+
 	cpu = smp_processor_id();
 
 	if (in_nmi())
@@ -515,18 +519,7 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
 	if (!trace_buf)
 		goto end;
 
-	trace_buf = per_cpu_ptr(trace_buf, cpu);
-
-	if (trace_buf->recursion++)
-		goto end_recursion;
-
-	/*
-	 * Make recursion update visible before entering perf_tp_event
-	 * so that we protect from perf recursions.
-	 */
-	barrier();
-
-	raw_data = trace_buf->buf;
+	raw_data = per_cpu_ptr(trace_buf, cpu);
 
 	/* zero the dead bytes from align to not leak stack to user */
 	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
@@ -539,9 +532,9 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
 			       (unsigned long *)&rec->args);
 	perf_tp_event(sys_data->enter_id, 0, 1, rec, size);
 
-end_recursion:
-	trace_buf->recursion--;
 end:
+	perf_swevent_put_recursion_context(recursion);
+end_recursion:
 	local_irq_restore(flags);
 }
 
@@ -588,10 +581,11 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 {
 	struct syscall_metadata *sys_data;
 	struct syscall_trace_exit *rec;
-	struct perf_trace_buf *trace_buf;
 	unsigned long flags;
 	int syscall_nr;
+	char *trace_buf;
 	char *raw_data;
+	int *recursion;
 	int size;
 	int cpu;
 
@@ -617,6 +611,10 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 
 	/* Protect the per cpu buffer, begin the rcu read side */
 	local_irq_save(flags);
+
+	if (perf_swevent_get_recursion_context(&recursion))
+		goto end_recursion;
+
 	cpu = smp_processor_id();
 
 	if (in_nmi())
@@ -627,18 +625,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 	if (!trace_buf)
 		goto end;
 
-	trace_buf = per_cpu_ptr(trace_buf, cpu);
-
-	if (trace_buf->recursion++)
-		goto end_recursion;
-
-	/*
-	 * Make recursion update visible before entering perf_tp_event
-	 * so that we protect from perf recursions.
-	 */
-	barrier();
-
-	raw_data = trace_buf->buf;
+	raw_data = per_cpu_ptr(trace_buf, cpu);
 
 	/* zero the dead bytes from align to not leak stack to user */
 	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
@@ -652,9 +639,9 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 
 	perf_tp_event(sys_data->exit_id, 0, 1, rec, size);
 
-end_recursion:
-	trace_buf->recursion--;
 end:
+	perf_swevent_put_recursion_context(recursion);
+end_recursion:
 	local_irq_restore(flags);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 5093ebad5f2348076fdc3dac7d2358b1ad7f85f7 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sun, 22 Nov 2009 05:21:35 +0100
Subject: hw-breakpoints: Separate the kernel part from breakpoint headers

So that we can include this header from userspace tools, like
perf tools, to get the breakpoint types and len definitions.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Prasad <prasad@linux.vnet.ibm.com>
LKML-Reference: <1258863695-10464-4-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/hw_breakpoint.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index 0b98cbf76da7..4659e0c55ea6 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -16,6 +16,7 @@ enum {
 	HW_BREAKPOINT_X = 4,
 };
 
+#ifdef __KERNEL__
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 
 static inline unsigned long hw_breakpoint_addr(struct perf_event *bp)
@@ -133,5 +134,6 @@ static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp)
 }
 
 #endif /* CONFIG_HAVE_HW_BREAKPOINT */
+#endif /* __KERNEL__ */
 
 #endif /* _LINUX_HW_BREAKPOINT_H */
-- 
cgit v1.2.3-70-g09d2


From 9f680ab41485edfdc96331b70afa7513aa0a7720 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sun, 22 Nov 2009 08:53:49 -0800
Subject: rcu: Eliminate unneeded function wrapping

The functions rcu_init() is a wrapper for __rcu_init(), and also
sets up the CPU-hotplug notifier for rcu_barrier_cpu_hotplug().
But TINY_RCU doesn't need CPU-hotplug notification, and the
rcu_barrier_cpu_hotplug() is a simple wrapper for
rcu_cpu_notify().

So push rcu_init() out to kernel/rcutree.c and kernel/rcutiny.c
and get rid of the wrapper function rcu_barrier_cpu_hotplug().

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
LKML-Reference: <12589088302320-git-send-email->
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/rcutiny.h |  2 --
 include/linux/rcutree.h |  3 ---
 kernel/rcupdate.c       | 22 ----------------------
 kernel/rcutiny.c        | 11 +----------
 kernel/rcutree.c        | 17 ++++++++++++++---
 5 files changed, 15 insertions(+), 40 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 2c1fe8373e71..a3b6272af2dd 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -38,7 +38,6 @@ void rcu_bh_qs(int cpu);
 
 #define rcu_init_sched()	do { } while (0)
 extern void rcu_check_callbacks(int cpu, int user);
-extern void __rcu_init(void);
 
 /*
  * Return the number of grace periods.
@@ -69,7 +68,6 @@ static inline void synchronize_rcu_bh_expedited(void)
 }
 
 struct notifier_block;
-extern int rcu_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu);
 
 #ifdef CONFIG_NO_HZ
 
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 9642c6bcb399..111a65257350 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -34,8 +34,6 @@ struct notifier_block;
 
 extern void rcu_sched_qs(int cpu);
 extern void rcu_bh_qs(int cpu);
-extern int rcu_cpu_notify(struct notifier_block *self,
-			  unsigned long action, void *hcpu);
 extern int rcu_needs_cpu(int cpu);
 extern int rcu_expedited_torture_stats(char *page);
 
@@ -83,7 +81,6 @@ static inline void synchronize_rcu_bh_expedited(void)
 	synchronize_sched_expedited();
 }
 
-extern void __rcu_init(void);
 extern void rcu_check_callbacks(int cpu, int user);
 
 extern long rcu_batches_completed(void);
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 7625f207f65e..eb6b534db318 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -161,28 +161,6 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
 
 #endif /* #ifndef CONFIG_TINY_RCU */
 
-static int __cpuinit rcu_barrier_cpu_hotplug(struct notifier_block *self,
-		unsigned long action, void *hcpu)
-{
-	return rcu_cpu_notify(self, action, hcpu);
-}
-
-void __init rcu_init(void)
-{
-	int i;
-
-	__rcu_init();
-	cpu_notifier(rcu_barrier_cpu_hotplug, 0);
-
-	/*
-	 * We don't need protection against CPU-hotplug here because
-	 * this is called early in boot, before either interrupts
-	 * or the scheduler are operational.
-	 */
-	for_each_online_cpu(i)
-		rcu_barrier_cpu_hotplug(NULL, CPU_UP_PREPARE, (void *)(long)i);
-}
-
 void rcu_scheduler_starting(void)
 {
 	WARN_ON(num_online_cpus() != 1);
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index b33ec3aa377a..9f6d9ff2572c 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -177,15 +177,6 @@ static void rcu_process_callbacks(struct softirq_action *unused)
 	__rcu_process_callbacks(&rcu_bh_ctrlblk);
 }
 
-/*
- * Null function to handle CPU being onlined.  Longer term, we want to
- * make TINY_RCU avoid using rcupdate.c, but later...
- */
-int rcu_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
-{
-	return NOTIFY_OK;
-}
-
 /*
  * Wait for a grace period to elapse.  But it is illegal to invoke
  * synchronize_sched() from within an RCU read-side critical section.
@@ -285,7 +276,7 @@ void rcu_barrier_sched(void)
 }
 EXPORT_SYMBOL_GPL(rcu_barrier_sched);
 
-void __rcu_init(void)
+void __init rcu_init(void)
 {
 	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
 }
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index b79bfcd28e95..e3d3bbddbcd5 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1644,8 +1644,8 @@ static void __cpuinit rcu_online_cpu(int cpu)
 /*
  * Handle CPU online/offline notification events.
  */
-int __cpuinit rcu_cpu_notify(struct notifier_block *self,
-			     unsigned long action, void *hcpu)
+static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
+				    unsigned long action, void *hcpu)
 {
 	long cpu = (long)hcpu;
 
@@ -1781,8 +1781,10 @@ do { \
 	} \
 } while (0)
 
-void __init __rcu_init(void)
+void __init rcu_init(void)
 {
+	int i;
+
 	rcu_bootup_announce();
 #ifdef CONFIG_RCU_CPU_STALL_DETECTOR
 	printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
@@ -1791,6 +1793,15 @@ void __init __rcu_init(void)
 	RCU_INIT_FLAVOR(&rcu_bh_state, rcu_bh_data);
 	__rcu_init_preempt();
 	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
+
+	/*
+	 * We don't need protection against CPU-hotplug here because
+	 * this is called early in boot, before either interrupts
+	 * or the scheduler are operational.
+	 */
+	cpu_notifier(rcu_cpu_notify, 0);
+	for_each_online_cpu(i)
+		rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)i);
 }
 
 #include "rcutree_plugin.h"
-- 
cgit v1.2.3-70-g09d2


From 6ebb237bece23275d1da149b61a342f0d4d06a08 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sun, 22 Nov 2009 08:53:50 -0800
Subject: rcu: Re-arrange code to reduce #ifdef pain

Remove #ifdefs from kernel/rcupdate.c and
include/linux/rcupdate.h by moving code to
include/linux/rcutiny.h, include/linux/rcutree.h, and
kernel/rcutree.c.

Also remove some definitions that are no longer used.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
LKML-Reference: <1258908830885-git-send-email->
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/rcupdate.h |  12 ------
 include/linux/rcutiny.h  |  11 +++++
 include/linux/rcutree.h  |   4 +-
 kernel/rcupdate.c        | 104 -----------------------------------------------
 kernel/rcutree.c         |  80 ++++++++++++++++++++++++++++++++++++
 kernel/rcutree_plugin.h  |  24 +++++++++++
 6 files changed, 118 insertions(+), 117 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 2f1bc42a3b82..24440f4bf476 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -52,11 +52,6 @@ struct rcu_head {
 };
 
 /* Exported common interfaces */
-#ifdef CONFIG_TREE_PREEMPT_RCU
-extern void synchronize_rcu(void);
-#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
-#define synchronize_rcu synchronize_sched
-#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
 extern void synchronize_rcu_bh(void);
 extern void synchronize_sched(void);
 extern void rcu_barrier(void);
@@ -67,13 +62,6 @@ extern int sched_expedited_torture_stats(char *page);
 
 /* Internal to kernel */
 extern void rcu_init(void);
-extern void rcu_scheduler_starting(void);
-#ifndef CONFIG_TINY_RCU
-extern int rcu_needs_cpu(int cpu);
-#else
-static inline int rcu_needs_cpu(int cpu) { return 0; }
-#endif
-extern int rcu_scheduler_active;
 
 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU)
 #include <linux/rcutree.h>
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index a3b6272af2dd..c4ba9a78721e 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -39,6 +39,11 @@ void rcu_bh_qs(int cpu);
 #define rcu_init_sched()	do { } while (0)
 extern void rcu_check_callbacks(int cpu, int user);
 
+static inline int rcu_needs_cpu(int cpu)
+{
+	return 0;
+}
+
 /*
  * Return the number of grace periods.
  */
@@ -57,6 +62,8 @@ static inline long rcu_batches_completed_bh(void)
 
 extern int rcu_expedited_torture_stats(char *page);
 
+#define synchronize_rcu synchronize_sched
+
 static inline void synchronize_rcu_expedited(void)
 {
 	synchronize_sched();
@@ -86,6 +93,10 @@ static inline void rcu_exit_nohz(void)
 
 #endif /* #else #ifdef CONFIG_NO_HZ */
 
+static inline void rcu_scheduler_starting(void)
+{
+}
+
 static inline void exit_rcu(void)
 {
 }
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 111a65257350..c93eee5911b0 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -35,12 +35,14 @@ struct notifier_block;
 extern void rcu_sched_qs(int cpu);
 extern void rcu_bh_qs(int cpu);
 extern int rcu_needs_cpu(int cpu);
+extern void rcu_scheduler_starting(void);
 extern int rcu_expedited_torture_stats(char *page);
 
 #ifdef CONFIG_TREE_PREEMPT_RCU
 
 extern void __rcu_read_lock(void);
 extern void __rcu_read_unlock(void);
+extern void synchronize_rcu(void);
 extern void exit_rcu(void);
 
 #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
@@ -55,7 +57,7 @@ static inline void __rcu_read_unlock(void)
 	preempt_enable();
 }
 
-#define __synchronize_sched() synchronize_rcu()
+#define synchronize_rcu synchronize_sched
 
 static inline void exit_rcu(void)
 {
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index eb6b534db318..9b7fd4723878 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -44,7 +44,6 @@
 #include <linux/cpu.h>
 #include <linux/mutex.h>
 #include <linux/module.h>
-#include <linux/kernel_stat.h>
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 static struct lock_class_key rcu_lock_key;
@@ -53,8 +52,6 @@ struct lockdep_map rcu_lock_map =
 EXPORT_SYMBOL_GPL(rcu_lock_map);
 #endif
 
-int rcu_scheduler_active __read_mostly;
-
 /*
  * Awaken the corresponding synchronize_rcu() instance now that a
  * grace period has elapsed.
@@ -66,104 +63,3 @@ void wakeme_after_rcu(struct rcu_head  *head)
 	rcu = container_of(head, struct rcu_synchronize, head);
 	complete(&rcu->completion);
 }
-
-#ifndef CONFIG_TINY_RCU
-
-#ifdef CONFIG_TREE_PREEMPT_RCU
-
-/**
- * synchronize_rcu - wait until a grace period has elapsed.
- *
- * Control will return to the caller some time after a full grace
- * period has elapsed, in other words after all currently executing RCU
- * read-side critical sections have completed.  RCU read-side critical
- * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
- * and may be nested.
- */
-void synchronize_rcu(void)
-{
-	struct rcu_synchronize rcu;
-
-	if (!rcu_scheduler_active)
-		return;
-
-	init_completion(&rcu.completion);
-	/* Will wake me after RCU finished. */
-	call_rcu(&rcu.head, wakeme_after_rcu);
-	/* Wait for it. */
-	wait_for_completion(&rcu.completion);
-}
-EXPORT_SYMBOL_GPL(synchronize_rcu);
-
-#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
-
-/**
- * synchronize_sched - wait until an rcu-sched grace period has elapsed.
- *
- * Control will return to the caller some time after a full rcu-sched
- * grace period has elapsed, in other words after all currently executing
- * rcu-sched read-side critical sections have completed.   These read-side
- * critical sections are delimited by rcu_read_lock_sched() and
- * rcu_read_unlock_sched(), and may be nested.  Note that preempt_disable(),
- * local_irq_disable(), and so on may be used in place of
- * rcu_read_lock_sched().
- *
- * This means that all preempt_disable code sequences, including NMI and
- * hardware-interrupt handlers, in progress on entry will have completed
- * before this primitive returns.  However, this does not guarantee that
- * softirq handlers will have completed, since in some kernels, these
- * handlers can run in process context, and can block.
- *
- * This primitive provides the guarantees made by the (now removed)
- * synchronize_kernel() API.  In contrast, synchronize_rcu() only
- * guarantees that rcu_read_lock() sections will have completed.
- * In "classic RCU", these two guarantees happen to be one and
- * the same, but can differ in realtime RCU implementations.
- */
-void synchronize_sched(void)
-{
-	struct rcu_synchronize rcu;
-
-	if (rcu_blocking_is_gp())
-		return;
-
-	init_completion(&rcu.completion);
-	/* Will wake me after RCU finished. */
-	call_rcu_sched(&rcu.head, wakeme_after_rcu);
-	/* Wait for it. */
-	wait_for_completion(&rcu.completion);
-}
-EXPORT_SYMBOL_GPL(synchronize_sched);
-
-/**
- * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
- *
- * Control will return to the caller some time after a full rcu_bh grace
- * period has elapsed, in other words after all currently executing rcu_bh
- * read-side critical sections have completed.  RCU read-side critical
- * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(),
- * and may be nested.
- */
-void synchronize_rcu_bh(void)
-{
-	struct rcu_synchronize rcu;
-
-	if (rcu_blocking_is_gp())
-		return;
-
-	init_completion(&rcu.completion);
-	/* Will wake me after RCU finished. */
-	call_rcu_bh(&rcu.head, wakeme_after_rcu);
-	/* Wait for it. */
-	wait_for_completion(&rcu.completion);
-}
-EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
-
-#endif /* #ifndef CONFIG_TINY_RCU */
-
-void rcu_scheduler_starting(void)
-{
-	WARN_ON(num_online_cpus() != 1);
-	WARN_ON(nr_context_switches() > 0);
-	rcu_scheduler_active = 1;
-}
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index e3d3bbddbcd5..4ca7e0292fd8 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -46,6 +46,7 @@
 #include <linux/cpu.h>
 #include <linux/mutex.h>
 #include <linux/time.h>
+#include <linux/kernel_stat.h>
 
 #include "rcutree.h"
 
@@ -79,6 +80,8 @@ DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
 struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state);
 DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
 
+static int rcu_scheduler_active __read_mostly;
+
 
 /*
  * Return true if an RCU grace period is in progress.  The ACCESS_ONCE()s
@@ -1396,6 +1399,68 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 }
 EXPORT_SYMBOL_GPL(call_rcu_bh);
 
+/**
+ * synchronize_sched - wait until an rcu-sched grace period has elapsed.
+ *
+ * Control will return to the caller some time after a full rcu-sched
+ * grace period has elapsed, in other words after all currently executing
+ * rcu-sched read-side critical sections have completed.   These read-side
+ * critical sections are delimited by rcu_read_lock_sched() and
+ * rcu_read_unlock_sched(), and may be nested.  Note that preempt_disable(),
+ * local_irq_disable(), and so on may be used in place of
+ * rcu_read_lock_sched().
+ *
+ * This means that all preempt_disable code sequences, including NMI and
+ * hardware-interrupt handlers, in progress on entry will have completed
+ * before this primitive returns.  However, this does not guarantee that
+ * softirq handlers will have completed, since in some kernels, these
+ * handlers can run in process context, and can block.
+ *
+ * This primitive provides the guarantees made by the (now removed)
+ * synchronize_kernel() API.  In contrast, synchronize_rcu() only
+ * guarantees that rcu_read_lock() sections will have completed.
+ * In "classic RCU", these two guarantees happen to be one and
+ * the same, but can differ in realtime RCU implementations.
+ */
+void synchronize_sched(void)
+{
+	struct rcu_synchronize rcu;
+
+	if (rcu_blocking_is_gp())
+		return;
+
+	init_completion(&rcu.completion);
+	/* Will wake me after RCU finished. */
+	call_rcu_sched(&rcu.head, wakeme_after_rcu);
+	/* Wait for it. */
+	wait_for_completion(&rcu.completion);
+}
+EXPORT_SYMBOL_GPL(synchronize_sched);
+
+/**
+ * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
+ *
+ * Control will return to the caller some time after a full rcu_bh grace
+ * period has elapsed, in other words after all currently executing rcu_bh
+ * read-side critical sections have completed.  RCU read-side critical
+ * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(),
+ * and may be nested.
+ */
+void synchronize_rcu_bh(void)
+{
+	struct rcu_synchronize rcu;
+
+	if (rcu_blocking_is_gp())
+		return;
+
+	init_completion(&rcu.completion);
+	/* Will wake me after RCU finished. */
+	call_rcu_bh(&rcu.head, wakeme_after_rcu);
+	/* Wait for it. */
+	wait_for_completion(&rcu.completion);
+}
+EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
+
 /*
  * Check to see if there is any immediate RCU-related work to be done
  * by the current CPU, for the specified type of RCU, returning 1 if so.
@@ -1480,6 +1545,21 @@ int rcu_needs_cpu(int cpu)
 	       rcu_preempt_needs_cpu(cpu);
 }
 
+/*
+ * This function is invoked towards the end of the scheduler's initialization
+ * process.  Before this is called, the idle task might contain
+ * RCU read-side critical sections (during which time, this idle
+ * task is booting the system).  After this function is called, the
+ * idle tasks are prohibited from containing RCU read-side critical
+ * sections.
+ */
+void rcu_scheduler_starting(void)
+{
+	WARN_ON(num_online_cpus() != 1);
+	WARN_ON(nr_context_switches() > 0);
+	rcu_scheduler_active = 1;
+}
+
 static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
 static atomic_t rcu_barrier_cpu_count;
 static DEFINE_MUTEX(rcu_barrier_mutex);
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 0bdb592eee66..1d295c789d3d 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -425,6 +425,30 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 }
 EXPORT_SYMBOL_GPL(call_rcu);
 
+/**
+ * synchronize_rcu - wait until a grace period has elapsed.
+ *
+ * Control will return to the caller some time after a full grace
+ * period has elapsed, in other words after all currently executing RCU
+ * read-side critical sections have completed.  RCU read-side critical
+ * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
+ * and may be nested.
+ */
+void synchronize_rcu(void)
+{
+	struct rcu_synchronize rcu;
+
+	if (!rcu_scheduler_active)
+		return;
+
+	init_completion(&rcu.completion);
+	/* Will wake me after RCU finished. */
+	call_rcu(&rcu.head, wakeme_after_rcu);
+	/* Wait for it. */
+	wait_for_completion(&rcu.completion);
+}
+EXPORT_SYMBOL_GPL(synchronize_rcu);
+
 /*
  * Wait for an rcu-preempt grace period.  We are supposed to expedite the
  * grace period, but this is the crude slow compatability hack, so just
-- 
cgit v1.2.3-70-g09d2


From 4ed7c92d68a5387ba5f7030dc76eab03558e27f5 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 23 Nov 2009 11:37:29 +0100
Subject: perf_events: Undo some recursion damage

Make perf_swevent_get_recursion_context return a context number
and disable preemption.

This could be used to remove the IRQ disable from the trace bit
and index the per-cpu buffer with.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <20091123103819.993226816@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_event.h    |  8 ++---
 include/trace/ftrace.h        | 17 ++++++-----
 kernel/perf_event.c           | 71 +++++++++++++++++++------------------------
 kernel/trace/trace_kprobe.c   | 14 +++++----
 kernel/trace/trace_syscalls.c | 14 +++++----
 5 files changed, 61 insertions(+), 63 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 74e98b1d3391..43adbd7f0010 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -874,8 +874,8 @@ extern int perf_output_begin(struct perf_output_handle *handle,
 extern void perf_output_end(struct perf_output_handle *handle);
 extern void perf_output_copy(struct perf_output_handle *handle,
 			     const void *buf, unsigned int len);
-extern int perf_swevent_get_recursion_context(int **recursion);
-extern void perf_swevent_put_recursion_context(int *recursion);
+extern int perf_swevent_get_recursion_context(void);
+extern void perf_swevent_put_recursion_context(int rctx);
 #else
 static inline void
 perf_event_task_sched_in(struct task_struct *task, int cpu)		{ }
@@ -904,8 +904,8 @@ static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
 static inline void perf_event_comm(struct task_struct *tsk)		{ }
 static inline void perf_event_fork(struct task_struct *tsk)		{ }
 static inline void perf_event_init(void)				{ }
-static int perf_swevent_get_recursion_context(int **recursion)	{ return -1; }
-static void perf_swevent_put_recursion_context(int *recursion)		{ }
+static inline int  perf_swevent_get_recursion_context(void)  { return -1; }
+static inline void perf_swevent_put_recursion_context(int rctx)		{ }
 
 #endif
 
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index c222ef5238bf..c3417c13e3ed 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -724,8 +724,8 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 static void ftrace_profile_##call(proto)				\
 {									\
 	struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
-	extern int perf_swevent_get_recursion_context(int **recursion); \
-	extern void perf_swevent_put_recursion_context(int *recursion); \
+	extern int perf_swevent_get_recursion_context(void);		\
+	extern void perf_swevent_put_recursion_context(int rctx);	\
 	struct ftrace_event_call *event_call = &event_##call;		\
 	extern void perf_tp_event(int, u64, u64, void *, int);		\
 	struct ftrace_raw_##call *entry;				\
@@ -736,8 +736,8 @@ static void ftrace_profile_##call(proto)				\
 	int __data_size;						\
 	char *trace_buf;						\
 	char *raw_data;							\
-	int *recursion;							\
 	int __cpu;							\
+	int rctx;							\
 	int pc;								\
 									\
 	pc = preempt_count();						\
@@ -753,8 +753,9 @@ static void ftrace_profile_##call(proto)				\
 									\
 	local_irq_save(irq_flags);					\
 									\
-	if (perf_swevent_get_recursion_context(&recursion))		\
-		goto end_recursion;						\
+	rctx = perf_swevent_get_recursion_context();			\
+	if (rctx < 0)							\
+		goto end_recursion;					\
 									\
 	__cpu = smp_processor_id();					\
 									\
@@ -781,9 +782,9 @@ static void ftrace_profile_##call(proto)				\
 	perf_tp_event(event_call->id, __addr, __count, entry,		\
 			     __entry_size);				\
 									\
-end:								\
-	perf_swevent_put_recursion_context(recursion);			\
-end_recursion:									\
+end:									\
+	perf_swevent_put_recursion_context(rctx);			\
+end_recursion:								\
 	local_irq_restore(irq_flags);					\
 									\
 }
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 50f11b5f8c3d..0b0d5f72fe7d 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -3869,45 +3869,50 @@ static void perf_swevent_ctx_event(struct perf_event_context *ctx,
 	}
 }
 
-/*
- * Must be called with preemption disabled
- */
-int perf_swevent_get_recursion_context(int **recursion)
+int perf_swevent_get_recursion_context(void)
 {
-	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+	struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
+	int rctx;
 
 	if (in_nmi())
-		*recursion = &cpuctx->recursion[3];
+		rctx = 3;
 	else if (in_irq())
-		*recursion = &cpuctx->recursion[2];
+		rctx = 2;
 	else if (in_softirq())
-		*recursion = &cpuctx->recursion[1];
+		rctx = 1;
 	else
-		*recursion = &cpuctx->recursion[0];
+		rctx = 0;
 
-	if (**recursion)
+	if (cpuctx->recursion[rctx]) {
+		put_cpu_var(perf_cpu_context);
 		return -1;
+	}
 
-	(**recursion)++;
+	cpuctx->recursion[rctx]++;
+	barrier();
 
-	return 0;
+	return rctx;
 }
 EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
 
-void perf_swevent_put_recursion_context(int *recursion)
+void perf_swevent_put_recursion_context(int rctx)
 {
-	(*recursion)--;
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+	barrier();
+	cpuctx->recursion[rctx]++;
+	put_cpu_var(perf_cpu_context);
 }
 EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);
 
-static void __do_perf_sw_event(enum perf_type_id type, u32 event_id,
-			       u64 nr, int nmi,
-			       struct perf_sample_data *data,
-			       struct pt_regs *regs)
+static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
+				    u64 nr, int nmi,
+				    struct perf_sample_data *data,
+				    struct pt_regs *regs)
 {
+	struct perf_cpu_context *cpuctx;
 	struct perf_event_context *ctx;
-	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 
+	cpuctx = &__get_cpu_var(perf_cpu_context);
 	rcu_read_lock();
 	perf_swevent_ctx_event(&cpuctx->ctx, type, event_id,
 				 nr, nmi, data, regs);
@@ -3921,34 +3926,22 @@ static void __do_perf_sw_event(enum perf_type_id type, u32 event_id,
 	rcu_read_unlock();
 }
 
-static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
-				    u64 nr, int nmi,
-				    struct perf_sample_data *data,
-				    struct pt_regs *regs)
-{
-	int *recursion;
-
-	preempt_disable();
-
-	if (perf_swevent_get_recursion_context(&recursion))
-		goto out;
-
-	__do_perf_sw_event(type, event_id, nr, nmi, data, regs);
-
-	perf_swevent_put_recursion_context(recursion);
-out:
-	preempt_enable();
-}
-
 void __perf_sw_event(u32 event_id, u64 nr, int nmi,
 			    struct pt_regs *regs, u64 addr)
 {
 	struct perf_sample_data data;
+	int rctx;
+
+	rctx = perf_swevent_get_recursion_context();
+	if (rctx < 0)
+		return;
 
 	data.addr = addr;
 	data.raw  = NULL;
 
 	do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs);
+
+	perf_swevent_put_recursion_context(rctx);
 }
 
 static void perf_swevent_read(struct perf_event *event)
@@ -4172,7 +4165,7 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
 		regs = task_pt_regs(current);
 
 	/* Trace events already protected against recursion */
-	__do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1,
+	do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1,
 				&data, regs);
 }
 EXPORT_SYMBOL_GPL(perf_tp_event);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 22e6f68b05b3..79ce6a2bd74f 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1213,7 +1213,7 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
 	unsigned long irq_flags;
 	char *trace_buf;
 	char *raw_data;
-	int *recursion;
+	int rctx;
 
 	pc = preempt_count();
 	__size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
@@ -1229,7 +1229,8 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
 	 */
 	local_irq_save(irq_flags);
 
-	if (perf_swevent_get_recursion_context(&recursion))
+	rctx = perf_swevent_get_recursion_context();
+	if (rctx < 0)
 		goto end_recursion;
 
 	__cpu = smp_processor_id();
@@ -1258,7 +1259,7 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
 	perf_tp_event(call->id, entry->ip, 1, entry, size);
 
 end:
-	perf_swevent_put_recursion_context(recursion);
+	perf_swevent_put_recursion_context(rctx);
 end_recursion:
 	local_irq_restore(irq_flags);
 
@@ -1276,8 +1277,8 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
 	int size, __size, i, pc, __cpu;
 	unsigned long irq_flags;
 	char *trace_buf;
-	int *recursion;
 	char *raw_data;
+	int rctx;
 
 	pc = preempt_count();
 	__size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
@@ -1293,7 +1294,8 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
 	 */
 	local_irq_save(irq_flags);
 
-	if (perf_swevent_get_recursion_context(&recursion))
+	rctx = perf_swevent_get_recursion_context();
+	if (rctx < 0)
 		goto end_recursion;
 
 	__cpu = smp_processor_id();
@@ -1323,7 +1325,7 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
 	perf_tp_event(call->id, entry->ret_ip, 1, entry, size);
 
 end:
-	perf_swevent_put_recursion_context(recursion);
+	perf_swevent_put_recursion_context(rctx);
 end_recursion:
 	local_irq_restore(irq_flags);
 
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 41b6dd963daa..9189cbe86079 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -481,8 +481,8 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
 	unsigned long flags;
 	char *trace_buf;
 	char *raw_data;
-	int *recursion;
 	int syscall_nr;
+	int rctx;
 	int size;
 	int cpu;
 
@@ -506,7 +506,8 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
 	/* Protect the per cpu buffer, begin the rcu read side */
 	local_irq_save(flags);
 
-	if (perf_swevent_get_recursion_context(&recursion))
+	rctx = perf_swevent_get_recursion_context();
+	if (rctx < 0)
 		goto end_recursion;
 
 	cpu = smp_processor_id();
@@ -530,7 +531,7 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
 	perf_tp_event(sys_data->enter_id, 0, 1, rec, size);
 
 end:
-	perf_swevent_put_recursion_context(recursion);
+	perf_swevent_put_recursion_context(rctx);
 end_recursion:
 	local_irq_restore(flags);
 }
@@ -582,7 +583,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 	int syscall_nr;
 	char *trace_buf;
 	char *raw_data;
-	int *recursion;
+	int rctx;
 	int size;
 	int cpu;
 
@@ -609,7 +610,8 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 	/* Protect the per cpu buffer, begin the rcu read side */
 	local_irq_save(flags);
 
-	if (perf_swevent_get_recursion_context(&recursion))
+	rctx = perf_swevent_get_recursion_context();
+	if (rctx < 0)
 		goto end_recursion;
 
 	cpu = smp_processor_id();
@@ -634,7 +636,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 	perf_tp_event(sys_data->exit_id, 0, 1, rec, size);
 
 end:
-	perf_swevent_put_recursion_context(recursion);
+	perf_swevent_put_recursion_context(rctx);
 end_recursion:
 	local_irq_restore(flags);
 }
-- 
cgit v1.2.3-70-g09d2


From e6db4876575f3fdd5b1df2cbff826df95ab9af6a Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Mon, 23 Nov 2009 15:42:32 +0100
Subject: hw-breakpoints: Include only linux/perf_event.h from kernel part of
 bp headers

As userspace only needs the breakpoints enum types from the
breakpoints headers.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Prasad <prasad@linux.vnet.ibm.com>
LKML-Reference: <1258987355-8751-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/hw_breakpoint.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index 4659e0c55ea6..76a48ab9a81e 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -1,8 +1,6 @@
 #ifndef _LINUX_HW_BREAKPOINT_H
 #define _LINUX_HW_BREAKPOINT_H
 
-#include <linux/perf_event.h>
-
 enum {
 	HW_BREAKPOINT_LEN_1 = 1,
 	HW_BREAKPOINT_LEN_2 = 2,
@@ -19,6 +17,8 @@ enum {
 #ifdef __KERNEL__
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 
+#include <linux/perf_event.h>
+
 static inline unsigned long hw_breakpoint_addr(struct perf_event *bp)
 {
 	return bp->attr.bp_addr;
-- 
cgit v1.2.3-70-g09d2


From fa7c27ee9394fc0d52404b2a89882e95868a60b9 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Mon, 23 Nov 2009 22:30:12 +0100
Subject: hw-breakpoints: Fix misordered ifdef

Fix a misplaced ifdef. We need the perf event headers also in
off-case to avoid the following build error:

 include/linux/hw_breakpoint.h:94: error: expected declaration specifiers or '...' before 'perf_callback_t'
 include/linux/hw_breakpoint.h:102: error: expected declaration specifiers or '...' before 'perf_callback_t'
 include/linux/hw_breakpoint.h:109: error: expected declaration specifiers or '...' before 'perf_callback_t'
 include/linux/hw_breakpoint.h:116: error: expected declaration specifiers or '...' before 'perf_callback_t'

Reported-by: Kisskb-bot by Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Prasad <prasad@linux.vnet.ibm.com>
LKML-Reference: <1259011812-8093-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/hw_breakpoint.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index 76a48ab9a81e..c9f7f7c7b0e0 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -15,10 +15,11 @@ enum {
 };
 
 #ifdef __KERNEL__
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
 
 #include <linux/perf_event.h>
 
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+
 static inline unsigned long hw_breakpoint_addr(struct perf_event *bp)
 {
 	return bp->attr.bp_addr;
-- 
cgit v1.2.3-70-g09d2


From b3a222e52e4d4be77cc4520a57af1a4a0d8222d1 Mon Sep 17 00:00:00 2001
From: "Serge E. Hallyn" <serue@us.ibm.com>
Date: Mon, 23 Nov 2009 16:21:30 -0600
Subject: remove CONFIG_SECURITY_FILE_CAPABILITIES compile option

As far as I know, all distros currently ship kernels with default
CONFIG_SECURITY_FILE_CAPABILITIES=y.  Since having the option on
leaves a 'no_file_caps' option to boot without file capabilities,
the main reason to keep the option is that turning it off saves
you (on my s390x partition) 5k.  In particular, vmlinux sizes
came to:

without patch fscaps=n:		 	53598392
without patch fscaps=y:		 	53603406
with this patch applied:		53603342

with the security-next tree.

Against this we must weigh the fact that there is no simple way for
userspace to figure out whether file capabilities are supported,
while things like per-process securebits, capability bounding
sets, and adding bits to pI if CAP_SETPCAP is in pE are not supported
with SECURITY_FILE_CAPABILITIES=n, leaving a bit of a problem for
applications wanting to know whether they can use them and/or why
something failed.

It also adds another subtly different set of semantics which we must
maintain at the risk of severe security regressions.

So this patch removes the SECURITY_FILE_CAPABILITIES compile
option.  It drops the kernel size by about 50k over the stock
SECURITY_FILE_CAPABILITIES=y kernel, by removing the
cap_limit_ptraced_target() function.

Changelog:
	Nov 20: remove cap_limit_ptraced_target() as it's logic
		was ifndef'ed.

Signed-off-by: Serge E. Hallyn <serue@us.ibm.com>
Acked-by: Andrew G. Morgan" <morgan@kernel.org>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/capability.h |  2 --
 include/linux/init_task.h  |  4 ---
 kernel/capability.c        |  2 --
 security/Kconfig           |  9 ------
 security/commoncap.c       | 72 ++--------------------------------------------
 5 files changed, 2 insertions(+), 87 deletions(-)

(limited to 'include')

diff --git a/include/linux/capability.h b/include/linux/capability.h
index c8f2a5f70ed5..39e5ff512fbe 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -92,9 +92,7 @@ struct vfs_cap_data {
 #define _KERNEL_CAPABILITY_VERSION _LINUX_CAPABILITY_VERSION_3
 #define _KERNEL_CAPABILITY_U32S    _LINUX_CAPABILITY_U32S_3
 
-#ifdef CONFIG_SECURITY_FILE_CAPABILITIES
 extern int file_caps_enabled;
-#endif
 
 typedef struct kernel_cap_struct {
 	__u32 cap[_KERNEL_CAPABILITY_U32S];
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 21a6f5d9af22..8d10aa7fd4c9 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -83,16 +83,12 @@ extern struct group_info init_groups;
 #define INIT_IDS
 #endif
 
-#ifdef CONFIG_SECURITY_FILE_CAPABILITIES
 /*
  * Because of the reduced scope of CAP_SETPCAP when filesystem
  * capabilities are in effect, it is safe to allow CAP_SETPCAP to
  * be available in the default configuration.
  */
 # define CAP_INIT_BSET  CAP_FULL_SET
-#else
-# define CAP_INIT_BSET  CAP_INIT_EFF_SET
-#endif
 
 #ifdef CONFIG_TREE_PREEMPT_RCU
 #define INIT_TASK_RCU_PREEMPT(tsk)					\
diff --git a/kernel/capability.c b/kernel/capability.c
index c450375e855f..7f876e60521f 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -29,7 +29,6 @@ EXPORT_SYMBOL(__cap_empty_set);
 EXPORT_SYMBOL(__cap_full_set);
 EXPORT_SYMBOL(__cap_init_eff_set);
 
-#ifdef CONFIG_SECURITY_FILE_CAPABILITIES
 int file_caps_enabled = 1;
 
 static int __init file_caps_disable(char *str)
@@ -38,7 +37,6 @@ static int __init file_caps_disable(char *str)
 	return 1;
 }
 __setup("no_file_caps", file_caps_disable);
-#endif
 
 /*
  * More recent versions of libcap are available from:
diff --git a/security/Kconfig b/security/Kconfig
index 95cc08913ca1..226b9556b25f 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -91,15 +91,6 @@ config SECURITY_PATH
 	  implement pathname based access controls.
 	  If you are unsure how to answer this question, answer N.
 
-config SECURITY_FILE_CAPABILITIES
-	bool "File POSIX Capabilities"
-	default n
-	help
-	  This enables filesystem capabilities, allowing you to give
-	  binaries a subset of root's powers without using setuid 0.
-
-	  If in doubt, answer N.
-
 config INTEL_TXT
 	bool "Enable Intel(R) Trusted Execution Technology (Intel(R) TXT)"
 	depends on HAVE_INTEL_TXT
diff --git a/security/commoncap.c b/security/commoncap.c
index 45b87af4ae5d..f800fdb3de94 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -173,7 +173,6 @@ int cap_capget(struct task_struct *target, kernel_cap_t *effective,
  */
 static inline int cap_inh_is_capped(void)
 {
-#ifdef CONFIG_SECURITY_FILE_CAPABILITIES
 
 	/* they are so limited unless the current task has the CAP_SETPCAP
 	 * capability
@@ -181,7 +180,6 @@ static inline int cap_inh_is_capped(void)
 	if (cap_capable(current, current_cred(), CAP_SETPCAP,
 			SECURITY_CAP_AUDIT) == 0)
 		return 0;
-#endif
 	return 1;
 }
 
@@ -239,8 +237,6 @@ static inline void bprm_clear_caps(struct linux_binprm *bprm)
 	bprm->cap_effective = false;
 }
 
-#ifdef CONFIG_SECURITY_FILE_CAPABILITIES
-
 /**
  * cap_inode_need_killpriv - Determine if inode change affects privileges
  * @dentry: The inode/dentry in being changed with change marked ATTR_KILL_PRIV
@@ -421,49 +417,6 @@ out:
 	return rc;
 }
 
-#else
-int cap_inode_need_killpriv(struct dentry *dentry)
-{
-	return 0;
-}
-
-int cap_inode_killpriv(struct dentry *dentry)
-{
-	return 0;
-}
-
-int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps)
-{
-	memset(cpu_caps, 0, sizeof(struct cpu_vfs_cap_data));
- 	return -ENODATA;
-}
-
-static inline int get_file_caps(struct linux_binprm *bprm, bool *effective)
-{
-	bprm_clear_caps(bprm);
-	return 0;
-}
-#endif
-
-/*
- * Determine whether a exec'ing process's new permitted capabilities should be
- * limited to just what it already has.
- *
- * This prevents processes that are being ptraced from gaining access to
- * CAP_SETPCAP, unless the process they're tracing already has it, and the
- * binary they're executing has filecaps that elevate it.
- *
- *  Returns 1 if they should be limited, 0 if they are not.
- */
-static inline int cap_limit_ptraced_target(void)
-{
-#ifndef CONFIG_SECURITY_FILE_CAPABILITIES
-	if (capable(CAP_SETPCAP))
-		return 0;
-#endif
-	return 1;
-}
-
 /**
  * cap_bprm_set_creds - Set up the proposed credentials for execve().
  * @bprm: The execution parameters, including the proposed creds
@@ -523,9 +476,8 @@ skip:
 			new->euid = new->uid;
 			new->egid = new->gid;
 		}
-		if (cap_limit_ptraced_target())
-			new->cap_permitted = cap_intersect(new->cap_permitted,
-							   old->cap_permitted);
+		new->cap_permitted = cap_intersect(new->cap_permitted,
+						   old->cap_permitted);
 	}
 
 	new->suid = new->fsuid = new->euid;
@@ -739,7 +691,6 @@ int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags)
 	return 0;
 }
 
-#ifdef CONFIG_SECURITY_FILE_CAPABILITIES
 /*
  * Rationale: code calling task_setscheduler, task_setioprio, and
  * task_setnice, assumes that
@@ -820,22 +771,6 @@ static long cap_prctl_drop(struct cred *new, unsigned long cap)
 	return 0;
 }
 
-#else
-int cap_task_setscheduler (struct task_struct *p, int policy,
-			   struct sched_param *lp)
-{
-	return 0;
-}
-int cap_task_setioprio (struct task_struct *p, int ioprio)
-{
-	return 0;
-}
-int cap_task_setnice (struct task_struct *p, int nice)
-{
-	return 0;
-}
-#endif
-
 /**
  * cap_task_prctl - Implement process control functions for this security module
  * @option: The process control function requested
@@ -866,7 +801,6 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
 		error = !!cap_raised(new->cap_bset, arg2);
 		goto no_change;
 
-#ifdef CONFIG_SECURITY_FILE_CAPABILITIES
 	case PR_CAPBSET_DROP:
 		error = cap_prctl_drop(new, arg2);
 		if (error < 0)
@@ -917,8 +851,6 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
 		error = new->securebits;
 		goto no_change;
 
-#endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */
-
 	case PR_GET_KEEPCAPS:
 		if (issecure(SECURE_KEEP_CAPS))
 			error = 1;
-- 
cgit v1.2.3-70-g09d2


From c9286b7e293a1ea054e857ff3f5a23d0ad8d4f36 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 16 Nov 2009 19:50:38 +0100
Subject: locking: Remove unused prototype

commit 910067d1(remove generic__raw_read_trylock()) removed the
implementation but left the prototype around. Remove it.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/spinlock.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index f0ca7a7a1757..faf1482028df 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -79,8 +79,6 @@
  */
 #include <linux/spinlock_types.h>
 
-extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock);
-
 /*
  * Pull the __raw*() functions/declarations (UP-nondebug doesnt need them):
  */
-- 
cgit v1.2.3-70-g09d2


From a49ed0bf427a8328a3296eebedc7697fe5098dbf Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 16 Nov 2009 19:57:50 +0100
Subject: locking: Use __[SPIN|RW]_LOCK_UNLOCKED in [spin|rw]_lock_init()

SPIN_LOCK_UNLOCKED and RW_LOCK_UNLOCKED are deprecated. Replace them
with the __*_LOCK_UNLOCKED variants.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/spinlock.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index faf1482028df..71dccfeb0d88 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -100,7 +100,7 @@ do {								\
 
 #else
 # define spin_lock_init(lock)					\
-	do { *(lock) = SPIN_LOCK_UNLOCKED; } while (0)
+	do { *(lock) = __SPIN_LOCK_UNLOCKED(lock); } while (0)
 #endif
 
 #ifdef CONFIG_DEBUG_SPINLOCK
@@ -114,7 +114,7 @@ do {								\
 } while (0)
 #else
 # define rwlock_init(lock)					\
-	do { *(lock) = RW_LOCK_UNLOCKED; } while (0)
+	do { *(lock) = __RW_LOCK_UNLOCKED(lock); } while (0)
 #endif
 
 #define spin_is_locked(lock)	__raw_spin_is_locked(&(lock)->raw_lock)
-- 
cgit v1.2.3-70-g09d2


From ff038f5c37c2070829004a0678372766c2b32180 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 18 Nov 2009 20:27:27 -0500
Subject: tracing: Create new TRACE_EVENT_TEMPLATE

There are some places in the kernel that define several tracepoints and
they are all identical besides the name. The code to enable, disable and
record is created for every trace point even if most of the code is
identical.

This patch adds TRACE_EVENT_TEMPLATE that lets the developer create
a template TRACE_EVENT and create trace points with DEFINE_EVENT, which
is based off of a given template. Each trace point used by this
will share most of the code, and bring down the size of the kernel
when there are several duplicate events.

Usage is:

TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print);

Which would be the same as defining a normal TRACE_EVENT.

To create the trace events that the trace points will use:

DEFINE_EVENT(template, name, proto, args) is done. The template
is the name of the TRACE_EVENT_TEMPLATE to use. The name is the
name of the trace point. The parameters proto and args must be the same
as the proto and args of the template. If they are not the same,
then a compile error will result. I tried hard removing this duplication
but the C preprocessor is not powerful enough (or my CPP magic
experience points is not at a high enough level) to not need them.

A lot of trace events are coming in with new XFS development. Most of
the trace points are identical except for the name. The following shows
the advantage of having TRACE_EVENT_TEMPLATE:

$ size fs/xfs/xfs.o.*
    text          data     bss     dec     hex filename
  452114          2788    3520  458422   6feb6 fs/xfs/xfs.o.old
  638482         38116    3744  680342   a6196 fs/xfs/xfs.o.template
  996954         38116    4480 1039550   fdcbe fs/xfs/xfs.o.trace

xfs.o.old is without any tracepoints.
xfs.o.template uses the new TRACE_EVENT_TEMPLATE.
xfs.o.trace uses the current TRACE_EVENT macros.

Requested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/tracepoint.h   |   4 ++
 include/trace/define_trace.h |   6 ++
 include/trace/ftrace.h       | 149 +++++++++++++++++++++++++++++++------------
 3 files changed, 117 insertions(+), 42 deletions(-)

(limited to 'include')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 2aac8a83e89b..88a5b5a809ec 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -280,6 +280,10 @@ static inline void tracepoint_synchronize_unregister(void)
  * TRACE_EVENT_FN to perform any (un)registration work.
  */
 
+#define TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print)
+#define DEFINE_EVENT(template, name, proto, args)		\
+	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+
 #define TRACE_EVENT(name, proto, args, struct, assign, print)	\
 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
 #define TRACE_EVENT_FN(name, proto, args, struct,		\
diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
index 2a4b3bf74033..244985814a43 100644
--- a/include/trace/define_trace.h
+++ b/include/trace/define_trace.h
@@ -31,6 +31,10 @@
 		assign, print, reg, unreg)			\
 	DEFINE_TRACE_FN(name, reg, unreg)
 
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, name, proto, args) \
+	DEFINE_TRACE(name)
+
 #undef DECLARE_TRACE
 #define DECLARE_TRACE(name, proto, args)	\
 	DEFINE_TRACE(name)
@@ -63,6 +67,8 @@
 
 #undef TRACE_EVENT
 #undef TRACE_EVENT_FN
+#undef TRACE_EVENT_TEMPLATE
+#undef DEFINE_EVENT
 #undef TRACE_HEADER_MULTI_READ
 
 /* Only undef what we defined in this file */
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index c3417c13e3ed..2969f65d8002 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -18,6 +18,26 @@
 
 #include <linux/ftrace_event.h>
 
+/*
+ * TRACE_EVENT_TEMPLATE can be used to add a generic function
+ * handlers for events. That is, if all events have the same
+ * parameters and just have distinct trace points.
+ * Each tracepoint can be defined with DEFINE_EVENT and that
+ * will map the TRACE_EVENT_TEMPLATE to the tracepoint.
+ *
+ * TRACE_EVENT is a one to one mapping between tracepoint and template.
+ */
+#undef TRACE_EVENT
+#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \
+	TRACE_EVENT_TEMPLATE(name,			       \
+			     PARAMS(proto),		       \
+			     PARAMS(args),		       \
+			     PARAMS(tstruct),		       \
+			     PARAMS(assign),		       \
+			     PARAMS(print));		       \
+	DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args));
+
+
 #undef __field
 #define __field(type, item)		type	item;
 
@@ -36,13 +56,15 @@
 #undef TP_STRUCT__entry
 #define TP_STRUCT__entry(args...) args
 
-#undef TRACE_EVENT
-#define TRACE_EVENT(name, proto, args, tstruct, assign, print)	\
-	struct ftrace_raw_##name {				\
-		struct trace_entry	ent;			\
-		tstruct						\
-		char			__data[0];		\
-	};							\
+#undef TRACE_EVENT_TEMPLATE
+#define TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print)	\
+	struct ftrace_raw_##name {					\
+		struct trace_entry	ent;				\
+		tstruct							\
+		char			__data[0];			\
+	};
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, name, proto, args)	\
 	static struct ftrace_event_call event_##name
 
 #undef __cpparg
@@ -89,12 +111,15 @@
 #undef __string
 #define __string(item, src) __dynamic_array(char, item, -1)
 
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
+#undef TRACE_EVENT_TEMPLATE
+#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print)	\
 	struct ftrace_data_offsets_##call {				\
 		tstruct;						\
 	};
 
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, name, proto, args)
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 /*
@@ -170,8 +195,8 @@
 #undef TP_perf_assign
 #define TP_perf_assign(args...)
 
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, func, print)		\
+#undef TRACE_EVENT_TEMPLATE
+#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, func, print)	\
 static int								\
 ftrace_format_##call(struct ftrace_event_call *unused,			\
 		      struct trace_seq *s)				\
@@ -186,6 +211,9 @@ ftrace_format_##call(struct ftrace_event_call *unused,			\
 	return ret;							\
 }
 
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, name, proto, args)
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 /*
@@ -255,10 +283,11 @@ ftrace_format_##call(struct ftrace_event_call *unused,			\
 		ftrace_print_symbols_seq(p, value, symbols);		\
 	})
 
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
+#undef TRACE_EVENT_TEMPLATE
+#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print)	\
 static enum print_line_t						\
-ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
+ftrace_raw_output_id_##call(int event_id, const char *name,		\
+			    struct trace_iterator *iter, int flags)	\
 {									\
 	struct trace_seq *s = &iter->seq;				\
 	struct ftrace_raw_##call *field;				\
@@ -268,7 +297,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
 									\
 	entry = iter->ent;						\
 									\
-	if (entry->type != event_##call.id) {				\
+	if (entry->type != event_id) {					\
 		WARN_ON_ONCE(1);					\
 		return TRACE_TYPE_UNHANDLED;				\
 	}								\
@@ -277,14 +306,25 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
 									\
 	p = &get_cpu_var(ftrace_event_seq);				\
 	trace_seq_init(p);						\
-	ret = trace_seq_printf(s, #call ": " print);			\
+	ret = trace_seq_printf(s, "%s: ", name);			\
+	if (ret)							\
+		ret = trace_seq_printf(s, print);			\
 	put_cpu();							\
 	if (!ret)							\
 		return TRACE_TYPE_PARTIAL_LINE;				\
 									\
 	return TRACE_TYPE_HANDLED;					\
 }
-	
+
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, name, proto, args)			\
+static enum print_line_t						\
+ftrace_raw_output_##name(struct trace_iterator *iter, int flags)	\
+{									\
+	return ftrace_raw_output_id_##template(event_##name.id,		\
+					       #name, iter, flags);	\
+}
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 #undef __field_ext
@@ -318,8 +358,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
 #undef __string
 #define __string(item, src) __dynamic_array(char, item, -1)
 
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, func, print)		\
+#undef TRACE_EVENT_TEMPLATE
+#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, func, print)	\
 static int								\
 ftrace_define_fields_##call(struct ftrace_event_call *event_call)	\
 {									\
@@ -335,6 +375,9 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call)	\
 	return ret;							\
 }
 
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, name, proto, args)
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 /*
@@ -361,10 +404,10 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call)	\
 	__data_size += (len) * sizeof(type);
 
 #undef __string
-#define __string(item, src) __dynamic_array(char, item, strlen(src) + 1)       \
+#define __string(item, src) __dynamic_array(char, item, strlen(src) + 1)
 
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
+#undef TRACE_EVENT_TEMPLATE
+#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print)	\
 static inline int ftrace_get_offsets_##call(				\
 	struct ftrace_data_offsets_##call *__data_offsets, proto)       \
 {									\
@@ -376,6 +419,9 @@ static inline int ftrace_get_offsets_##call(				\
 	return __data_size;						\
 }
 
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, name, proto, args)
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 #ifdef CONFIG_EVENT_PROFILE
@@ -397,19 +443,22 @@ static inline int ftrace_get_offsets_##call(				\
  *
  */
 
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
+#undef TRACE_EVENT_TEMPLATE
+#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print)
+
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, name, proto, args)			\
 									\
-static void ftrace_profile_##call(proto);				\
+static void ftrace_profile_##name(proto);				\
 									\
-static int ftrace_profile_enable_##call(struct ftrace_event_call *unused)\
+static int ftrace_profile_enable_##name(struct ftrace_event_call *unused)\
 {									\
-	return register_trace_##call(ftrace_profile_##call);		\
+	return register_trace_##name(ftrace_profile_##name);		\
 }									\
 									\
-static void ftrace_profile_disable_##call(struct ftrace_event_call *unused)\
+static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\
 {									\
-	unregister_trace_##call(ftrace_profile_##call);			\
+	unregister_trace_##name(ftrace_profile_##name);			\
 }
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
@@ -550,15 +599,13 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *unused)\
 #define __assign_str(dst, src)						\
 	strcpy(__get_str(dst), src);
 
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
-									\
-static struct ftrace_event_call event_##call;				\
+#undef TRACE_EVENT_TEMPLATE
+#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print)	\
 									\
-static void ftrace_raw_event_##call(proto)				\
+static void ftrace_raw_event_id_##call(struct ftrace_event_call *event_call, \
+				       proto)				\
 {									\
 	struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
-	struct ftrace_event_call *event_call = &event_##call;		\
 	struct ring_buffer_event *event;				\
 	struct ftrace_raw_##call *entry;				\
 	struct ring_buffer *buffer;					\
@@ -572,7 +619,7 @@ static void ftrace_raw_event_##call(proto)				\
 	__data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
 									\
 	event = trace_current_buffer_lock_reserve(&buffer,		\
-				 event_##call.id,			\
+				 event_call->id,			\
 				 sizeof(*entry) + __data_size,		\
 				 irq_flags, pc);			\
 	if (!event)							\
@@ -587,6 +634,14 @@ static void ftrace_raw_event_##call(proto)				\
 	if (!filter_current_check_discard(buffer, event_call, entry, event)) \
 		trace_nowake_buffer_unlock_commit(buffer,		\
 						  event, irq_flags, pc); \
+}
+
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, call, proto, args)			\
+									\
+static void ftrace_raw_event_##call(proto)				\
+{									\
+	ftrace_raw_event_id_##template(&event_##call, args);		\
 }									\
 									\
 static int ftrace_raw_reg_event_##call(struct ftrace_event_call *unused)\
@@ -630,8 +685,8 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 	.raw_init		= ftrace_raw_init_event_##call,		\
 	.regfunc		= ftrace_raw_reg_event_##call,		\
 	.unregfunc		= ftrace_raw_unreg_event_##call,	\
-	.show_format		= ftrace_format_##call,			\
-	.define_fields		= ftrace_define_fields_##call,		\
+	.show_format		= ftrace_format_##template,		\
+	.define_fields		= ftrace_define_fields_##template,	\
 	_TRACE_PROFILE_INIT(call)					\
 }
 
@@ -719,14 +774,15 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 #undef __perf_count
 #define __perf_count(c) __count = (c)
 
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
-static void ftrace_profile_##call(proto)				\
+#undef TRACE_EVENT_TEMPLATE
+#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print)	\
+static void								\
+ftrace_profile_templ_##call(struct ftrace_event_call *event_call,	\
+			    proto)					\
 {									\
 	struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
 	extern int perf_swevent_get_recursion_context(void);		\
 	extern void perf_swevent_put_recursion_context(int rctx);	\
-	struct ftrace_event_call *event_call = &event_##call;		\
 	extern void perf_tp_event(int, u64, u64, void *, int);		\
 	struct ftrace_raw_##call *entry;				\
 	u64 __addr = 0, __count = 1;					\
@@ -789,6 +845,15 @@ end_recursion:								\
 									\
 }
 
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, call, proto, args)		\
+static void ftrace_profile_##call(proto)			\
+{								\
+	struct ftrace_event_call *event_call = &event_##call;	\
+								\
+	ftrace_profile_templ_##template(event_call, args);	\
+}
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 #endif /* CONFIG_EVENT_PROFILE */
 
-- 
cgit v1.2.3-70-g09d2


From e5bc9721684e9412f3e0465222f317c362a8ab47 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 18 Nov 2009 20:36:26 -0500
Subject: tracing: Create new DEFINE_EVENT_PRINT

After creating the TRACE_EVENT_TEMPLATE I started to look at other
trace points to see what duplication was made. I noticed that there
are several trace points where they are almost identical except for
the name and the output format. Since TRACE_EVENT_TEMPLATE was successful
in bringing down the size of trace events, I added a DEFINE_EVENT_PRINT.

DEFINE_EVENT_PRINT is used just like DEFINE_EVENT is. That is, the
DEFINE_EVENT_PRINT also uses a TRACE_EVENT_TEMPLATE, but it allows the
developer to overwrite the print format. If there are two or more
TRACE_EVENTS that are identical except for the name and print, then
they can be converted to use a TRACE_EVENT_TEMPLATE. Since the
TRACE_EVENT_TEMPLATE already does the print output, the first trace event
would have its print format held in the TRACE_EVENT_TEMPLATE and
be defined with a DEFINE_EVENT. The rest will use the DEFINE_EVENT_PRINT
and override the print format.

Converting the sched trace points to both DEFINE_EVENT and
DEFINE_EVENT_PRINT. Five were converted to DEFINE_EVENT and two were
converted to DEFINE_EVENT_PRINT.

I was able to get the following:

$ size kernel/sched.o-*
   text	   data	    bss	    dec	    hex	filename
  79299	   6776	   2520	  88595	  15a13	kernel/sched.o-notrace
 101941	  11896	   2584	 116421	  1c6c5	kernel/sched.o-templ
 104779	  11896	   2584	 119259	  1d1db	kernel/sched.o-trace

sched.o-notrace is the scheduler compiled with no trace points.
sched.o-templ is with the use of DEFINE_EVENT and DEFINE_EVENT_PRINT
sched.o-trace is the current trace events.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/tracepoint.h   |   2 +
 include/trace/define_trace.h |   5 ++
 include/trace/ftrace.h       | 123 +++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 126 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 88a5b5a809ec..7063383cca13 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -283,6 +283,8 @@ static inline void tracepoint_synchronize_unregister(void)
 #define TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print)
 #define DEFINE_EVENT(template, name, proto, args)		\
 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
+	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
 
 #define TRACE_EVENT(name, proto, args, struct, assign, print)	\
 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
index 244985814a43..5d7d855ae21e 100644
--- a/include/trace/define_trace.h
+++ b/include/trace/define_trace.h
@@ -35,6 +35,10 @@
 #define DEFINE_EVENT(template, name, proto, args) \
 	DEFINE_TRACE(name)
 
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
+	DEFINE_TRACE(name)
+
 #undef DECLARE_TRACE
 #define DECLARE_TRACE(name, proto, args)	\
 	DEFINE_TRACE(name)
@@ -69,6 +73,7 @@
 #undef TRACE_EVENT_FN
 #undef TRACE_EVENT_TEMPLATE
 #undef DEFINE_EVENT
+#undef DEFINE_EVENT_PRINT
 #undef TRACE_HEADER_MULTI_READ
 
 /* Only undef what we defined in this file */
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 2969f65d8002..b0461772bc8d 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -67,6 +67,10 @@
 #define DEFINE_EVENT(template, name, proto, args)	\
 	static struct ftrace_event_call event_##name
 
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
+	DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+
 #undef __cpparg
 #define __cpparg(arg...) arg
 
@@ -120,6 +124,10 @@
 #undef DEFINE_EVENT
 #define DEFINE_EVENT(template, name, proto, args)
 
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
+	DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 /*
@@ -198,15 +206,28 @@
 #undef TRACE_EVENT_TEMPLATE
 #define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, func, print)	\
 static int								\
-ftrace_format_##call(struct ftrace_event_call *unused,			\
-		      struct trace_seq *s)				\
+ftrace_format_setup_##call(struct ftrace_event_call *unused,		\
+			   struct trace_seq *s)				\
 {									\
 	struct ftrace_raw_##call field __attribute__((unused));		\
 	int ret = 0;							\
 									\
 	tstruct;							\
 									\
-	trace_seq_printf(s, "\nprint fmt: " print);			\
+	return ret;							\
+}									\
+									\
+static int								\
+ftrace_format_##call(struct ftrace_event_call *unused,			\
+		     struct trace_seq *s)				\
+{									\
+	int ret = 0;							\
+									\
+	ret = ftrace_format_setup_##call(unused, s);			\
+	if (!ret)							\
+		return ret;						\
+									\
+	ret = trace_seq_printf(s, "\nprint fmt: " print);		\
 									\
 	return ret;							\
 }
@@ -214,6 +235,23 @@ ftrace_format_##call(struct ftrace_event_call *unused,			\
 #undef DEFINE_EVENT
 #define DEFINE_EVENT(template, name, proto, args)
 
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print)		\
+static int								\
+ftrace_format_##name(struct ftrace_event_call *unused,			\
+		      struct trace_seq *s)				\
+{									\
+	int ret = 0;							\
+									\
+	ret = ftrace_format_setup_##template(unused, s);		\
+	if (!ret)							\
+		return ret;						\
+									\
+	trace_seq_printf(s, "\nprint fmt: " print);			\
+									\
+	return ret;							\
+}
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 /*
@@ -325,6 +363,38 @@ ftrace_raw_output_##name(struct trace_iterator *iter, int flags)	\
 					       #name, iter, flags);	\
 }
 
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, call, proto, args, print)		\
+static enum print_line_t						\
+ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
+{									\
+	struct trace_seq *s = &iter->seq;				\
+	struct ftrace_raw_##template *field;				\
+	struct trace_entry *entry;					\
+	struct trace_seq *p;						\
+	int ret;							\
+									\
+	entry = iter->ent;						\
+									\
+	if (entry->type != event_##call.id) {				\
+		WARN_ON_ONCE(1);					\
+		return TRACE_TYPE_UNHANDLED;				\
+	}								\
+									\
+	field = (typeof(field))entry;					\
+									\
+	p = &get_cpu_var(ftrace_event_seq);				\
+	trace_seq_init(p);						\
+	ret = trace_seq_printf(s, "%s: ", #call);			\
+	if (ret)							\
+		ret = trace_seq_printf(s, print);			\
+	put_cpu();							\
+	if (!ret)							\
+		return TRACE_TYPE_PARTIAL_LINE;				\
+									\
+	return TRACE_TYPE_HANDLED;					\
+}
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 #undef __field_ext
@@ -378,6 +448,10 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call)	\
 #undef DEFINE_EVENT
 #define DEFINE_EVENT(template, name, proto, args)
 
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
+	DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 /*
@@ -422,6 +496,10 @@ static inline int ftrace_get_offsets_##call(				\
 #undef DEFINE_EVENT
 #define DEFINE_EVENT(template, name, proto, args)
 
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
+	DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 #ifdef CONFIG_EVENT_PROFILE
@@ -461,6 +539,10 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\
 	unregister_trace_##name(ftrace_profile_##name);			\
 }
 
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
+	DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 #endif
@@ -674,7 +756,19 @@ static int ftrace_raw_init_event_##call(struct ftrace_event_call *unused)\
 	event_##call.id = id;						\
 	INIT_LIST_HEAD(&event_##call.fields);				\
 	return 0;							\
-}									\
+}
+
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
+	DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+#undef TRACE_EVENT_TEMPLATE
+#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print)
+
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, call, proto, args)			\
 									\
 static struct ftrace_event_call __used					\
 __attribute__((__aligned__(4)))						\
@@ -690,6 +784,23 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 	_TRACE_PROFILE_INIT(call)					\
 }
 
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, call, proto, args, print)		\
+									\
+static struct ftrace_event_call __used					\
+__attribute__((__aligned__(4)))						\
+__attribute__((section("_ftrace_events"))) event_##call = {		\
+	.name			= #call,				\
+	.system			= __stringify(TRACE_SYSTEM),		\
+	.event			= &ftrace_event_type_##call,		\
+	.raw_init		= ftrace_raw_init_event_##call,		\
+	.regfunc		= ftrace_raw_reg_event_##call,		\
+	.unregfunc		= ftrace_raw_unreg_event_##call,	\
+	.show_format		= ftrace_format_##call,			\
+	.define_fields		= ftrace_define_fields_##template,	\
+	_TRACE_PROFILE_INIT(call)					\
+}
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 /*
@@ -854,6 +965,10 @@ static void ftrace_profile_##call(proto)			\
 	ftrace_profile_templ_##template(event_call, args);	\
 }
 
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
+	DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 #endif /* CONFIG_EVENT_PROFILE */
 
-- 
cgit v1.2.3-70-g09d2


From 75ec29ab848a7e92a41aaafaeb33d1afbc839be4 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 18 Nov 2009 20:48:08 -0500
Subject: tracing: Convert some sched trace events to DEFINE_EVENT and _PRINT

Converting some of the scheduler trace events to use the
TRACE_EVENT_TEMPLATE, DEFINE_EVENT and DEFINE_EVENT_PRINT helped to
save some space:

$ size kernel/sched.o-*
   text	   data	    bss	    dec	    hex	filename
  79299	   6776	   2520	  88595	  15a13	kernel/sched.o-notrace
 101941	  11896	   2584	 116421	  1c6c5	kernel/sched.o-templ
 104779	  11896	   2584	 119259	  1d1db	kernel/sched.o-trace

sched.o-notrace is without any tracepoints compiled
sched.o-templ is with this patch
sched.o-trace is the tracepoints before this patch

The trace events converted to DEFINE_EVENT:

sched_wakeup, sched_wakeup_new, sched_process_free, sched_process_exit,
and sched_stat_wait.

The trace events converted to DEFINE_EVENT_PRINT:

sched_stat_sleep and sched_stat_iowait.

Note, since the TRACE_EVENT_TEMPLATE always uses a print, the
sched_stat_wait print format is defined in the template and this
template is used by sched_stat_sleep and sched_stat_iowait. But the
later two override the print format.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/trace/events/sched.h | 170 +++++++++++++------------------------------
 1 file changed, 52 insertions(+), 118 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index b50b9856c59f..238f74b58486 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -83,7 +83,7 @@ TRACE_EVENT(sched_wait_task,
  * (NOTE: the 'rq' argument is not used by generic trace events,
  *        but used by the latency tracer plugin. )
  */
-TRACE_EVENT(sched_wakeup,
+TRACE_EVENT_TEMPLATE(sched_wakeup_template,
 
 	TP_PROTO(struct rq *rq, struct task_struct *p, int success),
 
@@ -110,38 +110,19 @@ TRACE_EVENT(sched_wakeup,
 		  __entry->success, __entry->target_cpu)
 );
 
+DEFINE_EVENT(sched_wakeup_template, sched_wakeup,
+	     TP_PROTO(struct rq *rq, struct task_struct *p, int success),
+	     TP_ARGS(rq, p, success));
+
 /*
  * Tracepoint for waking up a new task:
  *
  * (NOTE: the 'rq' argument is not used by generic trace events,
  *        but used by the latency tracer plugin. )
  */
-TRACE_EVENT(sched_wakeup_new,
-
-	TP_PROTO(struct rq *rq, struct task_struct *p, int success),
-
-	TP_ARGS(rq, p, success),
-
-	TP_STRUCT__entry(
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-		__field(	int,	prio			)
-		__field(	int,	success			)
-		__field(	int,	target_cpu		)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-		__entry->pid		= p->pid;
-		__entry->prio		= p->prio;
-		__entry->success	= success;
-		__entry->target_cpu	= task_cpu(p);
-	),
-
-	TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d",
-		  __entry->comm, __entry->pid, __entry->prio,
-		  __entry->success, __entry->target_cpu)
-);
+DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
+	     TP_PROTO(struct rq *rq, struct task_struct *p, int success),
+	     TP_ARGS(rq, p, success));
 
 /*
  * Tracepoint for task switches, performed by the scheduler:
@@ -216,10 +197,7 @@ TRACE_EVENT(sched_migrate_task,
 		  __entry->orig_cpu, __entry->dest_cpu)
 );
 
-/*
- * Tracepoint for freeing a task:
- */
-TRACE_EVENT(sched_process_free,
+TRACE_EVENT_TEMPLATE(sched_process_template,
 
 	TP_PROTO(struct task_struct *p),
 
@@ -242,29 +220,19 @@ TRACE_EVENT(sched_process_free,
 );
 
 /*
- * Tracepoint for a task exiting:
+ * Tracepoint for freeing a task:
  */
-TRACE_EVENT(sched_process_exit,
-
-	TP_PROTO(struct task_struct *p),
+DEFINE_EVENT(sched_process_template, sched_process_free,
+	     TP_PROTO(struct task_struct *p),
+	     TP_ARGS(p));
+	     
 
-	TP_ARGS(p),
-
-	TP_STRUCT__entry(
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-		__field(	int,	prio			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-		__entry->pid		= p->pid;
-		__entry->prio		= p->prio;
-	),
-
-	TP_printk("comm=%s pid=%d prio=%d",
-		  __entry->comm, __entry->pid, __entry->prio)
-);
+/*
+ * Tracepoint for a task exiting:
+ */
+DEFINE_EVENT(sched_process_template, sched_process_exit,
+	     TP_PROTO(struct task_struct *p),
+	     TP_ARGS(p));
 
 /*
  * Tracepoint for a waiting task:
@@ -348,12 +316,7 @@ TRACE_EVENT(sched_signal_send,
  * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE
  *     adding sched_stat support to SCHED_FIFO/RR would be welcome.
  */
-
-/*
- * Tracepoint for accounting wait time (time the task is runnable
- * but not actually running due to scheduler contention).
- */
-TRACE_EVENT(sched_stat_wait,
+TRACE_EVENT_TEMPLATE(sched_stat_template,
 
 	TP_PROTO(struct task_struct *tsk, u64 delay),
 
@@ -379,6 +342,37 @@ TRACE_EVENT(sched_stat_wait,
 			(unsigned long long)__entry->delay)
 );
 
+
+/*
+ * Tracepoint for accounting wait time (time the task is runnable
+ * but not actually running due to scheduler contention).
+ */
+DEFINE_EVENT(sched_stat_template, sched_stat_wait,
+	     TP_PROTO(struct task_struct *tsk, u64 delay),
+	     TP_ARGS(tsk, delay));
+
+/*
+ * Tracepoint for accounting sleep time (time the task is not runnable,
+ * including iowait, see below).
+ */
+DEFINE_EVENT_PRINT(sched_stat_template, sched_stat_sleep,
+		   TP_PROTO(struct task_struct *tsk, u64 delay),
+		   TP_ARGS(tsk, delay),
+		   TP_printk("task: %s:%d sleep: %Lu [ns]",
+			     __entry->comm, __entry->pid,
+			     (unsigned long long)__entry->delay));
+
+/*
+ * Tracepoint for accounting iowait time (time the task is not runnable
+ * due to waiting on IO to complete).
+ */
+DEFINE_EVENT_PRINT(sched_stat_template, sched_stat_iowait,
+		   TP_PROTO(struct task_struct *tsk, u64 delay),
+		   TP_ARGS(tsk, delay),
+		   TP_printk("task: %s:%d iowait: %Lu [ns]",
+			     __entry->comm, __entry->pid,
+			     (unsigned long long)__entry->delay));
+
 /*
  * Tracepoint for accounting runtime (time the task is executing
  * on a CPU).
@@ -412,66 +406,6 @@ TRACE_EVENT(sched_stat_runtime,
 			(unsigned long long)__entry->vruntime)
 );
 
-/*
- * Tracepoint for accounting sleep time (time the task is not runnable,
- * including iowait, see below).
- */
-TRACE_EVENT(sched_stat_sleep,
-
-	TP_PROTO(struct task_struct *tsk, u64 delay),
-
-	TP_ARGS(tsk, delay),
-
-	TP_STRUCT__entry(
-		__array( char,	comm,	TASK_COMM_LEN	)
-		__field( pid_t,	pid			)
-		__field( u64,	delay			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
-		__entry->pid	= tsk->pid;
-		__entry->delay	= delay;
-	)
-	TP_perf_assign(
-		__perf_count(delay);
-	),
-
-	TP_printk("comm=%s pid=%d delay=%Lu [ns]",
-			__entry->comm, __entry->pid,
-			(unsigned long long)__entry->delay)
-);
-
-/*
- * Tracepoint for accounting iowait time (time the task is not runnable
- * due to waiting on IO to complete).
- */
-TRACE_EVENT(sched_stat_iowait,
-
-	TP_PROTO(struct task_struct *tsk, u64 delay),
-
-	TP_ARGS(tsk, delay),
-
-	TP_STRUCT__entry(
-		__array( char,	comm,	TASK_COMM_LEN	)
-		__field( pid_t,	pid			)
-		__field( u64,	delay			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
-		__entry->pid	= tsk->pid;
-		__entry->delay	= delay;
-	)
-	TP_perf_assign(
-		__perf_count(delay);
-	),
-
-	TP_printk("comm=%s pid=%d delay=%Lu [ns]",
-			__entry->comm, __entry->pid,
-			(unsigned long long)__entry->delay)
-);
-
 #endif /* _TRACE_SCHED_H */
 
 /* This part must be outside protection */
-- 
cgit v1.2.3-70-g09d2


From b8007ef7422270864eae523cb38d7522a53a94d3 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Tue, 3 Nov 2009 13:45:32 +0800
Subject: tracing: Separate raw syscall from syscall tracer

The current syscall tracer mixes raw syscalls and real syscalls.

echo 1 > events/syscalls/enable
And we get these from the output:

(XXXX insteads "            grep-20914 [001] 588211.446347" .. etc)

XXXX: sys_read(fd: 3, buf: 80609a8, count: 7000)
XXXX: sys_enter: NR 3 (3, 80609a8, 7000, a, 1000, bfce8ef8)
XXXX: sys_read -> 0x138
XXXX: sys_exit: NR 3 = 312
XXXX: sys_read(fd: 3, buf: 8060ae0, count: 7000)
XXXX: sys_enter: NR 3 (3, 8060ae0, 7000, a, 1000, bfce8ef8)
XXXX: sys_read -> 0x138
XXXX: sys_exit: NR 3 = 312

There are 2 drawbacks here.
A) two almost identical records are saved in ringbuffer
   when a syscall enters or exits. (4 records for every syscall)
   This wastes precious space in the ring buffer.
B) the lines including "sys_enter/sys_exit" produces
   hardly any useful information for the output (no labels).

The user can use this method to prevent these drawbacks:
echo 1 > events/syscalls/enable
echo 0 > events/syscalls/sys_enter/enable
echo 0 > events/syscalls/sys_exit/enable

But this is not user friendly. So we separate raw syscall
from syscall tracer.

After this fix applied:
syscall tracer's output (echo 1 > events/syscalls/enable):

XXXX: sys_read(fd: 3, buf: bfe87d88, count: 200)
XXXX: sys_read -> 0x200
XXXX: sys_fstat64(fd: 3, statbuf: bfe87c98)
XXXX: sys_fstat64 -> 0x0
XXXX: sys_close(fd: 3)

raw syscall tracer's output (echo 1 > events/raw_syscalls/enable):

XXXX: sys_enter: NR 175 (0, bf92bf18, bf92bf98, 8, b748cff4, bf92bef8)
XXXX: sys_exit: NR 175 = 0
XXXX: sys_enter: NR 175 (2, bf92bf98, 0, 8, b748cff4, bf92bef8)
XXXX: sys_exit: NR 175 = 0
XXXX: sys_enter: NR 3 (9, bf927f9c, 4000, b77e2518, b77dce60, bf92bff8)

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
LKML-Reference: <4AEFC37C.5080609@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/trace/events/syscalls.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/events/syscalls.h b/include/trace/events/syscalls.h
index 397dff2dbd5a..fb726ac7caee 100644
--- a/include/trace/events/syscalls.h
+++ b/include/trace/events/syscalls.h
@@ -1,5 +1,6 @@
 #undef TRACE_SYSTEM
-#define TRACE_SYSTEM syscalls
+#define TRACE_SYSTEM raw_syscalls
+#define TRACE_INCLUDE_FILE syscalls
 
 #if !defined(_TRACE_EVENTS_SYSCALLS_H) || defined(TRACE_HEADER_MULTI_READ)
 #define _TRACE_EVENTS_SYSCALLS_H
-- 
cgit v1.2.3-70-g09d2


From 091ad3658e3c76c5fb05f65bfb64a0246f8f31b5 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 26 Nov 2009 09:04:55 +0100
Subject: events: Rename TRACE_EVENT_TEMPLATE() to DECLARE_EVENT_CLASS()

It is not quite obvious at first sight what TRACE_EVENT_TEMPLATE
does: does it define an event as well beyond defining a template?

To clarify this, rename it to DECLARE_EVENT_CLASS, which follows
the various 'DECLARE_*()' idioms we already have in the kernel:

  DECLARE_EVENT_CLASS(class)

    DEFINE_EVENT(class, event1)
    DEFINE_EVENT(class, event2)
    DEFINE_EVENT(class, event3)

To complete this logic we should also rename TRACE_EVENT() to:

  DEFINE_SINGLE_EVENT(single_event)

... but in a more quiet moment of the kernel cycle.

Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <4B0E286A.2000405@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/tracepoint.h   |  2 +-
 include/trace/define_trace.h |  2 +-
 include/trace/events/sched.h |  6 +++---
 include/trace/ftrace.h       | 46 ++++++++++++++++++++++----------------------
 4 files changed, 28 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 7063383cca13..f59604ed0ec6 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -280,7 +280,7 @@ static inline void tracepoint_synchronize_unregister(void)
  * TRACE_EVENT_FN to perform any (un)registration work.
  */
 
-#define TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print)
+#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print)
 #define DEFINE_EVENT(template, name, proto, args)		\
 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
 #define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
index 5d7d855ae21e..5acfb1eb4df9 100644
--- a/include/trace/define_trace.h
+++ b/include/trace/define_trace.h
@@ -71,7 +71,7 @@
 
 #undef TRACE_EVENT
 #undef TRACE_EVENT_FN
-#undef TRACE_EVENT_TEMPLATE
+#undef DECLARE_EVENT_CLASS
 #undef DEFINE_EVENT
 #undef DEFINE_EVENT_PRINT
 #undef TRACE_HEADER_MULTI_READ
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 238f74b58486..5ce795021851 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -83,7 +83,7 @@ TRACE_EVENT(sched_wait_task,
  * (NOTE: the 'rq' argument is not used by generic trace events,
  *        but used by the latency tracer plugin. )
  */
-TRACE_EVENT_TEMPLATE(sched_wakeup_template,
+DECLARE_EVENT_CLASS(sched_wakeup_template,
 
 	TP_PROTO(struct rq *rq, struct task_struct *p, int success),
 
@@ -197,7 +197,7 @@ TRACE_EVENT(sched_migrate_task,
 		  __entry->orig_cpu, __entry->dest_cpu)
 );
 
-TRACE_EVENT_TEMPLATE(sched_process_template,
+DECLARE_EVENT_CLASS(sched_process_template,
 
 	TP_PROTO(struct task_struct *p),
 
@@ -316,7 +316,7 @@ TRACE_EVENT(sched_signal_send,
  * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE
  *     adding sched_stat support to SCHED_FIFO/RR would be welcome.
  */
-TRACE_EVENT_TEMPLATE(sched_stat_template,
+DECLARE_EVENT_CLASS(sched_stat_template,
 
 	TP_PROTO(struct task_struct *tsk, u64 delay),
 
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index b0461772bc8d..2c9c073e45ad 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -19,17 +19,17 @@
 #include <linux/ftrace_event.h>
 
 /*
- * TRACE_EVENT_TEMPLATE can be used to add a generic function
+ * DECLARE_EVENT_CLASS can be used to add a generic function
  * handlers for events. That is, if all events have the same
  * parameters and just have distinct trace points.
  * Each tracepoint can be defined with DEFINE_EVENT and that
- * will map the TRACE_EVENT_TEMPLATE to the tracepoint.
+ * will map the DECLARE_EVENT_CLASS to the tracepoint.
  *
  * TRACE_EVENT is a one to one mapping between tracepoint and template.
  */
 #undef TRACE_EVENT
 #define TRACE_EVENT(name, proto, args, tstruct, assign, print) \
-	TRACE_EVENT_TEMPLATE(name,			       \
+	DECLARE_EVENT_CLASS(name,			       \
 			     PARAMS(proto),		       \
 			     PARAMS(args),		       \
 			     PARAMS(tstruct),		       \
@@ -56,8 +56,8 @@
 #undef TP_STRUCT__entry
 #define TP_STRUCT__entry(args...) args
 
-#undef TRACE_EVENT_TEMPLATE
-#define TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print)	\
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print)	\
 	struct ftrace_raw_##name {					\
 		struct trace_entry	ent;				\
 		tstruct							\
@@ -115,8 +115,8 @@
 #undef __string
 #define __string(item, src) __dynamic_array(char, item, -1)
 
-#undef TRACE_EVENT_TEMPLATE
-#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print)	\
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
 	struct ftrace_data_offsets_##call {				\
 		tstruct;						\
 	};
@@ -203,8 +203,8 @@
 #undef TP_perf_assign
 #define TP_perf_assign(args...)
 
-#undef TRACE_EVENT_TEMPLATE
-#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, func, print)	\
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print)	\
 static int								\
 ftrace_format_setup_##call(struct ftrace_event_call *unused,		\
 			   struct trace_seq *s)				\
@@ -321,8 +321,8 @@ ftrace_format_##name(struct ftrace_event_call *unused,			\
 		ftrace_print_symbols_seq(p, value, symbols);		\
 	})
 
-#undef TRACE_EVENT_TEMPLATE
-#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print)	\
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
 static enum print_line_t						\
 ftrace_raw_output_id_##call(int event_id, const char *name,		\
 			    struct trace_iterator *iter, int flags)	\
@@ -428,8 +428,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
 #undef __string
 #define __string(item, src) __dynamic_array(char, item, -1)
 
-#undef TRACE_EVENT_TEMPLATE
-#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, func, print)	\
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print)	\
 static int								\
 ftrace_define_fields_##call(struct ftrace_event_call *event_call)	\
 {									\
@@ -480,8 +480,8 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call)	\
 #undef __string
 #define __string(item, src) __dynamic_array(char, item, strlen(src) + 1)
 
-#undef TRACE_EVENT_TEMPLATE
-#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print)	\
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
 static inline int ftrace_get_offsets_##call(				\
 	struct ftrace_data_offsets_##call *__data_offsets, proto)       \
 {									\
@@ -521,8 +521,8 @@ static inline int ftrace_get_offsets_##call(				\
  *
  */
 
-#undef TRACE_EVENT_TEMPLATE
-#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print)
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)
 
 #undef DEFINE_EVENT
 #define DEFINE_EVENT(template, name, proto, args)			\
@@ -681,8 +681,8 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\
 #define __assign_str(dst, src)						\
 	strcpy(__get_str(dst), src);
 
-#undef TRACE_EVENT_TEMPLATE
-#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print)	\
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
 									\
 static void ftrace_raw_event_id_##call(struct ftrace_event_call *event_call, \
 				       proto)				\
@@ -764,8 +764,8 @@ static int ftrace_raw_init_event_##call(struct ftrace_event_call *unused)\
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
-#undef TRACE_EVENT_TEMPLATE
-#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print)
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)
 
 #undef DEFINE_EVENT
 #define DEFINE_EVENT(template, call, proto, args)			\
@@ -885,8 +885,8 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 #undef __perf_count
 #define __perf_count(c) __count = (c)
 
-#undef TRACE_EVENT_TEMPLATE
-#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print)	\
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
 static void								\
 ftrace_profile_templ_##call(struct ftrace_event_call *event_call,	\
 			    proto)					\
-- 
cgit v1.2.3-70-g09d2


From 925684d6d589e40e41007edf47c69e729d911263 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Thu, 26 Nov 2009 15:03:23 +0800
Subject: tracing: Convert module refcnt events to DEFINE_EVENT

Use DECLARE_EVENT_CLASS to remove duplicate code:

   text    data     bss     dec     hex filename
  29854    1980     128   31962    7cda kernel/module.o.old
  28750    1980     128   30858    788a kernel/module.o

Two events are converted:

  module_refcnt: module_get, module_put

No change in functionality.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <4B0E283B.3010508@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/trace/events/module.h | 22 +++++++---------------
 1 file changed, 7 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/module.h b/include/trace/events/module.h
index 84160fb18478..4b0f48ba16a6 100644
--- a/include/trace/events/module.h
+++ b/include/trace/events/module.h
@@ -51,7 +51,7 @@ TRACE_EVENT(module_free,
 	TP_printk("%s", __get_str(name))
 );
 
-TRACE_EVENT(module_get,
+DECLARE_EVENT_CLASS(module_refcnt,
 
 	TP_PROTO(struct module *mod, unsigned long ip, int refcnt),
 
@@ -73,26 +73,18 @@ TRACE_EVENT(module_get,
 		  __get_str(name), (void *)__entry->ip, __entry->refcnt)
 );
 
-TRACE_EVENT(module_put,
+DEFINE_EVENT(module_refcnt, module_get,
 
 	TP_PROTO(struct module *mod, unsigned long ip, int refcnt),
 
-	TP_ARGS(mod, ip, refcnt),
+	TP_ARGS(mod, ip, refcnt)
+);
 
-	TP_STRUCT__entry(
-		__field(	unsigned long,	ip		)
-		__field(	int,		refcnt		)
-		__string(	name,		mod->name	)
-	),
+DEFINE_EVENT(module_refcnt, module_put,
 
-	TP_fast_assign(
-		__entry->ip	= ip;
-		__entry->refcnt	= refcnt;
-		__assign_str(name, mod->name);
-	),
+	TP_PROTO(struct module *mod, unsigned long ip, int refcnt),
 
-	TP_printk("%s call_site=%pf refcnt=%d",
-		  __get_str(name), (void *)__entry->ip, __entry->refcnt)
+	TP_ARGS(mod, ip, refcnt)
 );
 
 TRACE_EVENT(module_request,
-- 
cgit v1.2.3-70-g09d2


From 53d0422c2d10808fddb2c30859193bfea164c7e3 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Thu, 26 Nov 2009 15:04:10 +0800
Subject: tracing: Convert some kmem events to DEFINE_EVENT

Use DECLARE_EVENT_CLASS to remove duplicate code:

   text    data     bss     dec     hex filename
 333987   69800   27228  431015   693a7 mm/built-in.o.old
 330030   69800   27228  427058   68432 mm/built-in.o

8 events are converted:

  kmem_alloc: kmalloc, kmem_cache_alloc
  kmem_alloc_node: kmalloc_node, kmem_cache_alloc_node
  kmem_free: kfree, kmem_cache_free
  mm_page: mm_page_alloc_zone_locked, mm_page_pcpu_drain

No change in functionality.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mel Gorman <mel@csn.ul.ie>
LKML-Reference: <4B0E286A.2000405@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/trace/events/kmem.h | 130 ++++++++++++++------------------------------
 mm/page_alloc.c             |   4 +-
 mm/util.c                   |   3 -
 3 files changed, 43 insertions(+), 94 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index eaf46bdd18a5..3adca0ca9dbe 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -44,7 +44,7 @@
 	{(unsigned long)__GFP_MOVABLE,		"GFP_MOVABLE"}		\
 	) : "GFP_NOWAIT"
 
-TRACE_EVENT(kmalloc,
+DECLARE_EVENT_CLASS(kmem_alloc,
 
 	TP_PROTO(unsigned long call_site,
 		 const void *ptr,
@@ -78,41 +78,23 @@ TRACE_EVENT(kmalloc,
 		show_gfp_flags(__entry->gfp_flags))
 );
 
-TRACE_EVENT(kmem_cache_alloc,
+DEFINE_EVENT(kmem_alloc, kmalloc,
 
-	TP_PROTO(unsigned long call_site,
-		 const void *ptr,
-		 size_t bytes_req,
-		 size_t bytes_alloc,
-		 gfp_t gfp_flags),
+	TP_PROTO(unsigned long call_site, const void *ptr,
+		 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
+);
 
-	TP_STRUCT__entry(
-		__field(	unsigned long,	call_site	)
-		__field(	const void *,	ptr		)
-		__field(	size_t,		bytes_req	)
-		__field(	size_t,		bytes_alloc	)
-		__field(	gfp_t,		gfp_flags	)
-	),
+DEFINE_EVENT(kmem_alloc, kmem_cache_alloc,
 
-	TP_fast_assign(
-		__entry->call_site	= call_site;
-		__entry->ptr		= ptr;
-		__entry->bytes_req	= bytes_req;
-		__entry->bytes_alloc	= bytes_alloc;
-		__entry->gfp_flags	= gfp_flags;
-	),
+	TP_PROTO(unsigned long call_site, const void *ptr,
+		 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
 
-	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
-		__entry->call_site,
-		__entry->ptr,
-		__entry->bytes_req,
-		__entry->bytes_alloc,
-		show_gfp_flags(__entry->gfp_flags))
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
 );
 
-TRACE_EVENT(kmalloc_node,
+DECLARE_EVENT_CLASS(kmem_alloc_node,
 
 	TP_PROTO(unsigned long call_site,
 		 const void *ptr,
@@ -150,45 +132,25 @@ TRACE_EVENT(kmalloc_node,
 		__entry->node)
 );
 
-TRACE_EVENT(kmem_cache_alloc_node,
+DEFINE_EVENT(kmem_alloc_node, kmalloc_node,
 
-	TP_PROTO(unsigned long call_site,
-		 const void *ptr,
-		 size_t bytes_req,
-		 size_t bytes_alloc,
-		 gfp_t gfp_flags,
-		 int node),
+	TP_PROTO(unsigned long call_site, const void *ptr,
+		 size_t bytes_req, size_t bytes_alloc,
+		 gfp_t gfp_flags, int node),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
+);
 
-	TP_STRUCT__entry(
-		__field(	unsigned long,	call_site	)
-		__field(	const void *,	ptr		)
-		__field(	size_t,		bytes_req	)
-		__field(	size_t,		bytes_alloc	)
-		__field(	gfp_t,		gfp_flags	)
-		__field(	int,		node		)
-	),
+DEFINE_EVENT(kmem_alloc_node, kmem_cache_alloc_node,
 
-	TP_fast_assign(
-		__entry->call_site	= call_site;
-		__entry->ptr		= ptr;
-		__entry->bytes_req	= bytes_req;
-		__entry->bytes_alloc	= bytes_alloc;
-		__entry->gfp_flags	= gfp_flags;
-		__entry->node		= node;
-	),
+	TP_PROTO(unsigned long call_site, const void *ptr,
+		 size_t bytes_req, size_t bytes_alloc,
+		 gfp_t gfp_flags, int node),
 
-	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d",
-		__entry->call_site,
-		__entry->ptr,
-		__entry->bytes_req,
-		__entry->bytes_alloc,
-		show_gfp_flags(__entry->gfp_flags),
-		__entry->node)
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
 );
 
-TRACE_EVENT(kfree,
+DECLARE_EVENT_CLASS(kmem_free,
 
 	TP_PROTO(unsigned long call_site, const void *ptr),
 
@@ -207,23 +169,18 @@ TRACE_EVENT(kfree,
 	TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
 );
 
-TRACE_EVENT(kmem_cache_free,
+DEFINE_EVENT(kmem_free, kfree,
 
 	TP_PROTO(unsigned long call_site, const void *ptr),
 
-	TP_ARGS(call_site, ptr),
+	TP_ARGS(call_site, ptr)
+);
 
-	TP_STRUCT__entry(
-		__field(	unsigned long,	call_site	)
-		__field(	const void *,	ptr		)
-	),
+DEFINE_EVENT(kmem_free, kmem_cache_free,
 
-	TP_fast_assign(
-		__entry->call_site	= call_site;
-		__entry->ptr		= ptr;
-	),
+	TP_PROTO(unsigned long call_site, const void *ptr),
 
-	TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
+	TP_ARGS(call_site, ptr)
 );
 
 TRACE_EVENT(mm_page_free_direct,
@@ -299,7 +256,7 @@ TRACE_EVENT(mm_page_alloc,
 		show_gfp_flags(__entry->gfp_flags))
 );
 
-TRACE_EVENT(mm_page_alloc_zone_locked,
+DECLARE_EVENT_CLASS(mm_page,
 
 	TP_PROTO(struct page *page, unsigned int order, int migratetype),
 
@@ -325,29 +282,22 @@ TRACE_EVENT(mm_page_alloc_zone_locked,
 		__entry->order == 0)
 );
 
-TRACE_EVENT(mm_page_pcpu_drain,
+DEFINE_EVENT(mm_page, mm_page_alloc_zone_locked,
 
-	TP_PROTO(struct page *page, int order, int migratetype),
+	TP_PROTO(struct page *page, unsigned int order, int migratetype),
 
-	TP_ARGS(page, order, migratetype),
+	TP_ARGS(page, order, migratetype)
+);
 
-	TP_STRUCT__entry(
-		__field(	struct page *,	page		)
-		__field(	int,		order		)
-		__field(	int,		migratetype	)
-	),
+DEFINE_EVENT_PRINT(mm_page, mm_page_pcpu_drain,
 
-	TP_fast_assign(
-		__entry->page		= page;
-		__entry->order		= order;
-		__entry->migratetype	= migratetype;
-	),
+	TP_PROTO(struct page *page, unsigned int order, int migratetype),
+
+	TP_ARGS(page, order, migratetype),
 
 	TP_printk("page=%p pfn=%lu order=%d migratetype=%d",
-		__entry->page,
-		page_to_pfn(__entry->page),
-		__entry->order,
-		__entry->migratetype)
+		__entry->page, page_to_pfn(__entry->page),
+		__entry->order, __entry->migratetype)
 );
 
 TRACE_EVENT(mm_page_alloc_extfrag,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2bc2ac63f41e..bdb22f55d006 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -48,12 +48,14 @@
 #include <linux/page_cgroup.h>
 #include <linux/debugobjects.h>
 #include <linux/kmemleak.h>
-#include <trace/events/kmem.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
 #include "internal.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/kmem.h>
+
 /*
  * Array of node states.
  */
diff --git a/mm/util.c b/mm/util.c
index 7c35ad95f927..15d197571b4d 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -6,9 +6,6 @@
 #include <linux/sched.h>
 #include <asm/uaccess.h>
 
-#define CREATE_TRACE_POINTS
-#include <trace/events/kmem.h>
-
 /**
  * kstrdup - allocate space for and copy an existing string
  * @s: the string to duplicate
-- 
cgit v1.2.3-70-g09d2


From c467307c1a812c3150b27a68c2b2d3397bb40a4f Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Thu, 26 Nov 2009 15:04:31 +0800
Subject: tracing: Convert softirq events to DEFINE_EVENT

Use DECLARE_EVENT_CLASS to remove duplicate code:

   text    data     bss     dec     hex filename
  12781     952      36   13769    35c9 kernel/softirq.o.old
  11981     952      32   12965    32a5 kernel/softirq.o

Two events are converted:

  softirq: softirq_entry, softirq_exit

No change in functionality.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <4B0E287F.4030708@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/trace/events/irq.h | 46 +++++++++++++++++++++-------------------------
 1 file changed, 21 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h
index dcfcd4407623..0e4cfb694fe7 100644
--- a/include/trace/events/irq.h
+++ b/include/trace/events/irq.h
@@ -82,18 +82,7 @@ TRACE_EVENT(irq_handler_exit,
 		  __entry->irq, __entry->ret ? "handled" : "unhandled")
 );
 
-/**
- * softirq_entry - called immediately before the softirq handler
- * @h: pointer to struct softirq_action
- * @vec: pointer to first struct softirq_action in softirq_vec array
- *
- * The @h parameter, contains a pointer to the struct softirq_action
- * which has a pointer to the action handler that is called. By subtracting
- * the @vec pointer from the @h pointer, we can determine the softirq
- * number. Also, when used in combination with the softirq_exit tracepoint
- * we can determine the softirq latency.
- */
-TRACE_EVENT(softirq_entry,
+DECLARE_EVENT_CLASS(softirq,
 
 	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
 
@@ -111,6 +100,24 @@ TRACE_EVENT(softirq_entry,
 		  show_softirq_name(__entry->vec))
 );
 
+/**
+ * softirq_entry - called immediately before the softirq handler
+ * @h: pointer to struct softirq_action
+ * @vec: pointer to first struct softirq_action in softirq_vec array
+ *
+ * The @h parameter, contains a pointer to the struct softirq_action
+ * which has a pointer to the action handler that is called. By subtracting
+ * the @vec pointer from the @h pointer, we can determine the softirq
+ * number. Also, when used in combination with the softirq_exit tracepoint
+ * we can determine the softirq latency.
+ */
+DEFINE_EVENT(softirq, softirq_entry,
+
+	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
+
+	TP_ARGS(h, vec)
+);
+
 /**
  * softirq_exit - called immediately after the softirq handler returns
  * @h: pointer to struct softirq_action
@@ -122,22 +129,11 @@ TRACE_EVENT(softirq_entry,
  * combination with the softirq_entry tracepoint we can determine the softirq
  * latency.
  */
-TRACE_EVENT(softirq_exit,
+DEFINE_EVENT(softirq, softirq_exit,
 
 	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
 
-	TP_ARGS(h, vec),
-
-	TP_STRUCT__entry(
-		__field(	int,	vec			)
-	),
-
-	TP_fast_assign(
-		__entry->vec = (int)(h - vec);
-	),
-
-	TP_printk("vec=%d [action=%s]", __entry->vec,
-		  show_softirq_name(__entry->vec))
+	TP_ARGS(h, vec)
 );
 
 #endif /*  _TRACE_IRQ_H */
-- 
cgit v1.2.3-70-g09d2


From 382ece710bf88b08440b598731361e5a47582b62 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Thu, 26 Nov 2009 15:05:03 +0800
Subject: tracing: Convert some workqueue events to DEFINE_EVENT

Use DECLARE_EVENT_CLASS to remove duplicate code:

   text    data     bss     dec     hex filename
  13171     800      72   14043    36db kernel/workqueue.o.old
  12243     800      68   13111    3337 kernel/workqueue.o

Two events are converted:

  workqueue: workqueue_insertion, workqueue_execution

No change in functionality.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <4B0E289F.5010104@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/trace/events/workqueue.h | 22 +++++++---------------
 1 file changed, 7 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h
index e4612dbd7ba6..d6c974474e70 100644
--- a/include/trace/events/workqueue.h
+++ b/include/trace/events/workqueue.h
@@ -8,7 +8,7 @@
 #include <linux/sched.h>
 #include <linux/tracepoint.h>
 
-TRACE_EVENT(workqueue_insertion,
+DECLARE_EVENT_CLASS(workqueue,
 
 	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
 
@@ -30,26 +30,18 @@ TRACE_EVENT(workqueue_insertion,
 		__entry->thread_pid, __entry->func)
 );
 
-TRACE_EVENT(workqueue_execution,
+DEFINE_EVENT(workqueue, workqueue_insertion,
 
 	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
 
-	TP_ARGS(wq_thread, work),
+	TP_ARGS(wq_thread, work)
+);
 
-	TP_STRUCT__entry(
-		__array(char,		thread_comm,	TASK_COMM_LEN)
-		__field(pid_t,		thread_pid)
-		__field(work_func_t,	func)
-	),
+DEFINE_EVENT(workqueue, workqueue_execution,
 
-	TP_fast_assign(
-		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
-		__entry->thread_pid	= wq_thread->pid;
-		__entry->func		= work->func;
-	),
+	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
 
-	TP_printk("thread=%s:%d func=%pf", __entry->thread_comm,
-		__entry->thread_pid, __entry->func)
+	TP_ARGS(wq_thread, work)
 );
 
 /* Trace the creation of one workqueue thread on a cpu */
-- 
cgit v1.2.3-70-g09d2


From 7703466b4c0a21b88d701882bef0d45bcb0a0281 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Thu, 26 Nov 2009 15:05:38 +0800
Subject: tracing: Convert some power events to DEFINE_EVENT

Use DECLARE_EVENT_CLASS to remove duplicate code:

   text    data     bss     dec     hex filename
   4312     524      12    4848    12f0 kernel/trace/power-traces.o.old
   3455     524       8    3987     f93 kernel/trace/power-traces.o

Two events are converted:

  power: power_start, power_frequency

No change in functionality.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Arjan van de Ven <arjan@infradead.org>
LKML-Reference: <4B0E28C2.1090906@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/trace/events/power.h | 36 +++++++++++++++---------------------
 1 file changed, 15 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/power.h b/include/trace/events/power.h
index 9bb96e5a2848..c4efe9b8280d 100644
--- a/include/trace/events/power.h
+++ b/include/trace/events/power.h
@@ -16,7 +16,7 @@ enum {
 };
 #endif
 
-TRACE_EVENT(power_start,
+DECLARE_EVENT_CLASS(power,
 
 	TP_PROTO(unsigned int type, unsigned int state),
 
@@ -35,42 +35,36 @@ TRACE_EVENT(power_start,
 	TP_printk("type=%lu state=%lu", (unsigned long)__entry->type, (unsigned long)__entry->state)
 );
 
-TRACE_EVENT(power_end,
-
-	TP_PROTO(int dummy),
+DEFINE_EVENT(power, power_start,
 
-	TP_ARGS(dummy),
+	TP_PROTO(unsigned int type, unsigned int state),
 
-	TP_STRUCT__entry(
-		__field(	u64,		dummy		)
-	),
+	TP_ARGS(type, state)
+);
 
-	TP_fast_assign(
-		__entry->dummy = 0xffff;
-	),
+DEFINE_EVENT(power, power_frequency,
 
-	TP_printk("dummy=%lu", (unsigned long)__entry->dummy)
+	TP_PROTO(unsigned int type, unsigned int state),
 
+	TP_ARGS(type, state)
 );
 
+TRACE_EVENT(power_end,
 
-TRACE_EVENT(power_frequency,
-
-	TP_PROTO(unsigned int type, unsigned int state),
+	TP_PROTO(int dummy),
 
-	TP_ARGS(type, state),
+	TP_ARGS(dummy),
 
 	TP_STRUCT__entry(
-		__field(	u64,		type		)
-		__field(	u64,		state		)
+		__field(	u64,		dummy		)
 	),
 
 	TP_fast_assign(
-		__entry->type = type;
-		__entry->state = state;
+		__entry->dummy = 0xffff;
 	),
 
-	TP_printk("type=%lu state=%lu", (unsigned long)__entry->type, (unsigned long) __entry->state)
+	TP_printk("dummy=%lu", (unsigned long)__entry->dummy)
+
 );
 
 #endif /* _TRACE_POWER_H */
-- 
cgit v1.2.3-70-g09d2


From 77ca1e0294f25fc26053ba14353e703158acef26 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Thu, 26 Nov 2009 15:06:14 +0800
Subject: tracing: Convert some block events to DEFINE_EVENT

use DECLARE_EVENT_CLASS to remove duplicate code:

   text    data     bss     dec     hex filename
  53570    3284     184   57038    dece block/blk-core.o.old
  43702    3284     144   47130    b81a block/blk-core.o

12 events are converted:

  block_rq: block_rq_insert, block_rq_issue
  block_rq_with_error: block_rq_{abort, requeue, complete}
  block_bio: block_bio_{backmerge, frontmerge, queue}
  block_get_rq: block_getrq, block_sleeprq
  block_unplug: block_unplug_timer, block_unplug_io

No change in functionality.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <4B0E28E6.7060609@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/trace/events/block.h | 202 +++++++++----------------------------------
 1 file changed, 42 insertions(+), 160 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index 00405b5f624a..5fb72733331e 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -8,7 +8,7 @@
 #include <linux/blkdev.h>
 #include <linux/tracepoint.h>
 
-TRACE_EVENT(block_rq_abort,
+DECLARE_EVENT_CLASS(block_rq_with_error,
 
 	TP_PROTO(struct request_queue *q, struct request *rq),
 
@@ -40,41 +40,28 @@ TRACE_EVENT(block_rq_abort,
 		  __entry->nr_sector, __entry->errors)
 );
 
-TRACE_EVENT(block_rq_insert,
+DEFINE_EVENT(block_rq_with_error, block_rq_abort,
 
 	TP_PROTO(struct request_queue *q, struct request *rq),
 
-	TP_ARGS(q, rq),
+	TP_ARGS(q, rq)
+);
 
-	TP_STRUCT__entry(
-		__field(  dev_t,	dev			)
-		__field(  sector_t,	sector			)
-		__field(  unsigned int,	nr_sector		)
-		__field(  unsigned int,	bytes			)
-		__array(  char,		rwbs,	6		)
-		__array(  char,         comm,   TASK_COMM_LEN   )
-		__dynamic_array( char,	cmd,	blk_cmd_buf_len(rq)	)
-	),
+DEFINE_EVENT(block_rq_with_error, block_rq_requeue,
 
-	TP_fast_assign(
-		__entry->dev	   = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
-		__entry->sector    = blk_pc_request(rq) ? 0 : blk_rq_pos(rq);
-		__entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq);
-		__entry->bytes     = blk_pc_request(rq) ? blk_rq_bytes(rq) : 0;
+	TP_PROTO(struct request_queue *q, struct request *rq),
 
-		blk_fill_rwbs_rq(__entry->rwbs, rq);
-		blk_dump_cmd(__get_str(cmd), rq);
-		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
-	),
+	TP_ARGS(q, rq)
+);
 
-	TP_printk("%d,%d %s %u (%s) %llu + %u [%s]",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->rwbs, __entry->bytes, __get_str(cmd),
-		  (unsigned long long)__entry->sector,
-		  __entry->nr_sector, __entry->comm)
+DEFINE_EVENT(block_rq_with_error, block_rq_complete,
+
+	TP_PROTO(struct request_queue *q, struct request *rq),
+
+	TP_ARGS(q, rq)
 );
 
-TRACE_EVENT(block_rq_issue,
+DECLARE_EVENT_CLASS(block_rq,
 
 	TP_PROTO(struct request_queue *q, struct request *rq),
 
@@ -86,7 +73,7 @@ TRACE_EVENT(block_rq_issue,
 		__field(  unsigned int,	nr_sector		)
 		__field(  unsigned int,	bytes			)
 		__array(  char,		rwbs,	6		)
-		__array(  char,		comm,   TASK_COMM_LEN   )
+		__array(  char,         comm,   TASK_COMM_LEN   )
 		__dynamic_array( char,	cmd,	blk_cmd_buf_len(rq)	)
 	),
 
@@ -108,68 +95,18 @@ TRACE_EVENT(block_rq_issue,
 		  __entry->nr_sector, __entry->comm)
 );
 
-TRACE_EVENT(block_rq_requeue,
+DEFINE_EVENT(block_rq, block_rq_insert,
 
 	TP_PROTO(struct request_queue *q, struct request *rq),
 
-	TP_ARGS(q, rq),
-
-	TP_STRUCT__entry(
-		__field(  dev_t,	dev			)
-		__field(  sector_t,	sector			)
-		__field(  unsigned int,	nr_sector		)
-		__field(  int,		errors			)
-		__array(  char,		rwbs,	6		)
-		__dynamic_array( char,	cmd,	blk_cmd_buf_len(rq)	)
-	),
-
-	TP_fast_assign(
-		__entry->dev	   = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
-		__entry->sector    = blk_pc_request(rq) ? 0 : blk_rq_pos(rq);
-		__entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq);
-		__entry->errors	   = rq->errors;
-
-		blk_fill_rwbs_rq(__entry->rwbs, rq);
-		blk_dump_cmd(__get_str(cmd), rq);
-	),
-
-	TP_printk("%d,%d %s (%s) %llu + %u [%d]",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->rwbs, __get_str(cmd),
-		  (unsigned long long)__entry->sector,
-		  __entry->nr_sector, __entry->errors)
+	TP_ARGS(q, rq)
 );
 
-TRACE_EVENT(block_rq_complete,
+DEFINE_EVENT(block_rq, block_rq_issue,
 
 	TP_PROTO(struct request_queue *q, struct request *rq),
 
-	TP_ARGS(q, rq),
-
-	TP_STRUCT__entry(
-		__field(  dev_t,	dev			)
-		__field(  sector_t,	sector			)
-		__field(  unsigned int,	nr_sector		)
-		__field(  int,		errors			)
-		__array(  char,		rwbs,	6		)
-		__dynamic_array( char,	cmd,	blk_cmd_buf_len(rq)	)
-	),
-
-	TP_fast_assign(
-		__entry->dev	   = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
-		__entry->sector    = blk_pc_request(rq) ? 0 : blk_rq_pos(rq);
-		__entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq);
-		__entry->errors    = rq->errors;
-
-		blk_fill_rwbs_rq(__entry->rwbs, rq);
-		blk_dump_cmd(__get_str(cmd), rq);
-	),
-
-	TP_printk("%d,%d %s (%s) %llu + %u [%d]",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->rwbs, __get_str(cmd),
-		  (unsigned long long)__entry->sector,
-		  __entry->nr_sector, __entry->errors)
+	TP_ARGS(q, rq)
 );
 
 TRACE_EVENT(block_bio_bounce,
@@ -228,7 +165,7 @@ TRACE_EVENT(block_bio_complete,
 		  __entry->nr_sector, __entry->error)
 );
 
-TRACE_EVENT(block_bio_backmerge,
+DECLARE_EVENT_CLASS(block_bio,
 
 	TP_PROTO(struct request_queue *q, struct bio *bio),
 
@@ -256,63 +193,28 @@ TRACE_EVENT(block_bio_backmerge,
 		  __entry->nr_sector, __entry->comm)
 );
 
-TRACE_EVENT(block_bio_frontmerge,
+DEFINE_EVENT(block_bio, block_bio_backmerge,
 
 	TP_PROTO(struct request_queue *q, struct bio *bio),
 
-	TP_ARGS(q, bio),
-
-	TP_STRUCT__entry(
-		__field( dev_t,		dev			)
-		__field( sector_t,	sector			)
-		__field( unsigned,	nr_sector		)
-		__array( char,		rwbs,	6		)
-		__array( char,		comm,	TASK_COMM_LEN	)
-	),
-
-	TP_fast_assign(
-		__entry->dev		= bio->bi_bdev->bd_dev;
-		__entry->sector		= bio->bi_sector;
-		__entry->nr_sector	= bio->bi_size >> 9;
-		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
-		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
-	),
-
-	TP_printk("%d,%d %s %llu + %u [%s]",
-		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
-		  (unsigned long long)__entry->sector,
-		  __entry->nr_sector, __entry->comm)
+	TP_ARGS(q, bio)
 );
 
-TRACE_EVENT(block_bio_queue,
+DEFINE_EVENT(block_bio, block_bio_frontmerge,
 
 	TP_PROTO(struct request_queue *q, struct bio *bio),
 
-	TP_ARGS(q, bio),
+	TP_ARGS(q, bio)
+);
 
-	TP_STRUCT__entry(
-		__field( dev_t,		dev			)
-		__field( sector_t,	sector			)
-		__field( unsigned int,	nr_sector		)
-		__array( char,		rwbs,	6		)
-		__array( char,		comm,	TASK_COMM_LEN	)
-	),
+DEFINE_EVENT(block_bio, block_bio_queue,
 
-	TP_fast_assign(
-		__entry->dev		= bio->bi_bdev->bd_dev;
-		__entry->sector		= bio->bi_sector;
-		__entry->nr_sector	= bio->bi_size >> 9;
-		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
-		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
-	),
+	TP_PROTO(struct request_queue *q, struct bio *bio),
 
-	TP_printk("%d,%d %s %llu + %u [%s]",
-		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
-		  (unsigned long long)__entry->sector,
-		  __entry->nr_sector, __entry->comm)
+	TP_ARGS(q, bio)
 );
 
-TRACE_EVENT(block_getrq,
+DECLARE_EVENT_CLASS(block_get_rq,
 
 	TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
 
@@ -341,33 +243,18 @@ TRACE_EVENT(block_getrq,
 		  __entry->nr_sector, __entry->comm)
 );
 
-TRACE_EVENT(block_sleeprq,
+DEFINE_EVENT(block_get_rq, block_getrq,
 
 	TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
 
-	TP_ARGS(q, bio, rw),
+	TP_ARGS(q, bio, rw)
+);
 
-	TP_STRUCT__entry(
-		__field( dev_t,		dev			)
-		__field( sector_t,	sector			)
-		__field( unsigned int,	nr_sector		)
-		__array( char,		rwbs,	6		)
-		__array( char,		comm,	TASK_COMM_LEN	)
-	),
+DEFINE_EVENT(block_get_rq, block_sleeprq,
 
-	TP_fast_assign(
-		__entry->dev		= bio ? bio->bi_bdev->bd_dev : 0;
-		__entry->sector		= bio ? bio->bi_sector : 0;
-		__entry->nr_sector	= bio ? bio->bi_size >> 9 : 0;
-		blk_fill_rwbs(__entry->rwbs,
-			    bio ? bio->bi_rw : 0, __entry->nr_sector);
-		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
-	),
+	TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
 
-	TP_printk("%d,%d %s %llu + %u [%s]",
-		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
-		  (unsigned long long)__entry->sector,
-		  __entry->nr_sector, __entry->comm)
+	TP_ARGS(q, bio, rw)
 );
 
 TRACE_EVENT(block_plug,
@@ -387,7 +274,7 @@ TRACE_EVENT(block_plug,
 	TP_printk("[%s]", __entry->comm)
 );
 
-TRACE_EVENT(block_unplug_timer,
+DECLARE_EVENT_CLASS(block_unplug,
 
 	TP_PROTO(struct request_queue *q),
 
@@ -406,23 +293,18 @@ TRACE_EVENT(block_unplug_timer,
 	TP_printk("[%s] %d", __entry->comm, __entry->nr_rq)
 );
 
-TRACE_EVENT(block_unplug_io,
+DEFINE_EVENT(block_unplug, block_unplug_timer,
 
 	TP_PROTO(struct request_queue *q),
 
-	TP_ARGS(q),
+	TP_ARGS(q)
+);
 
-	TP_STRUCT__entry(
-		__field( int,		nr_rq			)
-		__array( char,		comm,	TASK_COMM_LEN	)
-	),
+DEFINE_EVENT(block_unplug, block_unplug_io,
 
-	TP_fast_assign(
-		__entry->nr_rq	= q->rq.count[READ] + q->rq.count[WRITE];
-		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
-	),
+	TP_PROTO(struct request_queue *q),
 
-	TP_printk("[%s] %d", __entry->comm, __entry->nr_rq)
+	TP_ARGS(q)
 );
 
 TRACE_EVENT(block_split,
-- 
cgit v1.2.3-70-g09d2


From 071688f36e7eba3e37b2fc48e35bfdab99b80b4d Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Thu, 26 Nov 2009 15:06:55 +0800
Subject: tracing: Convert some jbd2 events to DEFINE_EVENT

Use DECLARE_EVENT_CLASS to remove duplicate code:

   text    data     bss     dec     hex filename
  34903    1693     448   37044    90b4 fs/jbd2/journal.o.old
  31931    1693     416   34040    84f8 fs/jbd2/journal.o

Four events are converted:

  jbd2_commit: jbd2_start_commit,
               jbd2_commit_{locking, flushing, logging}

No change in functionality.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <4B0E290F.7030909@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/trace/events/jbd2.h | 63 ++++++++-------------------------------------
 1 file changed, 11 insertions(+), 52 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h
index 3c60b75adb9e..96b370a050de 100644
--- a/include/trace/events/jbd2.h
+++ b/include/trace/events/jbd2.h
@@ -30,7 +30,7 @@ TRACE_EVENT(jbd2_checkpoint,
 		  jbd2_dev_to_name(__entry->dev), __entry->result)
 );
 
-TRACE_EVENT(jbd2_start_commit,
+DECLARE_EVENT_CLASS(jbd2_commit,
 
 	TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
 
@@ -53,73 +53,32 @@ TRACE_EVENT(jbd2_start_commit,
 		  __entry->sync_commit)
 );
 
-TRACE_EVENT(jbd2_commit_locking,
+DEFINE_EVENT(jbd2_commit, jbd2_start_commit,
 
 	TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
 
-	TP_ARGS(journal, commit_transaction),
-
-	TP_STRUCT__entry(
-		__field(	dev_t,	dev			)
-		__field(	char,	sync_commit		  )
-		__field(	int,	transaction		  )
-	),
-
-	TP_fast_assign(
-		__entry->dev		= journal->j_fs_dev->bd_dev;
-		__entry->sync_commit = commit_transaction->t_synchronous_commit;
-		__entry->transaction	= commit_transaction->t_tid;
-	),
-
-	TP_printk("dev %s transaction %d sync %d",
-		  jbd2_dev_to_name(__entry->dev), __entry->transaction,
-		  __entry->sync_commit)
+	TP_ARGS(journal, commit_transaction)
 );
 
-TRACE_EVENT(jbd2_commit_flushing,
+DEFINE_EVENT(jbd2_commit, jbd2_commit_locking,
 
 	TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
 
-	TP_ARGS(journal, commit_transaction),
-
-	TP_STRUCT__entry(
-		__field(	dev_t,	dev			)
-		__field(	char,	sync_commit		  )
-		__field(	int,	transaction		  )
-	),
-
-	TP_fast_assign(
-		__entry->dev		= journal->j_fs_dev->bd_dev;
-		__entry->sync_commit = commit_transaction->t_synchronous_commit;
-		__entry->transaction	= commit_transaction->t_tid;
-	),
-
-	TP_printk("dev %s transaction %d sync %d",
-		  jbd2_dev_to_name(__entry->dev), __entry->transaction,
-		  __entry->sync_commit)
+	TP_ARGS(journal, commit_transaction)
 );
 
-TRACE_EVENT(jbd2_commit_logging,
+DEFINE_EVENT(jbd2_commit, jbd2_commit_flushing,
 
 	TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
 
-	TP_ARGS(journal, commit_transaction),
+	TP_ARGS(journal, commit_transaction)
+);
 
-	TP_STRUCT__entry(
-		__field(	dev_t,	dev			)
-		__field(	char,	sync_commit		  )
-		__field(	int,	transaction		  )
-	),
+DEFINE_EVENT(jbd2_commit, jbd2_commit_logging,
 
-	TP_fast_assign(
-		__entry->dev		= journal->j_fs_dev->bd_dev;
-		__entry->sync_commit = commit_transaction->t_synchronous_commit;
-		__entry->transaction	= commit_transaction->t_tid;
-	),
+	TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
 
-	TP_printk("dev %s transaction %d sync %d",
-		  jbd2_dev_to_name(__entry->dev), __entry->transaction,
-		  __entry->sync_commit)
+	TP_ARGS(journal, commit_transaction)
 );
 
 TRACE_EVENT(jbd2_end_commit,
-- 
cgit v1.2.3-70-g09d2


From b5eb34c3592545c756e50d882c08417eb60740a7 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Thu, 26 Nov 2009 15:07:36 +0800
Subject: tracing: Convert some ext4 events to DEFINE_TRACE

Use DECLARE_EVENT_CLASS to remove duplicate code:

   text    data     bss     dec     hex filename
 294695    6104     340  301139   49853 fs/ext4/ext4.o.old
 289983    6104     324  296411   485db fs/ext4/ext4.o

5 events are convertd:

  ext4__write_begin: ext4_write_begin, ext4_da_write_begin
  ext4__write_end: ext4_{ordered, writeback, journalled}_write_end

No change in functionality.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <4B0E2938.2040708@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/trace/events/ext4.h | 129 ++++++++++++--------------------------------
 1 file changed, 35 insertions(+), 94 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index d09550bf3f95..318f76535bd4 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -90,7 +90,7 @@ TRACE_EVENT(ext4_allocate_inode,
 		  (unsigned long) __entry->dir, __entry->mode)
 );
 
-TRACE_EVENT(ext4_write_begin,
+DECLARE_EVENT_CLASS(ext4__write_begin,
 
 	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
 		 unsigned int flags),
@@ -118,7 +118,23 @@ TRACE_EVENT(ext4_write_begin,
 		  __entry->pos, __entry->len, __entry->flags)
 );
 
-TRACE_EVENT(ext4_ordered_write_end,
+DEFINE_EVENT(ext4__write_begin, ext4_write_begin,
+
+	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+		 unsigned int flags),
+
+	TP_ARGS(inode, pos, len, flags)
+);
+
+DEFINE_EVENT(ext4__write_begin, ext4_da_write_begin,
+
+	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+		 unsigned int flags),
+
+	TP_ARGS(inode, pos, len, flags)
+);
+
+DECLARE_EVENT_CLASS(ext4__write_end,
 	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
 			unsigned int copied),
 
@@ -145,57 +161,36 @@ TRACE_EVENT(ext4_ordered_write_end,
 		  __entry->pos, __entry->len, __entry->copied)
 );
 
-TRACE_EVENT(ext4_writeback_write_end,
+DEFINE_EVENT(ext4__write_end, ext4_ordered_write_end,
+
 	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
 		 unsigned int copied),
 
-	TP_ARGS(inode, pos, len, copied),
+	TP_ARGS(inode, pos, len, copied)
+);
 
-	TP_STRUCT__entry(
-		__field(	dev_t,	dev			)
-		__field(	ino_t,	ino			)
-		__field(	loff_t,	pos			)
-		__field(	unsigned int, len		)
-		__field(	unsigned int, copied		)
-	),
+DEFINE_EVENT(ext4__write_end, ext4_writeback_write_end,
 
-	TP_fast_assign(
-		__entry->dev	= inode->i_sb->s_dev;
-		__entry->ino	= inode->i_ino;
-		__entry->pos	= pos;
-		__entry->len	= len;
-		__entry->copied	= copied;
-	),
+	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+		 unsigned int copied),
 
-	TP_printk("dev %s ino %lu pos %llu len %u copied %u",
-		  jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
-		  __entry->pos, __entry->len, __entry->copied)
+	TP_ARGS(inode, pos, len, copied)
 );
 
-TRACE_EVENT(ext4_journalled_write_end,
+DEFINE_EVENT(ext4__write_end, ext4_journalled_write_end,
+
 	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
 		 unsigned int copied),
-	TP_ARGS(inode, pos, len, copied),
 
-	TP_STRUCT__entry(
-		__field(	dev_t,	dev			)
-		__field(	ino_t,	ino			)
-		__field(	loff_t,	pos			)
-		__field(	unsigned int, len		)
-		__field(	unsigned int, copied		)
-	),
+	TP_ARGS(inode, pos, len, copied)
+);
 
-	TP_fast_assign(
-		__entry->dev	= inode->i_sb->s_dev;
-		__entry->ino	= inode->i_ino;
-		__entry->pos	= pos;
-		__entry->len	= len;
-		__entry->copied	= copied;
-	),
+DEFINE_EVENT(ext4__write_end, ext4_da_write_end,
 
-	TP_printk("dev %s ino %lu pos %llu len %u copied %u",
-		  jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
-		  __entry->pos, __entry->len, __entry->copied)
+	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+		 unsigned int copied),
+
+	TP_ARGS(inode, pos, len, copied)
 );
 
 TRACE_EVENT(ext4_writepage,
@@ -337,60 +332,6 @@ TRACE_EVENT(ext4_da_writepages_result,
 		  (unsigned long) __entry->writeback_index)
 );
 
-TRACE_EVENT(ext4_da_write_begin,
-	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
-			unsigned int flags),
-
-	TP_ARGS(inode, pos, len, flags),
-
-	TP_STRUCT__entry(
-		__field(	dev_t,	dev			)
-		__field(	ino_t,	ino			)
-		__field(	loff_t,	pos			)
-		__field(	unsigned int, len		)
-		__field(	unsigned int, flags		)
-	),
-
-	TP_fast_assign(
-		__entry->dev	= inode->i_sb->s_dev;
-		__entry->ino	= inode->i_ino;
-		__entry->pos	= pos;
-		__entry->len	= len;
-		__entry->flags	= flags;
-	),
-
-	TP_printk("dev %s ino %lu pos %llu len %u flags %u",
-		  jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
-		  __entry->pos, __entry->len, __entry->flags)
-);
-
-TRACE_EVENT(ext4_da_write_end,
-	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
-			unsigned int copied),
-
-	TP_ARGS(inode, pos, len, copied),
-
-	TP_STRUCT__entry(
-		__field(	dev_t,	dev			)
-		__field(	ino_t,	ino			)
-		__field(	loff_t,	pos			)
-		__field(	unsigned int, len		)
-		__field(	unsigned int, copied		)
-	),
-
-	TP_fast_assign(
-		__entry->dev	= inode->i_sb->s_dev;
-		__entry->ino	= inode->i_ino;
-		__entry->pos	= pos;
-		__entry->len	= len;
-		__entry->copied	= copied;
-	),
-
-	TP_printk("dev %s ino %lu pos %llu len %u copied %u",
-		  jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
-		  __entry->pos, __entry->len, __entry->copied)
-);
-
 TRACE_EVENT(ext4_discard_blocks,
 	TP_PROTO(struct super_block *sb, unsigned long long blk,
 			unsigned long long count),
-- 
cgit v1.2.3-70-g09d2


From 470dda7417f284b9cfc96560b2acd98df63798a2 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Thu, 26 Nov 2009 15:08:01 +0800
Subject: tracing: Restore original format of sched events

The original format for sched_stat_iowait and sched_stat_sleep:

  $ cat events/sched/sched_stat_iowait/format
  ...
  print fmt: "comm=%s pid=%d delay=%Lu [ns]", ...
  $ cat events/sched/sched_stat_sleep/format
  ...
  print fmt: "comm=%s pid=%d delay=%Lu [ns]", ...

But commit commit 75ec29ab848a7e92a41aaafaeb33d1afbc839be4
("tracing: Convert some sched trace events to DEFINE_EVENT and
_PRINT") broke the format:

  $ cat events/sched/sched_stat_iowait/format
  print fmt: "task: %s:%d iowait: %Lu [ns]", ...
  $ cat events/sched/sched_stat_sleep/format
  print fmt: "task: %s:%d sleep: %Lu [ns]", ...

No change in functionality.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <4B0E2951.9050800@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/trace/events/sched.h | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 5ce795021851..9d316b22388c 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -355,23 +355,17 @@ DEFINE_EVENT(sched_stat_template, sched_stat_wait,
  * Tracepoint for accounting sleep time (time the task is not runnable,
  * including iowait, see below).
  */
-DEFINE_EVENT_PRINT(sched_stat_template, sched_stat_sleep,
-		   TP_PROTO(struct task_struct *tsk, u64 delay),
-		   TP_ARGS(tsk, delay),
-		   TP_printk("task: %s:%d sleep: %Lu [ns]",
-			     __entry->comm, __entry->pid,
-			     (unsigned long long)__entry->delay));
+DEFINE_EVENT(sched_stat_template, sched_stat_sleep,
+	     TP_PROTO(struct task_struct *tsk, u64 delay),
+	     TP_ARGS(tsk, delay));
 
 /*
  * Tracepoint for accounting iowait time (time the task is not runnable
  * due to waiting on IO to complete).
  */
-DEFINE_EVENT_PRINT(sched_stat_template, sched_stat_iowait,
-		   TP_PROTO(struct task_struct *tsk, u64 delay),
-		   TP_ARGS(tsk, delay),
-		   TP_printk("task: %s:%d iowait: %Lu [ns]",
-			     __entry->comm, __entry->pid,
-			     (unsigned long long)__entry->delay));
+DEFINE_EVENT(sched_stat_template, sched_stat_iowait,
+	     TP_PROTO(struct task_struct *tsk, u64 delay),
+	     TP_ARGS(tsk, delay));
 
 /*
  * Tracepoint for accounting runtime (time the task is executing
-- 
cgit v1.2.3-70-g09d2


From d1eb650ff4130972fa21462fa49cd35a2865403b Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Tue, 24 Nov 2009 16:56:45 -0500
Subject: tracepoint: Move signal sending tracepoint to events/signal.h

Move signal sending event to events/signal.h. This patch also
renames sched_signal_send event to signal_generate.

Changes in v4:
 - Fix a typo of task_struct pointer.

Changes in v3:
 - Add docbook style comments

Changes in v2:
 - Add siginfo argument
 - Add siginfo storing macro

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Reviewed-by: Jason Baron <jbaron@redhat.com>
Acked-by: Roland McGrath <roland@redhat.com>
Cc: systemtap <systemtap@sources.redhat.com>
Cc: DLE <dle-develop@lists.sourceforge.net>
Cc: Oleg Nesterov <oleg@redhat.com>
LKML-Reference: <20091124215645.30449.60208.stgit@dhcp-100-2-132.bos.redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/DocBook/tracepoint.tmpl |  5 +++
 include/trace/events/sched.h          | 25 -------------
 include/trace/events/signal.h         | 66 +++++++++++++++++++++++++++++++++++
 kernel/signal.c                       |  5 +--
 4 files changed, 74 insertions(+), 27 deletions(-)
 create mode 100644 include/trace/events/signal.h

(limited to 'include')

diff --git a/Documentation/DocBook/tracepoint.tmpl b/Documentation/DocBook/tracepoint.tmpl
index b0756d0fd579..8bca1d5cec09 100644
--- a/Documentation/DocBook/tracepoint.tmpl
+++ b/Documentation/DocBook/tracepoint.tmpl
@@ -86,4 +86,9 @@
 !Iinclude/trace/events/irq.h
   </chapter>
 
+  <chapter id="signal">
+   <title>SIGNAL</title>
+!Iinclude/trace/events/signal.h
+  </chapter>
+
 </book>
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 9d316b22388c..cfceb0b73e20 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -287,31 +287,6 @@ TRACE_EVENT(sched_process_fork,
 		__entry->child_comm, __entry->child_pid)
 );
 
-/*
- * Tracepoint for sending a signal:
- */
-TRACE_EVENT(sched_signal_send,
-
-	TP_PROTO(int sig, struct task_struct *p),
-
-	TP_ARGS(sig, p),
-
-	TP_STRUCT__entry(
-		__field(	int,	sig			)
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-		__entry->pid	= p->pid;
-		__entry->sig	= sig;
-	),
-
-	TP_printk("sig=%d comm=%s pid=%d",
-		  __entry->sig, __entry->comm, __entry->pid)
-);
-
 /*
  * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE
  *     adding sched_stat support to SCHED_FIFO/RR would be welcome.
diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h
new file mode 100644
index 000000000000..ef51756a801d
--- /dev/null
+++ b/include/trace/events/signal.h
@@ -0,0 +1,66 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM signal
+
+#if !defined(_TRACE_SIGNAL_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SIGNAL_H
+
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/tracepoint.h>
+
+#define TP_STORE_SIGINFO(__entry, info)				\
+	do {							\
+		if (info == SEND_SIG_NOINFO) {			\
+			__entry->errno	= 0;			\
+			__entry->code	= SI_USER;		\
+		} else if (info == SEND_SIG_PRIV) {		\
+			__entry->errno	= 0;			\
+			__entry->code	= SI_KERNEL;		\
+		} else {					\
+			__entry->errno	= info->si_errno;	\
+			__entry->code	= info->si_code;	\
+		}						\
+	} while (0)
+
+/**
+ * signal_generate - called when a signal is generated
+ * @sig: signal number
+ * @info: pointer to struct siginfo
+ * @task: pointer to struct task_struct
+ *
+ * Current process sends a 'sig' signal to 'task' process with
+ * 'info' siginfo. If 'info' is SEND_SIG_NOINFO or SEND_SIG_PRIV,
+ * 'info' is not a pointer and you can't access its field. Instead,
+ * SEND_SIG_NOINFO means that si_code is SI_USER, and SEND_SIG_PRIV
+ * means that si_code is SI_KERNEL.
+ */
+TRACE_EVENT(signal_generate,
+
+	TP_PROTO(int sig, struct siginfo *info, struct task_struct *task),
+
+	TP_ARGS(sig, info, task),
+
+	TP_STRUCT__entry(
+		__field(	int,	sig			)
+		__field(	int,	errno			)
+		__field(	int,	code			)
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+	),
+
+	TP_fast_assign(
+		__entry->sig	= sig;
+		TP_STORE_SIGINFO(__entry, info);
+		memcpy(__entry->comm, task->comm, TASK_COMM_LEN);
+		__entry->pid	= task->pid;
+	),
+
+	TP_printk("sig=%d errno=%d code=%d comm=%s pid=%d",
+		  __entry->sig, __entry->errno, __entry->code,
+		  __entry->comm, __entry->pid)
+);
+
+#endif /* _TRACE_SIGNAL_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/kernel/signal.c b/kernel/signal.c
index 6705320784fd..a1e0cc6b32c3 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -27,7 +27,8 @@
 #include <linux/freezer.h>
 #include <linux/pid_namespace.h>
 #include <linux/nsproxy.h>
-#include <trace/events/sched.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/signal.h>
 
 #include <asm/param.h>
 #include <asm/uaccess.h>
@@ -834,7 +835,7 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
 	struct sigqueue *q;
 	int override_rlimit;
 
-	trace_sched_signal_send(sig, t);
+	trace_signal_generate(sig, info, t);
 
 	assert_spin_locked(&t->sighand->siglock);
 
-- 
cgit v1.2.3-70-g09d2


From f9d4257e01d266e67420cc99d456b6d4c8464f54 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Tue, 24 Nov 2009 16:56:51 -0500
Subject: tracepoint: Add signal deliver event

Add a tracepoint where a process gets a signal. This tracepoint
shows signal-number, sa-handler and sa-flag.

Changes in v3:
 - Add docbook style comments

Changes in v2:
 - Add siginfo argument
 - Fix comment

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Reviewed-by: Jason Baron <jbaron@redhat.com>
Acked-by: Roland McGrath <roland@redhat.com>
Cc: systemtap <systemtap@sources.redhat.com>
Cc: DLE <dle-develop@lists.sourceforge.net>
Cc: Oleg Nesterov <oleg@redhat.com>
LKML-Reference: <20091124215651.30449.20926.stgit@dhcp-100-2-132.bos.redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/trace/events/signal.h | 39 +++++++++++++++++++++++++++++++++++++++
 kernel/signal.c               |  3 +++
 2 files changed, 42 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h
index ef51756a801d..a6d71de0dc0d 100644
--- a/include/trace/events/signal.h
+++ b/include/trace/events/signal.h
@@ -60,6 +60,45 @@ TRACE_EVENT(signal_generate,
 		  __entry->comm, __entry->pid)
 );
 
+/**
+ * signal_deliver - called when a signal is delivered
+ * @sig: signal number
+ * @info: pointer to struct siginfo
+ * @ka: pointer to struct k_sigaction
+ *
+ * A 'sig' signal is delivered to current process with 'info' siginfo,
+ * and it will be handled by 'ka'. ka->sa.sa_handler can be SIG_IGN or
+ * SIG_DFL.
+ * Note that some signals reported by signal_generate tracepoint can be
+ * lost, ignored or modified (by debugger) before hitting this tracepoint.
+ * This means, this can show which signals are actually delivered, but
+ * matching generated signals and delivered signals may not be correct.
+ */
+TRACE_EVENT(signal_deliver,
+
+	TP_PROTO(int sig, struct siginfo *info, struct k_sigaction *ka),
+
+	TP_ARGS(sig, info, ka),
+
+	TP_STRUCT__entry(
+		__field(	int,		sig		)
+		__field(	int,		errno		)
+		__field(	int,		code		)
+		__field(	unsigned long,	sa_handler	)
+		__field(	unsigned long,	sa_flags	)
+	),
+
+	TP_fast_assign(
+		__entry->sig	= sig;
+		TP_STORE_SIGINFO(__entry, info);
+		__entry->sa_handler	= (unsigned long)ka->sa.sa_handler;
+		__entry->sa_flags	= ka->sa.sa_flags;
+	),
+
+	TP_printk("sig=%d errno=%d code=%d sa_handler=%lx sa_flags=%lx",
+		  __entry->sig, __entry->errno, __entry->code,
+		  __entry->sa_handler, __entry->sa_flags)
+);
 #endif /* _TRACE_SIGNAL_H */
 
 /* This part must be outside protection */
diff --git a/kernel/signal.c b/kernel/signal.c
index a1e0cc6b32c3..349d44937406 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1840,6 +1840,9 @@ relock:
 			ka = &sighand->action[signr-1];
 		}
 
+		/* Trace actually delivered signals. */
+		trace_signal_deliver(signr, info, ka);
+
 		if (ka->sa.sa_handler == SIG_IGN) /* Do nothing.  */
 			continue;
 		if (ka->sa.sa_handler != SIG_DFL) {
-- 
cgit v1.2.3-70-g09d2


From ba005e1f417295d28cd1563ab82bc33af07fb16a Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Tue, 24 Nov 2009 16:56:58 -0500
Subject: tracepoint: Add signal loss events

Add signal_overflow_fail and signal_lose_info tracepoints
for signal-lost events.

Changes in v3:
 - Add docbook style comments

Changes in v2:
 - Use siginfo string macro

Suggested-by: Roland McGrath <roland@redhat.com>
Reviewed-by: Jason Baron <jbaron@redhat.com>
Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Acked-by: Roland McGrath <roland@redhat.com>
Cc: systemtap <systemtap@sources.redhat.com>
Cc: DLE <dle-develop@lists.sourceforge.net>
Cc: Oleg Nesterov <oleg@redhat.com>
LKML-Reference: <20091124215658.30449.9934.stgit@dhcp-100-2-132.bos.redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/trace/events/signal.h | 68 +++++++++++++++++++++++++++++++++++++++++++
 kernel/signal.c               | 19 ++++++++----
 2 files changed, 82 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h
index a6d71de0dc0d..a510b75ac304 100644
--- a/include/trace/events/signal.h
+++ b/include/trace/events/signal.h
@@ -99,6 +99,74 @@ TRACE_EVENT(signal_deliver,
 		  __entry->sig, __entry->errno, __entry->code,
 		  __entry->sa_handler, __entry->sa_flags)
 );
+
+/**
+ * signal_overflow_fail - called when signal queue is overflow
+ * @sig: signal number
+ * @group: signal to process group or not (bool)
+ * @info: pointer to struct siginfo
+ *
+ * Kernel fails to generate 'sig' signal with 'info' siginfo, because
+ * siginfo queue is overflow, and the signal is dropped.
+ * 'group' is not 0 if the signal will be sent to a process group.
+ * 'sig' is always one of RT signals.
+ */
+TRACE_EVENT(signal_overflow_fail,
+
+	TP_PROTO(int sig, int group, struct siginfo *info),
+
+	TP_ARGS(sig, group, info),
+
+	TP_STRUCT__entry(
+		__field(	int,	sig	)
+		__field(	int,	group	)
+		__field(	int,	errno	)
+		__field(	int,	code	)
+	),
+
+	TP_fast_assign(
+		__entry->sig	= sig;
+		__entry->group	= group;
+		TP_STORE_SIGINFO(__entry, info);
+	),
+
+	TP_printk("sig=%d group=%d errno=%d code=%d",
+		  __entry->sig, __entry->group, __entry->errno, __entry->code)
+);
+
+/**
+ * signal_lose_info - called when siginfo is lost
+ * @sig: signal number
+ * @group: signal to process group or not (bool)
+ * @info: pointer to struct siginfo
+ *
+ * Kernel generates 'sig' signal but loses 'info' siginfo, because siginfo
+ * queue is overflow.
+ * 'group' is not 0 if the signal will be sent to a process group.
+ * 'sig' is always one of non-RT signals.
+ */
+TRACE_EVENT(signal_lose_info,
+
+	TP_PROTO(int sig, int group, struct siginfo *info),
+
+	TP_ARGS(sig, group, info),
+
+	TP_STRUCT__entry(
+		__field(	int,	sig	)
+		__field(	int,	group	)
+		__field(	int,	errno	)
+		__field(	int,	code	)
+	),
+
+	TP_fast_assign(
+		__entry->sig	= sig;
+		__entry->group	= group;
+		TP_STORE_SIGINFO(__entry, info);
+	),
+
+	TP_printk("sig=%d group=%d errno=%d code=%d",
+		  __entry->sig, __entry->group, __entry->errno, __entry->code)
+);
 #endif /* _TRACE_SIGNAL_H */
 
 /* This part must be outside protection */
diff --git a/kernel/signal.c b/kernel/signal.c
index 349d44937406..93e72e5feae6 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -897,12 +897,21 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
 			break;
 		}
 	} else if (!is_si_special(info)) {
-		if (sig >= SIGRTMIN && info->si_code != SI_USER)
-		/*
-		 * Queue overflow, abort.  We may abort if the signal was rt
-		 * and sent by user using something other than kill().
-		 */
+		if (sig >= SIGRTMIN && info->si_code != SI_USER) {
+			/*
+			 * Queue overflow, abort.  We may abort if the
+			 * signal was rt and sent by user using something
+			 * other than kill().
+			 */
+			trace_signal_overflow_fail(sig, group, info);
 			return -EAGAIN;
+		} else {
+			/*
+			 * This is a silent loss of information.  We still
+			 * send the signal, but the *info bits are lost.
+			 */
+			trace_signal_lose_info(sig, group, info);
+		}
 	}
 
 out_set:
-- 
cgit v1.2.3-70-g09d2


From d180c5bccec02612256fd8076ff3c1fac3429553 Mon Sep 17 00:00:00 2001
From: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Date: Thu, 26 Nov 2009 14:48:30 +0900
Subject: sched: Introduce task_times() to replace task_{u,s}time() pair

Functions task_{u,s}time() are called in pair in almost all
cases.  However task_stime() is implemented to call task_utime()
from its inside, so such paired calls run task_utime() twice.

It means we do heavy divisions (div_u64 + do_div) twice to get
utime and stime which can be obtained at same time by one set
of divisions.

This patch introduces a function task_times(*tsk, *utime,
*stime) to retrieve utime and stime at once in better, optimized
way.

Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Stanislaw Gruszka <sgruszka@redhat.com>
Cc: Spencer Candland <spencer@bluehost.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Americo Wang <xiyou.wangcong@gmail.com>
LKML-Reference: <4B0E16AE.906@jp.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 fs/proc/array.c       |  3 +--
 include/linux/sched.h |  1 +
 kernel/exit.c         |  7 +++++--
 kernel/sched.c        | 55 ++++++++++++++++++++++++++++++++-------------------
 kernel/sys.c          |  3 +--
 5 files changed, 43 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/fs/proc/array.c b/fs/proc/array.c
index e209f64ab27b..330deda70d08 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -535,8 +535,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 	if (!whole) {
 		min_flt = task->min_flt;
 		maj_flt = task->maj_flt;
-		utime = task_utime(task);
-		stime = task_stime(task);
+		task_times(task, &utime, &stime);
 		gtime = task_gtime(task);
 	}
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 78ba664474f3..fe6ae1516640 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1723,6 +1723,7 @@ static inline void put_task_struct(struct task_struct *t)
 extern cputime_t task_utime(struct task_struct *p);
 extern cputime_t task_stime(struct task_struct *p);
 extern cputime_t task_gtime(struct task_struct *p);
+extern void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st);
 
 /*
  * Per process flags
diff --git a/kernel/exit.c b/kernel/exit.c
index f7864ac2ecc1..29068ab2670a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -91,6 +91,8 @@ static void __exit_signal(struct task_struct *tsk)
 	if (atomic_dec_and_test(&sig->count))
 		posix_cpu_timers_exit_group(tsk);
 	else {
+		cputime_t utime, stime;
+
 		/*
 		 * If there is any task waiting for the group exit
 		 * then notify it:
@@ -110,8 +112,9 @@ static void __exit_signal(struct task_struct *tsk)
 		 * We won't ever get here for the group leader, since it
 		 * will have been the last reference on the signal_struct.
 		 */
-		sig->utime = cputime_add(sig->utime, task_utime(tsk));
-		sig->stime = cputime_add(sig->stime, task_stime(tsk));
+		task_times(tsk, &utime, &stime);
+		sig->utime = cputime_add(sig->utime, utime);
+		sig->stime = cputime_add(sig->stime, stime);
 		sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
 		sig->min_flt += tsk->min_flt;
 		sig->maj_flt += tsk->maj_flt;
diff --git a/kernel/sched.c b/kernel/sched.c
index 315ba4059f93..475a6f2b7158 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5191,6 +5191,14 @@ cputime_t task_stime(struct task_struct *p)
 {
 	return p->stime;
 }
+
+void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
+{
+	if (ut)
+		*ut = task_utime(p);
+	if (st)
+		*st = task_stime(p);
+}
 #else
 
 #ifndef nsecs_to_cputime
@@ -5198,41 +5206,48 @@ cputime_t task_stime(struct task_struct *p)
 	msecs_to_cputime(div_u64((__nsecs), NSEC_PER_MSEC))
 #endif
 
-cputime_t task_utime(struct task_struct *p)
+void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
-	cputime_t utime = p->utime, total = utime + p->stime;
-	u64 temp;
+	cputime_t rtime, utime = p->utime, total = utime + p->stime;
 
 	/*
 	 * Use CFS's precise accounting:
 	 */
-	temp = (u64)nsecs_to_cputime(p->se.sum_exec_runtime);
+	rtime = nsecs_to_cputime(p->se.sum_exec_runtime);
 
 	if (total) {
-		temp *= utime;
+		u64 temp;
+
+		temp = (u64)(rtime * utime);
 		do_div(temp, total);
-	}
-	utime = (cputime_t)temp;
+		utime = (cputime_t)temp;
+	} else
+		utime = rtime;
 
+	/*
+	 * Compare with previous values, to keep monotonicity:
+	 */
 	p->prev_utime = max(p->prev_utime, utime);
-	return p->prev_utime;
+	p->prev_stime = max(p->prev_stime, rtime - p->prev_utime);
+
+	if (ut)
+		*ut = p->prev_utime;
+	if (st)
+		*st = p->prev_stime;
+}
+
+cputime_t task_utime(struct task_struct *p)
+{
+	cputime_t utime;
+	task_times(p, &utime, NULL);
+	return utime;
 }
 
 cputime_t task_stime(struct task_struct *p)
 {
 	cputime_t stime;
-
-	/*
-	 * Use CFS's precise accounting. (we subtract utime from
-	 * the total, to make sure the total observed by userspace
-	 * grows monotonically - apps rely on that):
-	 */
-	stime = nsecs_to_cputime(p->se.sum_exec_runtime) - task_utime(p);
-
-	if (stime >= 0)
-		p->prev_stime = max(p->prev_stime, stime);
-
-	return p->prev_stime;
+	task_times(p, NULL, &stime);
+	return stime;
 }
 #endif
 
diff --git a/kernel/sys.c b/kernel/sys.c
index ce17760d9c51..bbdfce0d4347 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1346,8 +1346,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 	utime = stime = cputime_zero;
 
 	if (who == RUSAGE_THREAD) {
-		utime = task_utime(current);
-		stime = task_stime(current);
+		task_times(current, &utime, &stime);
 		accumulate_thread_rusage(p, r);
 		maxrss = p->signal->maxrss;
 		goto out;
-- 
cgit v1.2.3-70-g09d2


From d5b7c78e975302a1bab28263266c39ecb71acad4 Mon Sep 17 00:00:00 2001
From: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Date: Thu, 26 Nov 2009 14:49:05 +0900
Subject: sched: Remove task_{u,s,g}time()

Now all task_{u,s}time() pairs are replaced by task_times().
And task_gtime() is too simple to be an inline function.

Cleanup them all.

Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Stanislaw Gruszka <sgruszka@redhat.com>
Cc: Spencer Candland <spencer@bluehost.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Americo Wang <xiyou.wangcong@gmail.com>
LKML-Reference: <4B0E16D1.70902@jp.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 fs/proc/array.c       |  4 ++--
 include/linux/sched.h |  3 ---
 kernel/exit.c         |  2 +-
 kernel/sched.c        | 33 ++-------------------------------
 4 files changed, 5 insertions(+), 37 deletions(-)

(limited to 'include')

diff --git a/fs/proc/array.c b/fs/proc/array.c
index 330deda70d08..ca61a88aed66 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -511,7 +511,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 			do {
 				min_flt += t->min_flt;
 				maj_flt += t->maj_flt;
-				gtime = cputime_add(gtime, task_gtime(t));
+				gtime = cputime_add(gtime, t->gtime);
 				t = next_thread(t);
 			} while (t != task);
 
@@ -536,7 +536,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 		min_flt = task->min_flt;
 		maj_flt = task->maj_flt;
 		task_times(task, &utime, &stime);
-		gtime = task_gtime(task);
+		gtime = task->gtime;
 	}
 
 	/* scale priority and nice values from timeslices to -20..20 */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index fe6ae1516640..0395b0f4df3a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1720,9 +1720,6 @@ static inline void put_task_struct(struct task_struct *t)
 		__put_task_struct(t);
 }
 
-extern cputime_t task_utime(struct task_struct *p);
-extern cputime_t task_stime(struct task_struct *p);
-extern cputime_t task_gtime(struct task_struct *p);
 extern void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st);
 
 /*
diff --git a/kernel/exit.c b/kernel/exit.c
index 29068ab2670a..2eaf68b634e3 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -115,7 +115,7 @@ static void __exit_signal(struct task_struct *tsk)
 		task_times(tsk, &utime, &stime);
 		sig->utime = cputime_add(sig->utime, utime);
 		sig->stime = cputime_add(sig->stime, stime);
-		sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
+		sig->gtime = cputime_add(sig->gtime, tsk->gtime);
 		sig->min_flt += tsk->min_flt;
 		sig->maj_flt += tsk->maj_flt;
 		sig->nvcsw += tsk->nvcsw;
diff --git a/kernel/sched.c b/kernel/sched.c
index 475a6f2b7158..82251c21f785 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5182,22 +5182,12 @@ void account_idle_ticks(unsigned long ticks)
  * Use precise platform statistics if available:
  */
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
-cputime_t task_utime(struct task_struct *p)
-{
-	return p->utime;
-}
-
-cputime_t task_stime(struct task_struct *p)
-{
-	return p->stime;
-}
-
 void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
 	if (ut)
-		*ut = task_utime(p);
+		*ut = p->utime;
 	if (st)
-		*st = task_stime(p);
+		*st = p->stime;
 }
 #else
 
@@ -5235,27 +5225,8 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 	if (st)
 		*st = p->prev_stime;
 }
-
-cputime_t task_utime(struct task_struct *p)
-{
-	cputime_t utime;
-	task_times(p, &utime, NULL);
-	return utime;
-}
-
-cputime_t task_stime(struct task_struct *p)
-{
-	cputime_t stime;
-	task_times(p, NULL, &stime);
-	return stime;
-}
 #endif
 
-inline cputime_t task_gtime(struct task_struct *p)
-{
-	return p->gtime;
-}
-
 /*
  * This function gets called by the timer code, with HZ frequency.
  * We call it with interrupts disabled.
-- 
cgit v1.2.3-70-g09d2


From b7b20df91d43d5e59578b8fc16e895c0c8cbd9b5 Mon Sep 17 00:00:00 2001
From: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Date: Thu, 26 Nov 2009 14:49:27 +0900
Subject: sched, time: Define nsecs_to_jiffies()

Use of msecs_to_jiffies() for nsecs_to_cputime() have some
problems:

 - The type of msecs_to_jiffies()'s argument is unsigned int, so
   it cannot convert msecs greater than UINT_MAX = about 49.7 days.

 - msecs_to_jiffies() returns MAX_JIFFY_OFFSET if MSB of argument
   is set, assuming that input was negative value.  So it cannot
   convert msecs greater than INT_MAX = about 24.8 days too.

This patch defines a new function nsecs_to_jiffies() that can
deal greater values, and that can deal all incoming values as
unsigned.

Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Stanislaw Gruszka <sgruszka@redhat.com>
Cc: Spencer Candland <spencer@bluehost.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Amrico Wang <xiyou.wangcong@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Stultz <johnstul@linux.vnet.ibm.com>
LKML-Reference: <4B0E16E7.5070307@jp.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/jiffies.h |  1 +
 kernel/sched.c          |  3 +--
 kernel/time.c           | 30 ++++++++++++++++++++++++++++++
 3 files changed, 32 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 1a9cf78bfce5..6811f4bfc6e7 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -307,6 +307,7 @@ extern clock_t jiffies_to_clock_t(long x);
 extern unsigned long clock_t_to_jiffies(unsigned long x);
 extern u64 jiffies_64_to_clock_t(u64 x);
 extern u64 nsec_to_clock_t(u64 x);
+extern unsigned long nsecs_to_jiffies(u64 n);
 
 #define TIMESTAMP_SIZE	30
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 82251c21f785..b3d4e2be95aa 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5192,8 +5192,7 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 #else
 
 #ifndef nsecs_to_cputime
-# define nsecs_to_cputime(__nsecs) \
-	msecs_to_cputime(div_u64((__nsecs), NSEC_PER_MSEC))
+# define nsecs_to_cputime(__nsecs)	nsecs_to_jiffies(__nsecs)
 #endif
 
 void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
diff --git a/kernel/time.c b/kernel/time.c
index 2e2e469a7fec..804798005d19 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -662,6 +662,36 @@ u64 nsec_to_clock_t(u64 x)
 #endif
 }
 
+/**
+ * nsecs_to_jiffies - Convert nsecs in u64 to jiffies
+ *
+ * @n:	nsecs in u64
+ *
+ * Unlike {m,u}secs_to_jiffies, type of input is not unsigned int but u64.
+ * And this doesn't return MAX_JIFFY_OFFSET since this function is designed
+ * for scheduler, not for use in device drivers to calculate timeout value.
+ *
+ * note:
+ *   NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512)
+ *   ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years
+ */
+unsigned long nsecs_to_jiffies(u64 n)
+{
+#if (NSEC_PER_SEC % HZ) == 0
+	/* Common case, HZ = 100, 128, 200, 250, 256, 500, 512, 1000 etc. */
+	return div_u64(n, NSEC_PER_SEC / HZ);
+#elif (HZ % 512) == 0
+	/* overflow after 292 years if HZ = 1024 */
+	return div_u64(n * HZ / 512, NSEC_PER_SEC / 512);
+#else
+	/*
+	 * Generic case - optimized for cases where HZ is a multiple of 3.
+	 * overflow after 64.99 years, exact for HZ = 60, 72, 90, 120 etc.
+	 */
+	return div_u64(n * 9, (9ull * NSEC_PER_SEC + HZ / 2) / HZ);
+#endif
+}
+
 #if (BITS_PER_LONG < 64)
 u64 get_jiffies_64(void)
 {
-- 
cgit v1.2.3-70-g09d2


From 5fa10b28e57f94a90535cfeafe89dcee9f47d540 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 27 Nov 2009 04:55:53 +0100
Subject: hw-breakpoints: Use struct perf_event_attr to define user breakpoints

In-kernel user breakpoints are created using functions in which
we pass breakpoint parameters as individual variables: address,
length and type.

Although it fits well for x86, this just does not scale across
archictectures that may support this api later as these may have
more or different needs. Pass in a perf_event_attr structure
instead because it is meant to evolve as much as possible into
a generic hardware breakpoint parameter structure.

Reported-by: K.Prasad <prasad@linux.vnet.ibm.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <1259294154-5197-1-git-send-regression-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ptrace.c      | 74 ++++++++++++++++++++----------------
 include/linux/hw_breakpoint.h | 36 ++++++++----------
 kernel/hw_breakpoint.c        | 87 +++++++++----------------------------------
 3 files changed, 75 insertions(+), 122 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 75e0cd847bd6..2941b32ea666 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -593,6 +593,34 @@ static unsigned long ptrace_get_dr7(struct perf_event *bp[])
 	return dr7;
 }
 
+static struct perf_event *
+ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
+			 struct task_struct *tsk)
+{
+	int err;
+	int gen_len, gen_type;
+	DEFINE_BREAKPOINT_ATTR(attr);
+
+	/*
+	 * We shoud have at least an inactive breakpoint at this
+	 * slot. It means the user is writing dr7 without having
+	 * written the address register first
+	 */
+	if (!bp)
+		return ERR_PTR(-EINVAL);
+
+	err = arch_bp_generic_fields(len, type, &gen_len, &gen_type);
+	if (err)
+		return ERR_PTR(err);
+
+	attr = bp->attr;
+	attr.bp_len = gen_len;
+	attr.bp_type = gen_type;
+	attr.disabled = 0;
+
+	return modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk);
+}
+
 /*
  * Handle ptrace writes to debug register 7.
  */
@@ -603,7 +631,6 @@ static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
 	int i, orig_ret = 0, rc = 0;
 	int enabled, second_pass = 0;
 	unsigned len, type;
-	int gen_len, gen_type;
 	struct perf_event *bp;
 
 	data &= ~DR_CONTROL_RESERVED;
@@ -634,33 +661,12 @@ restore:
 			continue;
 		}
 
-		/*
-		 * We shoud have at least an inactive breakpoint at this
-		 * slot. It means the user is writing dr7 without having
-		 * written the address register first
-		 */
-		if (!bp) {
-			rc = -EINVAL;
-			break;
-		}
-
-		rc = arch_bp_generic_fields(len, type, &gen_len, &gen_type);
-		if (rc)
-			break;
-
-		/*
-		 * This is a temporary thing as bp is unregistered/registered
-		 * to simulate modification
-		 */
-		bp = modify_user_hw_breakpoint(bp, bp->attr.bp_addr, gen_len,
-					       gen_type, bp->callback,
-					       tsk, true);
-		thread->ptrace_bps[i] = NULL;
+		bp = ptrace_modify_breakpoint(bp, len, type, tsk);
 
 		/* Incorrect bp, or we have a bug in bp API */
 		if (IS_ERR(bp)) {
 			rc = PTR_ERR(bp);
-			bp = NULL;
+			thread->ptrace_bps[i] = NULL;
 			break;
 		}
 		thread->ptrace_bps[i] = bp;
@@ -707,24 +713,26 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
 {
 	struct perf_event *bp;
 	struct thread_struct *t = &tsk->thread;
+	DEFINE_BREAKPOINT_ATTR(attr);
 
 	if (!t->ptrace_bps[nr]) {
 		/*
 		 * Put stub len and type to register (reserve) an inactive but
 		 * correct bp
 		 */
-		bp = register_user_hw_breakpoint(addr, HW_BREAKPOINT_LEN_1,
-						 HW_BREAKPOINT_W,
-						 ptrace_triggered, tsk,
-						 false);
+		attr.bp_addr = addr;
+		attr.bp_len = HW_BREAKPOINT_LEN_1;
+		attr.bp_type = HW_BREAKPOINT_W;
+		attr.disabled = 1;
+
+		bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk);
 	} else {
 		bp = t->ptrace_bps[nr];
 		t->ptrace_bps[nr] = NULL;
-		bp = modify_user_hw_breakpoint(bp, addr, bp->attr.bp_len,
-					       bp->attr.bp_type,
-					       bp->callback,
-					       tsk,
-					       bp->attr.disabled);
+
+		attr = bp->attr;
+		attr.bp_addr = addr;
+		bp = modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk);
 	}
 	/*
 	 * CHECKME: the previous code returned -EIO if the addr wasn't a
diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index c9f7f7c7b0e0..5da472e434b7 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -20,6 +20,14 @@ enum {
 
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 
+/* As it's for in-kernel or ptrace use, we want it to be pinned */
+#define DEFINE_BREAKPOINT_ATTR(name)	\
+struct perf_event_attr name = {		\
+	.type = PERF_TYPE_BREAKPOINT,	\
+	.size = sizeof(name),		\
+	.pinned = 1,			\
+};
+
 static inline unsigned long hw_breakpoint_addr(struct perf_event *bp)
 {
 	return bp->attr.bp_addr;
@@ -36,22 +44,16 @@ static inline int hw_breakpoint_len(struct perf_event *bp)
 }
 
 extern struct perf_event *
-register_user_hw_breakpoint(unsigned long addr,
-			    int len,
-			    int type,
+register_user_hw_breakpoint(struct perf_event_attr *attr,
 			    perf_callback_t triggered,
-			    struct task_struct *tsk,
-			    bool active);
+			    struct task_struct *tsk);
 
 /* FIXME: only change from the attr, and don't unregister */
 extern struct perf_event *
 modify_user_hw_breakpoint(struct perf_event *bp,
-			  unsigned long addr,
-			  int len,
-			  int type,
+			  struct perf_event_attr *attr,
 			  perf_callback_t triggered,
-			  struct task_struct *tsk,
-			  bool active);
+			  struct task_struct *tsk);
 
 /*
  * Kernel breakpoints are not associated with any particular thread.
@@ -89,20 +91,14 @@ static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp)
 #else /* !CONFIG_HAVE_HW_BREAKPOINT */
 
 static inline struct perf_event *
-register_user_hw_breakpoint(unsigned long addr,
-			    int len,
-			    int type,
+register_user_hw_breakpoint(struct perf_event_attr *attr,
 			    perf_callback_t triggered,
-			    struct task_struct *tsk,
-			    bool active)		{ return NULL; }
+			    struct task_struct *tsk)	{ return NULL; }
 static inline struct perf_event *
 modify_user_hw_breakpoint(struct perf_event *bp,
-			  unsigned long addr,
-			  int len,
-			  int type,
+			  struct perf_event_attr *attr,
 			  perf_callback_t triggered,
-			  struct task_struct *tsk,
-			  bool active)			{ return NULL; }
+			  struct task_struct *tsk)	{ return NULL; }
 static inline struct perf_event *
 register_wide_hw_breakpoint_cpu(unsigned long addr,
 				int len,
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 32e1018191be..2a47514f12fd 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -289,90 +289,32 @@ int register_perf_hw_breakpoint(struct perf_event *bp)
 	return __register_perf_hw_breakpoint(bp);
 }
 
-/*
- * Register a breakpoint bound to a task and a given cpu.
- * If cpu is -1, the breakpoint is active for the task in every cpu
- * If the task is -1, the breakpoint is active for every tasks in the given
- * cpu.
- */
-static struct perf_event *
-register_user_hw_breakpoint_cpu(unsigned long addr,
-				int len,
-				int type,
-				perf_callback_t triggered,
-				pid_t pid,
-				int cpu,
-				bool active)
-{
-	struct perf_event_attr *attr;
-	struct perf_event *bp;
-
-	attr = kzalloc(sizeof(*attr), GFP_KERNEL);
-	if (!attr)
-		return ERR_PTR(-ENOMEM);
-
-	attr->type = PERF_TYPE_BREAKPOINT;
-	attr->size = sizeof(*attr);
-	attr->bp_addr = addr;
-	attr->bp_len = len;
-	attr->bp_type = type;
-	/*
-	 * Such breakpoints are used by debuggers to trigger signals when
-	 * we hit the excepted memory op. We can't miss such events, they
-	 * must be pinned.
-	 */
-	attr->pinned = 1;
-
-	if (!active)
-		attr->disabled = 1;
-
-	bp = perf_event_create_kernel_counter(attr, cpu, pid, triggered);
-	kfree(attr);
-
-	return bp;
-}
-
 /**
  * register_user_hw_breakpoint - register a hardware breakpoint for user space
- * @addr: is the memory address that triggers the breakpoint
- * @len: the length of the access to the memory (1 byte, 2 bytes etc...)
- * @type: the type of the access to the memory (read/write/exec)
+ * @attr: breakpoint attributes
  * @triggered: callback to trigger when we hit the breakpoint
  * @tsk: pointer to 'task_struct' of the process to which the address belongs
- * @active: should we activate it while registering it
- *
  */
 struct perf_event *
-register_user_hw_breakpoint(unsigned long addr,
-			    int len,
-			    int type,
+register_user_hw_breakpoint(struct perf_event_attr *attr,
 			    perf_callback_t triggered,
-			    struct task_struct *tsk,
-			    bool active)
+			    struct task_struct *tsk)
 {
-	return register_user_hw_breakpoint_cpu(addr, len, type, triggered,
-					       tsk->pid, -1, active);
+	return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered);
 }
 EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
 
 /**
  * modify_user_hw_breakpoint - modify a user-space hardware breakpoint
  * @bp: the breakpoint structure to modify
- * @addr: is the memory address that triggers the breakpoint
- * @len: the length of the access to the memory (1 byte, 2 bytes etc...)
- * @type: the type of the access to the memory (read/write/exec)
+ * @attr: new breakpoint attributes
  * @triggered: callback to trigger when we hit the breakpoint
  * @tsk: pointer to 'task_struct' of the process to which the address belongs
- * @active: should we activate it while registering it
  */
 struct perf_event *
-modify_user_hw_breakpoint(struct perf_event *bp,
-			  unsigned long addr,
-			  int len,
-			  int type,
+modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr,
 			  perf_callback_t triggered,
-			  struct task_struct *tsk,
-			  bool active)
+			  struct task_struct *tsk)
 {
 	/*
 	 * FIXME: do it without unregistering
@@ -381,8 +323,7 @@ modify_user_hw_breakpoint(struct perf_event *bp,
 	 */
 	unregister_hw_breakpoint(bp);
 
-	return register_user_hw_breakpoint(addr, len, type, triggered,
-					   tsk, active);
+	return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered);
 }
 EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
 
@@ -406,8 +347,16 @@ register_kernel_hw_breakpoint_cpu(unsigned long addr,
 				  int cpu,
 				  bool active)
 {
-	return register_user_hw_breakpoint_cpu(addr, len, type, triggered,
-					       -1, cpu, active);
+	DEFINE_BREAKPOINT_ATTR(attr);
+
+	attr.bp_addr = addr;
+	attr.bp_len = len;
+	attr.bp_type = type;
+
+	if (!active)
+		attr.disabled = 1;
+
+	return perf_event_create_kernel_counter(&attr, cpu, -1, triggered);
 }
 
 /**
-- 
cgit v1.2.3-70-g09d2


From dd1853c3f493f6d22d9e5390b192a07b73d2ac0a Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 27 Nov 2009 04:55:54 +0100
Subject: hw-breakpoints: Use struct perf_event_attr to define kernel
 breakpoints

Kernel breakpoints are created using functions in which we pass
breakpoint parameters as individual variables: address, length
and type.

Although it fits well for x86, this just does not scale across
architectures that may support this api later as these may have
more or different needs. Pass in a perf_event_attr structure
instead because it is meant to evolve as much as possible into
a generic hardware breakpoint parameter structure.

Reported-by: K.Prasad <prasad@linux.vnet.ibm.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <1259294154-5197-2-git-send-regression-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/hw_breakpoint.h           | 35 ++++++++++++---------------
 kernel/hw_breakpoint.c                  | 35 ++++-----------------------
 kernel/trace/trace_ksym.c               | 42 ++++++++++++++++-----------------
 samples/hw_breakpoint/data_breakpoint.c | 10 ++++----
 4 files changed, 44 insertions(+), 78 deletions(-)

(limited to 'include')

diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index 5da472e434b7..a03daed08c59 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -28,6 +28,13 @@ struct perf_event_attr name = {		\
 	.pinned = 1,			\
 };
 
+static inline void hw_breakpoint_init(struct perf_event_attr *attr)
+{
+	attr->type = PERF_TYPE_BREAKPOINT;
+	attr->size = sizeof(*attr);
+	attr->pinned = 1;
+}
+
 static inline unsigned long hw_breakpoint_addr(struct perf_event *bp)
 {
 	return bp->attr.bp_addr;
@@ -59,19 +66,13 @@ modify_user_hw_breakpoint(struct perf_event *bp,
  * Kernel breakpoints are not associated with any particular thread.
  */
 extern struct perf_event *
-register_wide_hw_breakpoint_cpu(unsigned long addr,
-				int len,
-				int type,
+register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr,
 				perf_callback_t triggered,
-				int cpu,
-				bool active);
+				int cpu);
 
 extern struct perf_event **
-register_wide_hw_breakpoint(unsigned long addr,
-			    int len,
-			    int type,
-			    perf_callback_t triggered,
-			    bool active);
+register_wide_hw_breakpoint(struct perf_event_attr *attr,
+			    perf_callback_t triggered);
 
 extern int register_perf_hw_breakpoint(struct perf_event *bp);
 extern int __register_perf_hw_breakpoint(struct perf_event *bp);
@@ -100,18 +101,12 @@ modify_user_hw_breakpoint(struct perf_event *bp,
 			  perf_callback_t triggered,
 			  struct task_struct *tsk)	{ return NULL; }
 static inline struct perf_event *
-register_wide_hw_breakpoint_cpu(unsigned long addr,
-				int len,
-				int type,
+register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr,
 				perf_callback_t triggered,
-				int cpu,
-				bool active)		{ return NULL; }
+				int cpu)		{ return NULL; }
 static inline struct perf_event **
-register_wide_hw_breakpoint(unsigned long addr,
-			    int len,
-			    int type,
-			    perf_callback_t triggered,
-			    bool active)		{ return NULL; }
+register_wide_hw_breakpoint(struct perf_event_attr *attr,
+			    perf_callback_t triggered)	{ return NULL; }
 static inline int
 register_perf_hw_breakpoint(struct perf_event *bp)	{ return -ENOSYS; }
 static inline int
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 2a47514f12fd..cf5ee1628411 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -339,42 +339,16 @@ void unregister_hw_breakpoint(struct perf_event *bp)
 }
 EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
 
-static struct perf_event *
-register_kernel_hw_breakpoint_cpu(unsigned long addr,
-				  int len,
-				  int type,
-				  perf_callback_t triggered,
-				  int cpu,
-				  bool active)
-{
-	DEFINE_BREAKPOINT_ATTR(attr);
-
-	attr.bp_addr = addr;
-	attr.bp_len = len;
-	attr.bp_type = type;
-
-	if (!active)
-		attr.disabled = 1;
-
-	return perf_event_create_kernel_counter(&attr, cpu, -1, triggered);
-}
-
 /**
  * register_wide_hw_breakpoint - register a wide breakpoint in the kernel
- * @addr: is the memory address that triggers the breakpoint
- * @len: the length of the access to the memory (1 byte, 2 bytes etc...)
- * @type: the type of the access to the memory (read/write/exec)
+ * @attr: breakpoint attributes
  * @triggered: callback to trigger when we hit the breakpoint
- * @active: should we activate it while registering it
  *
  * @return a set of per_cpu pointers to perf events
  */
 struct perf_event **
-register_wide_hw_breakpoint(unsigned long addr,
-			    int len,
-			    int type,
-			    perf_callback_t triggered,
-			    bool active)
+register_wide_hw_breakpoint(struct perf_event_attr *attr,
+			    perf_callback_t triggered)
 {
 	struct perf_event **cpu_events, **pevent, *bp;
 	long err;
@@ -386,8 +360,7 @@ register_wide_hw_breakpoint(unsigned long addr,
 
 	for_each_possible_cpu(cpu) {
 		pevent = per_cpu_ptr(cpu_events, cpu);
-		bp = register_kernel_hw_breakpoint_cpu(addr, len, type,
-					triggered, cpu, active);
+		bp = perf_event_create_kernel_counter(attr, cpu, -1, triggered);
 
 		*pevent = bp;
 
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
index c538b15b95d6..ddfa0fd43bc0 100644
--- a/kernel/trace/trace_ksym.c
+++ b/kernel/trace/trace_ksym.c
@@ -42,9 +42,7 @@
 
 struct trace_ksym {
 	struct perf_event	**ksym_hbp;
-	unsigned long		ksym_addr;
-	int			type;
-	int			len;
+	struct perf_event_attr	attr;
 #ifdef CONFIG_PROFILE_KSYM_TRACER
 	unsigned long		counter;
 #endif
@@ -71,7 +69,7 @@ void ksym_collect_stats(unsigned long hbp_hit_addr)
 
 	rcu_read_lock();
 	hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
-		if ((entry->ksym_addr == hbp_hit_addr) &&
+		if ((entry->attr.bp_addr == hbp_hit_addr) &&
 		    (entry->counter <= MAX_UL_INT)) {
 			entry->counter++;
 			break;
@@ -192,14 +190,15 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
 	if (!entry)
 		return -ENOMEM;
 
-	entry->type = op;
-	entry->ksym_addr = addr;
-	entry->len = HW_BREAKPOINT_LEN_4;
+	hw_breakpoint_init(&entry->attr);
+
+	entry->attr.bp_type = op;
+	entry->attr.bp_addr = addr;
+	entry->attr.bp_len = HW_BREAKPOINT_LEN_4;
 
 	ret = -EAGAIN;
-	entry->ksym_hbp = register_wide_hw_breakpoint(entry->ksym_addr,
-					entry->len, entry->type,
-					ksym_hbp_handler, true);
+	entry->ksym_hbp = register_wide_hw_breakpoint(&entry->attr,
+					ksym_hbp_handler);
 
 	if (IS_ERR(entry->ksym_hbp)) {
 		ret = PTR_ERR(entry->ksym_hbp);
@@ -236,12 +235,12 @@ static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
 	mutex_lock(&ksym_tracer_mutex);
 
 	hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
-		ret = trace_seq_printf(s, "%pS:", (void *)entry->ksym_addr);
-		if (entry->type == HW_BREAKPOINT_R)
+		ret = trace_seq_printf(s, "%pS:", (void *)entry->attr.bp_addr);
+		if (entry->attr.bp_type == HW_BREAKPOINT_R)
 			ret = trace_seq_puts(s, "r--\n");
-		else if (entry->type == HW_BREAKPOINT_W)
+		else if (entry->attr.bp_type == HW_BREAKPOINT_W)
 			ret = trace_seq_puts(s, "-w-\n");
-		else if (entry->type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R))
+		else if (entry->attr.bp_type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R))
 			ret = trace_seq_puts(s, "rw-\n");
 		WARN_ON_ONCE(!ret);
 	}
@@ -317,9 +316,9 @@ static ssize_t ksym_trace_filter_write(struct file *file,
 
 	ret = -EINVAL;
 	hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
-		if (entry->ksym_addr == ksym_addr) {
+		if (entry->attr.bp_addr == ksym_addr) {
 			/* Check for malformed request: (6) */
-			if (entry->type != op)
+			if (entry->attr.bp_type != op)
 				changed = 1;
 			else
 				goto out;
@@ -328,13 +327,12 @@ static ssize_t ksym_trace_filter_write(struct file *file,
 	}
 	if (changed) {
 		unregister_wide_hw_breakpoint(entry->ksym_hbp);
-		entry->type = op;
+		entry->attr.bp_type = op;
 		ret = 0;
 		if (op > 0) {
 			entry->ksym_hbp =
-				register_wide_hw_breakpoint(entry->ksym_addr,
-					entry->len, entry->type,
-					ksym_hbp_handler, true);
+				register_wide_hw_breakpoint(&entry->attr,
+					ksym_hbp_handler);
 			if (IS_ERR(entry->ksym_hbp))
 				ret = PTR_ERR(entry->ksym_hbp);
 			else
@@ -489,7 +487,7 @@ static int ksym_tracer_stat_show(struct seq_file *m, void *v)
 
 	entry = hlist_entry(stat, struct trace_ksym, ksym_hlist);
 
-	access_type = entry->type;
+	access_type = entry->attr.bp_type;
 
 	switch (access_type) {
 	case HW_BREAKPOINT_R:
@@ -505,7 +503,7 @@ static int ksym_tracer_stat_show(struct seq_file *m, void *v)
 		seq_puts(m, "  NA          ");
 	}
 
-	if (lookup_symbol_name(entry->ksym_addr, fn_name) >= 0)
+	if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0)
 		seq_printf(m, "  %-36s", fn_name);
 	else
 		seq_printf(m, "  %-36s", "<NA>");
diff --git a/samples/hw_breakpoint/data_breakpoint.c b/samples/hw_breakpoint/data_breakpoint.c
index ee7f9fbaffbd..29525500df00 100644
--- a/samples/hw_breakpoint/data_breakpoint.c
+++ b/samples/hw_breakpoint/data_breakpoint.c
@@ -51,13 +51,13 @@ static void sample_hbp_handler(struct perf_event *temp, void *data)
 static int __init hw_break_module_init(void)
 {
 	int ret;
-	unsigned long addr;
+	DEFINE_BREAKPOINT_ATTR(attr);
 
-	addr = kallsyms_lookup_name(ksym_name);
+	attr.bp_addr = kallsyms_lookup_name(ksym_name);
+	attr.bp_len = HW_BREAKPOINT_LEN_4;
+	attr.bp_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
 
-	sample_hbp = register_wide_hw_breakpoint(addr, HW_BREAKPOINT_LEN_4,
-						 HW_BREAKPOINT_W | HW_BREAKPOINT_R,
-						 sample_hbp_handler, true);
+	sample_hbp = register_wide_hw_breakpoint(&attr, sample_hbp_handler);
 	if (IS_ERR(sample_hbp)) {
 		ret = PTR_ERR(sample_hbp);
 		goto fail;
-- 
cgit v1.2.3-70-g09d2


From c8602edf3f9471466755329b78d309f2a01dd449 Mon Sep 17 00:00:00 2001
From: Thomas Kunze <thommycheck@gmx.de>
Date: Tue, 10 Feb 2009 14:54:57 +0100
Subject: move drivers/mfd/*.h to include/linux/mfd

So drivers like collie_battery driver can use
those files easier.
---
 drivers/mfd/mcp-core.c        |   2 +-
 drivers/mfd/mcp-sa11x0.c      |   2 +-
 drivers/mfd/mcp.h             |  66 -----------
 drivers/mfd/ucb1x00-assabet.c |   2 +-
 drivers/mfd/ucb1x00-core.c    |   2 +-
 drivers/mfd/ucb1x00-ts.c      |   2 +-
 drivers/mfd/ucb1x00.h         | 255 ------------------------------------------
 include/linux/mfd/mcp.h       |  68 +++++++++++
 include/linux/mfd/ucb1x00.h   | 255 ++++++++++++++++++++++++++++++++++++++++++
 9 files changed, 328 insertions(+), 326 deletions(-)
 delete mode 100644 drivers/mfd/mcp.h
 delete mode 100644 drivers/mfd/ucb1x00.h
 create mode 100644 include/linux/mfd/mcp.h
 create mode 100644 include/linux/mfd/ucb1x00.h

(limited to 'include')

diff --git a/drivers/mfd/mcp-core.c b/drivers/mfd/mcp-core.c
index 57271cb3b316..84815f9ef636 100644
--- a/drivers/mfd/mcp-core.c
+++ b/drivers/mfd/mcp-core.c
@@ -17,11 +17,11 @@
 #include <linux/device.h>
 #include <linux/slab.h>
 #include <linux/string.h>
+#include <linux/mfd/mcp.h>
 
 #include <mach/dma.h>
 #include <asm/system.h>
 
-#include "mcp.h"
 
 #define to_mcp(d)		container_of(d, struct mcp, attached_device)
 #define to_mcp_driver(d)	container_of(d, struct mcp_driver, drv)
diff --git a/drivers/mfd/mcp-sa11x0.c b/drivers/mfd/mcp-sa11x0.c
index 62b32dabf629..212189815c8e 100644
--- a/drivers/mfd/mcp-sa11x0.c
+++ b/drivers/mfd/mcp-sa11x0.c
@@ -19,6 +19,7 @@
 #include <linux/spinlock.h>
 #include <linux/slab.h>
 #include <linux/platform_device.h>
+#include <linux/mfd/mcp.h>
 
 #include <mach/dma.h>
 #include <mach/hardware.h>
@@ -28,7 +29,6 @@
 
 #include <mach/assabet.h>
 
-#include "mcp.h"
 
 struct mcp_sa11x0 {
 	u32	mccr0;
diff --git a/drivers/mfd/mcp.h b/drivers/mfd/mcp.h
deleted file mode 100644
index c093a93b8808..000000000000
--- a/drivers/mfd/mcp.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- *  linux/drivers/mfd/mcp.h
- *
- *  Copyright (C) 2001 Russell King, All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License.
- */
-#ifndef MCP_H
-#define MCP_H
-
-struct mcp_ops;
-
-struct mcp {
-	struct module	*owner;
-	struct mcp_ops	*ops;
-	spinlock_t	lock;
-	int		use_count;
-	unsigned int	sclk_rate;
-	unsigned int	rw_timeout;
-	dma_device_t	dma_audio_rd;
-	dma_device_t	dma_audio_wr;
-	dma_device_t	dma_telco_rd;
-	dma_device_t	dma_telco_wr;
-	struct device	attached_device;
-};
-
-struct mcp_ops {
-	void		(*set_telecom_divisor)(struct mcp *, unsigned int);
-	void		(*set_audio_divisor)(struct mcp *, unsigned int);
-	void		(*reg_write)(struct mcp *, unsigned int, unsigned int);
-	unsigned int	(*reg_read)(struct mcp *, unsigned int);
-	void		(*enable)(struct mcp *);
-	void		(*disable)(struct mcp *);
-};
-
-void mcp_set_telecom_divisor(struct mcp *, unsigned int);
-void mcp_set_audio_divisor(struct mcp *, unsigned int);
-void mcp_reg_write(struct mcp *, unsigned int, unsigned int);
-unsigned int mcp_reg_read(struct mcp *, unsigned int);
-void mcp_enable(struct mcp *);
-void mcp_disable(struct mcp *);
-#define mcp_get_sclk_rate(mcp)	((mcp)->sclk_rate)
-
-struct mcp *mcp_host_alloc(struct device *, size_t);
-int mcp_host_register(struct mcp *);
-void mcp_host_unregister(struct mcp *);
-
-struct mcp_driver {
-	struct device_driver drv;
-	int (*probe)(struct mcp *);
-	void (*remove)(struct mcp *);
-	int (*suspend)(struct mcp *, pm_message_t);
-	int (*resume)(struct mcp *);
-};
-
-int mcp_driver_register(struct mcp_driver *);
-void mcp_driver_unregister(struct mcp_driver *);
-
-#define mcp_get_drvdata(mcp)	dev_get_drvdata(&(mcp)->attached_device)
-#define mcp_set_drvdata(mcp,d)	dev_set_drvdata(&(mcp)->attached_device, d)
-
-#define mcp_priv(mcp)		((void *)((mcp)+1))
-
-#endif
diff --git a/drivers/mfd/ucb1x00-assabet.c b/drivers/mfd/ucb1x00-assabet.c
index 86fed4870f93..cea9da60850d 100644
--- a/drivers/mfd/ucb1x00-assabet.c
+++ b/drivers/mfd/ucb1x00-assabet.c
@@ -14,10 +14,10 @@
 #include <linux/fs.h>
 #include <linux/proc_fs.h>
 #include <linux/device.h>
+#include <linux/mfd/ucb1x00.h>
 
 #include <mach/dma.h>
 
-#include "ucb1x00.h"
 
 #define UCB1X00_ATTR(name,input)\
 static ssize_t name##_show(struct device *dev, struct device_attribute *attr, \
diff --git a/drivers/mfd/ucb1x00-core.c b/drivers/mfd/ucb1x00-core.c
index 60c3988f3cf3..f9de7891e57f 100644
--- a/drivers/mfd/ucb1x00-core.c
+++ b/drivers/mfd/ucb1x00-core.c
@@ -25,11 +25,11 @@
 #include <linux/interrupt.h>
 #include <linux/device.h>
 #include <linux/mutex.h>
+#include <linux/mfd/ucb1x00.h>
 
 #include <mach/dma.h>
 #include <mach/hardware.h>
 
-#include "ucb1x00.h"
 
 static DEFINE_MUTEX(ucb1x00_mutex);
 static LIST_HEAD(ucb1x00_drivers);
diff --git a/drivers/mfd/ucb1x00-ts.c b/drivers/mfd/ucb1x00-ts.c
index 61b7d3eb9a2f..000cb414a78a 100644
--- a/drivers/mfd/ucb1x00-ts.c
+++ b/drivers/mfd/ucb1x00-ts.c
@@ -30,12 +30,12 @@
 #include <linux/freezer.h>
 #include <linux/slab.h>
 #include <linux/kthread.h>
+#include <linux/mfd/ucb1x00.h>
 
 #include <mach/dma.h>
 #include <mach/collie.h>
 #include <asm/mach-types.h>
 
-#include "ucb1x00.h"
 
 
 struct ucb1x00_ts {
diff --git a/drivers/mfd/ucb1x00.h b/drivers/mfd/ucb1x00.h
deleted file mode 100644
index a8ad8a0ed5db..000000000000
--- a/drivers/mfd/ucb1x00.h
+++ /dev/null
@@ -1,255 +0,0 @@
-/*
- *  linux/drivers/mfd/ucb1x00.h
- *
- *  Copyright (C) 2001 Russell King, All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License.
- */
-#ifndef UCB1200_H
-#define UCB1200_H
-
-#define UCB_IO_DATA	0x00
-#define UCB_IO_DIR	0x01
-
-#define UCB_IO_0		(1 << 0)
-#define UCB_IO_1		(1 << 1)
-#define UCB_IO_2		(1 << 2)
-#define UCB_IO_3		(1 << 3)
-#define UCB_IO_4		(1 << 4)
-#define UCB_IO_5		(1 << 5)
-#define UCB_IO_6		(1 << 6)
-#define UCB_IO_7		(1 << 7)
-#define UCB_IO_8		(1 << 8)
-#define UCB_IO_9		(1 << 9)
-
-#define UCB_IE_RIS	0x02
-#define UCB_IE_FAL	0x03
-#define UCB_IE_STATUS	0x04
-#define UCB_IE_CLEAR	0x04
-#define UCB_IE_ADC		(1 << 11)
-#define UCB_IE_TSPX		(1 << 12)
-#define UCB_IE_TSMX		(1 << 13)
-#define UCB_IE_TCLIP		(1 << 14)
-#define UCB_IE_ACLIP		(1 << 15)
-
-#define UCB_IRQ_TSPX		12
-
-#define UCB_TC_A	0x05
-#define UCB_TC_A_LOOP		(1 << 7)	/* UCB1200 */
-#define UCB_TC_A_AMPL		(1 << 7)	/* UCB1300 */
-
-#define UCB_TC_B	0x06
-#define UCB_TC_B_VOICE_ENA	(1 << 3)
-#define UCB_TC_B_CLIP		(1 << 4)
-#define UCB_TC_B_ATT		(1 << 6)
-#define UCB_TC_B_SIDE_ENA	(1 << 11)
-#define UCB_TC_B_MUTE		(1 << 13)
-#define UCB_TC_B_IN_ENA		(1 << 14)
-#define UCB_TC_B_OUT_ENA	(1 << 15)
-
-#define UCB_AC_A	0x07
-#define UCB_AC_B	0x08
-#define UCB_AC_B_LOOP		(1 << 8)
-#define UCB_AC_B_MUTE		(1 << 13)
-#define UCB_AC_B_IN_ENA		(1 << 14)
-#define UCB_AC_B_OUT_ENA	(1 << 15)
-
-#define UCB_TS_CR	0x09
-#define UCB_TS_CR_TSMX_POW	(1 << 0)
-#define UCB_TS_CR_TSPX_POW	(1 << 1)
-#define UCB_TS_CR_TSMY_POW	(1 << 2)
-#define UCB_TS_CR_TSPY_POW	(1 << 3)
-#define UCB_TS_CR_TSMX_GND	(1 << 4)
-#define UCB_TS_CR_TSPX_GND	(1 << 5)
-#define UCB_TS_CR_TSMY_GND	(1 << 6)
-#define UCB_TS_CR_TSPY_GND	(1 << 7)
-#define UCB_TS_CR_MODE_INT	(0 << 8)
-#define UCB_TS_CR_MODE_PRES	(1 << 8)
-#define UCB_TS_CR_MODE_POS	(2 << 8)
-#define UCB_TS_CR_BIAS_ENA	(1 << 11)
-#define UCB_TS_CR_TSPX_LOW	(1 << 12)
-#define UCB_TS_CR_TSMX_LOW	(1 << 13)
-
-#define UCB_ADC_CR	0x0a
-#define UCB_ADC_SYNC_ENA	(1 << 0)
-#define UCB_ADC_VREFBYP_CON	(1 << 1)
-#define UCB_ADC_INP_TSPX	(0 << 2)
-#define UCB_ADC_INP_TSMX	(1 << 2)
-#define UCB_ADC_INP_TSPY	(2 << 2)
-#define UCB_ADC_INP_TSMY	(3 << 2)
-#define UCB_ADC_INP_AD0		(4 << 2)
-#define UCB_ADC_INP_AD1		(5 << 2)
-#define UCB_ADC_INP_AD2		(6 << 2)
-#define UCB_ADC_INP_AD3		(7 << 2)
-#define UCB_ADC_EXT_REF		(1 << 5)
-#define UCB_ADC_START		(1 << 7)
-#define UCB_ADC_ENA		(1 << 15)
-
-#define UCB_ADC_DATA	0x0b
-#define UCB_ADC_DAT_VAL		(1 << 15)
-#define UCB_ADC_DAT(x)		(((x) & 0x7fe0) >> 5)
-
-#define UCB_ID		0x0c
-#define UCB_ID_1200		0x1004
-#define UCB_ID_1300		0x1005
-#define UCB_ID_TC35143          0x9712
-
-#define UCB_MODE	0x0d
-#define UCB_MODE_DYN_VFLAG_ENA	(1 << 12)
-#define UCB_MODE_AUD_OFF_CAN	(1 << 13)
-
-#include "mcp.h"
-
-struct ucb1x00_irq {
-	void *devid;
-	void (*fn)(int, void *);
-};
-
-struct ucb1x00 {
-	spinlock_t		lock;
-	struct mcp		*mcp;
-	unsigned int		irq;
-	struct semaphore	adc_sem;
-	spinlock_t		io_lock;
-	u16			id;
-	u16			io_dir;
-	u16			io_out;
-	u16			adc_cr;
-	u16			irq_fal_enbl;
-	u16			irq_ris_enbl;
-	struct ucb1x00_irq	irq_handler[16];
-	struct device		dev;
-	struct list_head	node;
-	struct list_head	devs;
-};
-
-struct ucb1x00_driver;
-
-struct ucb1x00_dev {
-	struct list_head	dev_node;
-	struct list_head	drv_node;
-	struct ucb1x00		*ucb;
-	struct ucb1x00_driver	*drv;
-	void			*priv;
-};
-
-struct ucb1x00_driver {
-	struct list_head	node;
-	struct list_head	devs;
-	int	(*add)(struct ucb1x00_dev *dev);
-	void	(*remove)(struct ucb1x00_dev *dev);
-	int	(*suspend)(struct ucb1x00_dev *dev, pm_message_t state);
-	int	(*resume)(struct ucb1x00_dev *dev);
-};
-
-#define classdev_to_ucb1x00(cd)	container_of(cd, struct ucb1x00, dev)
-
-int ucb1x00_register_driver(struct ucb1x00_driver *);
-void ucb1x00_unregister_driver(struct ucb1x00_driver *);
-
-/**
- *	ucb1x00_clkrate - return the UCB1x00 SIB clock rate
- *	@ucb: UCB1x00 structure describing chip
- *
- *	Return the SIB clock rate in Hz.
- */
-static inline unsigned int ucb1x00_clkrate(struct ucb1x00 *ucb)
-{
-	return mcp_get_sclk_rate(ucb->mcp);
-}
-
-/**
- *	ucb1x00_enable - enable the UCB1x00 SIB clock
- *	@ucb: UCB1x00 structure describing chip
- *
- *	Enable the SIB clock.  This can be called multiple times.
- */
-static inline void ucb1x00_enable(struct ucb1x00 *ucb)
-{
-	mcp_enable(ucb->mcp);
-}
-
-/**
- *	ucb1x00_disable - disable the UCB1x00 SIB clock
- *	@ucb: UCB1x00 structure describing chip
- *
- *	Disable the SIB clock.  The SIB clock will only be disabled
- *	when the number of ucb1x00_enable calls match the number of
- *	ucb1x00_disable calls.
- */
-static inline void ucb1x00_disable(struct ucb1x00 *ucb)
-{
-	mcp_disable(ucb->mcp);
-}
-
-/**
- *	ucb1x00_reg_write - write a UCB1x00 register
- *	@ucb: UCB1x00 structure describing chip
- *	@reg: UCB1x00 4-bit register index to write
- *	@val: UCB1x00 16-bit value to write
- *
- *	Write the UCB1x00 register @reg with value @val.  The SIB
- *	clock must be running for this function to return.
- */
-static inline void ucb1x00_reg_write(struct ucb1x00 *ucb, unsigned int reg, unsigned int val)
-{
-	mcp_reg_write(ucb->mcp, reg, val);
-}
-
-/**
- *	ucb1x00_reg_read - read a UCB1x00 register
- *	@ucb: UCB1x00 structure describing chip
- *	@reg: UCB1x00 4-bit register index to write
- *
- *	Read the UCB1x00 register @reg and return its value.  The SIB
- *	clock must be running for this function to return.
- */
-static inline unsigned int ucb1x00_reg_read(struct ucb1x00 *ucb, unsigned int reg)
-{
-	return mcp_reg_read(ucb->mcp, reg);
-}
-/**
- *	ucb1x00_set_audio_divisor - 
- *	@ucb: UCB1x00 structure describing chip
- *	@div: SIB clock divisor
- */
-static inline void ucb1x00_set_audio_divisor(struct ucb1x00 *ucb, unsigned int div)
-{
-	mcp_set_audio_divisor(ucb->mcp, div);
-}
-
-/**
- *	ucb1x00_set_telecom_divisor -
- *	@ucb: UCB1x00 structure describing chip
- *	@div: SIB clock divisor
- */
-static inline void ucb1x00_set_telecom_divisor(struct ucb1x00 *ucb, unsigned int div)
-{
-	mcp_set_telecom_divisor(ucb->mcp, div);
-}
-
-void ucb1x00_io_set_dir(struct ucb1x00 *ucb, unsigned int, unsigned int);
-void ucb1x00_io_write(struct ucb1x00 *ucb, unsigned int, unsigned int);
-unsigned int ucb1x00_io_read(struct ucb1x00 *ucb);
-
-#define UCB_NOSYNC	(0)
-#define UCB_SYNC	(1)
-
-unsigned int ucb1x00_adc_read(struct ucb1x00 *ucb, int adc_channel, int sync);
-void ucb1x00_adc_enable(struct ucb1x00 *ucb);
-void ucb1x00_adc_disable(struct ucb1x00 *ucb);
-
-/*
- * Which edges of the IRQ do you want to control today?
- */
-#define UCB_RISING	(1 << 0)
-#define UCB_FALLING	(1 << 1)
-
-int ucb1x00_hook_irq(struct ucb1x00 *ucb, unsigned int idx, void (*fn)(int, void *), void *devid);
-void ucb1x00_enable_irq(struct ucb1x00 *ucb, unsigned int idx, int edges);
-void ucb1x00_disable_irq(struct ucb1x00 *ucb, unsigned int idx, int edges);
-int ucb1x00_free_irq(struct ucb1x00 *ucb, unsigned int idx, void *devid);
-
-#endif
diff --git a/include/linux/mfd/mcp.h b/include/linux/mfd/mcp.h
new file mode 100644
index 000000000000..be95e09fd746
--- /dev/null
+++ b/include/linux/mfd/mcp.h
@@ -0,0 +1,68 @@
+/*
+ *  linux/drivers/mfd/mcp.h
+ *
+ *  Copyright (C) 2001 Russell King, All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ */
+#ifndef MCP_H
+#define MCP_H
+
+#include <mach/dma.h>
+
+struct mcp_ops;
+
+struct mcp {
+	struct module	*owner;
+	struct mcp_ops	*ops;
+	spinlock_t	lock;
+	int		use_count;
+	unsigned int	sclk_rate;
+	unsigned int	rw_timeout;
+	dma_device_t	dma_audio_rd;
+	dma_device_t	dma_audio_wr;
+	dma_device_t	dma_telco_rd;
+	dma_device_t	dma_telco_wr;
+	struct device	attached_device;
+};
+
+struct mcp_ops {
+	void		(*set_telecom_divisor)(struct mcp *, unsigned int);
+	void		(*set_audio_divisor)(struct mcp *, unsigned int);
+	void		(*reg_write)(struct mcp *, unsigned int, unsigned int);
+	unsigned int	(*reg_read)(struct mcp *, unsigned int);
+	void		(*enable)(struct mcp *);
+	void		(*disable)(struct mcp *);
+};
+
+void mcp_set_telecom_divisor(struct mcp *, unsigned int);
+void mcp_set_audio_divisor(struct mcp *, unsigned int);
+void mcp_reg_write(struct mcp *, unsigned int, unsigned int);
+unsigned int mcp_reg_read(struct mcp *, unsigned int);
+void mcp_enable(struct mcp *);
+void mcp_disable(struct mcp *);
+#define mcp_get_sclk_rate(mcp)	((mcp)->sclk_rate)
+
+struct mcp *mcp_host_alloc(struct device *, size_t);
+int mcp_host_register(struct mcp *);
+void mcp_host_unregister(struct mcp *);
+
+struct mcp_driver {
+	struct device_driver drv;
+	int (*probe)(struct mcp *);
+	void (*remove)(struct mcp *);
+	int (*suspend)(struct mcp *, pm_message_t);
+	int (*resume)(struct mcp *);
+};
+
+int mcp_driver_register(struct mcp_driver *);
+void mcp_driver_unregister(struct mcp_driver *);
+
+#define mcp_get_drvdata(mcp)	dev_get_drvdata(&(mcp)->attached_device)
+#define mcp_set_drvdata(mcp,d)	dev_set_drvdata(&(mcp)->attached_device, d)
+
+#define mcp_priv(mcp)		((void *)((mcp)+1))
+
+#endif
diff --git a/include/linux/mfd/ucb1x00.h b/include/linux/mfd/ucb1x00.h
new file mode 100644
index 000000000000..eac346336382
--- /dev/null
+++ b/include/linux/mfd/ucb1x00.h
@@ -0,0 +1,255 @@
+/*
+ *  linux/include/mfd/ucb1x00.h
+ *
+ *  Copyright (C) 2001 Russell King, All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ */
+#ifndef UCB1200_H
+#define UCB1200_H
+
+#include <linux/mfd/mcp.h>
+#define UCB_IO_DATA	0x00
+#define UCB_IO_DIR	0x01
+
+#define UCB_IO_0		(1 << 0)
+#define UCB_IO_1		(1 << 1)
+#define UCB_IO_2		(1 << 2)
+#define UCB_IO_3		(1 << 3)
+#define UCB_IO_4		(1 << 4)
+#define UCB_IO_5		(1 << 5)
+#define UCB_IO_6		(1 << 6)
+#define UCB_IO_7		(1 << 7)
+#define UCB_IO_8		(1 << 8)
+#define UCB_IO_9		(1 << 9)
+
+#define UCB_IE_RIS	0x02
+#define UCB_IE_FAL	0x03
+#define UCB_IE_STATUS	0x04
+#define UCB_IE_CLEAR	0x04
+#define UCB_IE_ADC		(1 << 11)
+#define UCB_IE_TSPX		(1 << 12)
+#define UCB_IE_TSMX		(1 << 13)
+#define UCB_IE_TCLIP		(1 << 14)
+#define UCB_IE_ACLIP		(1 << 15)
+
+#define UCB_IRQ_TSPX		12
+
+#define UCB_TC_A	0x05
+#define UCB_TC_A_LOOP		(1 << 7)	/* UCB1200 */
+#define UCB_TC_A_AMPL		(1 << 7)	/* UCB1300 */
+
+#define UCB_TC_B	0x06
+#define UCB_TC_B_VOICE_ENA	(1 << 3)
+#define UCB_TC_B_CLIP		(1 << 4)
+#define UCB_TC_B_ATT		(1 << 6)
+#define UCB_TC_B_SIDE_ENA	(1 << 11)
+#define UCB_TC_B_MUTE		(1 << 13)
+#define UCB_TC_B_IN_ENA		(1 << 14)
+#define UCB_TC_B_OUT_ENA	(1 << 15)
+
+#define UCB_AC_A	0x07
+#define UCB_AC_B	0x08
+#define UCB_AC_B_LOOP		(1 << 8)
+#define UCB_AC_B_MUTE		(1 << 13)
+#define UCB_AC_B_IN_ENA		(1 << 14)
+#define UCB_AC_B_OUT_ENA	(1 << 15)
+
+#define UCB_TS_CR	0x09
+#define UCB_TS_CR_TSMX_POW	(1 << 0)
+#define UCB_TS_CR_TSPX_POW	(1 << 1)
+#define UCB_TS_CR_TSMY_POW	(1 << 2)
+#define UCB_TS_CR_TSPY_POW	(1 << 3)
+#define UCB_TS_CR_TSMX_GND	(1 << 4)
+#define UCB_TS_CR_TSPX_GND	(1 << 5)
+#define UCB_TS_CR_TSMY_GND	(1 << 6)
+#define UCB_TS_CR_TSPY_GND	(1 << 7)
+#define UCB_TS_CR_MODE_INT	(0 << 8)
+#define UCB_TS_CR_MODE_PRES	(1 << 8)
+#define UCB_TS_CR_MODE_POS	(2 << 8)
+#define UCB_TS_CR_BIAS_ENA	(1 << 11)
+#define UCB_TS_CR_TSPX_LOW	(1 << 12)
+#define UCB_TS_CR_TSMX_LOW	(1 << 13)
+
+#define UCB_ADC_CR	0x0a
+#define UCB_ADC_SYNC_ENA	(1 << 0)
+#define UCB_ADC_VREFBYP_CON	(1 << 1)
+#define UCB_ADC_INP_TSPX	(0 << 2)
+#define UCB_ADC_INP_TSMX	(1 << 2)
+#define UCB_ADC_INP_TSPY	(2 << 2)
+#define UCB_ADC_INP_TSMY	(3 << 2)
+#define UCB_ADC_INP_AD0		(4 << 2)
+#define UCB_ADC_INP_AD1		(5 << 2)
+#define UCB_ADC_INP_AD2		(6 << 2)
+#define UCB_ADC_INP_AD3		(7 << 2)
+#define UCB_ADC_EXT_REF		(1 << 5)
+#define UCB_ADC_START		(1 << 7)
+#define UCB_ADC_ENA		(1 << 15)
+
+#define UCB_ADC_DATA	0x0b
+#define UCB_ADC_DAT_VAL		(1 << 15)
+#define UCB_ADC_DAT(x)		(((x) & 0x7fe0) >> 5)
+
+#define UCB_ID		0x0c
+#define UCB_ID_1200		0x1004
+#define UCB_ID_1300		0x1005
+#define UCB_ID_TC35143          0x9712
+
+#define UCB_MODE	0x0d
+#define UCB_MODE_DYN_VFLAG_ENA	(1 << 12)
+#define UCB_MODE_AUD_OFF_CAN	(1 << 13)
+
+
+struct ucb1x00_irq {
+	void *devid;
+	void (*fn)(int, void *);
+};
+
+struct ucb1x00 {
+	spinlock_t		lock;
+	struct mcp		*mcp;
+	unsigned int		irq;
+	struct semaphore	adc_sem;
+	spinlock_t		io_lock;
+	u16			id;
+	u16			io_dir;
+	u16			io_out;
+	u16			adc_cr;
+	u16			irq_fal_enbl;
+	u16			irq_ris_enbl;
+	struct ucb1x00_irq	irq_handler[16];
+	struct device		dev;
+	struct list_head	node;
+	struct list_head	devs;
+};
+
+struct ucb1x00_driver;
+
+struct ucb1x00_dev {
+	struct list_head	dev_node;
+	struct list_head	drv_node;
+	struct ucb1x00		*ucb;
+	struct ucb1x00_driver	*drv;
+	void			*priv;
+};
+
+struct ucb1x00_driver {
+	struct list_head	node;
+	struct list_head	devs;
+	int	(*add)(struct ucb1x00_dev *dev);
+	void	(*remove)(struct ucb1x00_dev *dev);
+	int	(*suspend)(struct ucb1x00_dev *dev, pm_message_t state);
+	int	(*resume)(struct ucb1x00_dev *dev);
+};
+
+#define classdev_to_ucb1x00(cd)	container_of(cd, struct ucb1x00, dev)
+
+int ucb1x00_register_driver(struct ucb1x00_driver *);
+void ucb1x00_unregister_driver(struct ucb1x00_driver *);
+
+/**
+ *	ucb1x00_clkrate - return the UCB1x00 SIB clock rate
+ *	@ucb: UCB1x00 structure describing chip
+ *
+ *	Return the SIB clock rate in Hz.
+ */
+static inline unsigned int ucb1x00_clkrate(struct ucb1x00 *ucb)
+{
+	return mcp_get_sclk_rate(ucb->mcp);
+}
+
+/**
+ *	ucb1x00_enable - enable the UCB1x00 SIB clock
+ *	@ucb: UCB1x00 structure describing chip
+ *
+ *	Enable the SIB clock.  This can be called multiple times.
+ */
+static inline void ucb1x00_enable(struct ucb1x00 *ucb)
+{
+	mcp_enable(ucb->mcp);
+}
+
+/**
+ *	ucb1x00_disable - disable the UCB1x00 SIB clock
+ *	@ucb: UCB1x00 structure describing chip
+ *
+ *	Disable the SIB clock.  The SIB clock will only be disabled
+ *	when the number of ucb1x00_enable calls match the number of
+ *	ucb1x00_disable calls.
+ */
+static inline void ucb1x00_disable(struct ucb1x00 *ucb)
+{
+	mcp_disable(ucb->mcp);
+}
+
+/**
+ *	ucb1x00_reg_write - write a UCB1x00 register
+ *	@ucb: UCB1x00 structure describing chip
+ *	@reg: UCB1x00 4-bit register index to write
+ *	@val: UCB1x00 16-bit value to write
+ *
+ *	Write the UCB1x00 register @reg with value @val.  The SIB
+ *	clock must be running for this function to return.
+ */
+static inline void ucb1x00_reg_write(struct ucb1x00 *ucb, unsigned int reg, unsigned int val)
+{
+	mcp_reg_write(ucb->mcp, reg, val);
+}
+
+/**
+ *	ucb1x00_reg_read - read a UCB1x00 register
+ *	@ucb: UCB1x00 structure describing chip
+ *	@reg: UCB1x00 4-bit register index to write
+ *
+ *	Read the UCB1x00 register @reg and return its value.  The SIB
+ *	clock must be running for this function to return.
+ */
+static inline unsigned int ucb1x00_reg_read(struct ucb1x00 *ucb, unsigned int reg)
+{
+	return mcp_reg_read(ucb->mcp, reg);
+}
+/**
+ *	ucb1x00_set_audio_divisor - 
+ *	@ucb: UCB1x00 structure describing chip
+ *	@div: SIB clock divisor
+ */
+static inline void ucb1x00_set_audio_divisor(struct ucb1x00 *ucb, unsigned int div)
+{
+	mcp_set_audio_divisor(ucb->mcp, div);
+}
+
+/**
+ *	ucb1x00_set_telecom_divisor -
+ *	@ucb: UCB1x00 structure describing chip
+ *	@div: SIB clock divisor
+ */
+static inline void ucb1x00_set_telecom_divisor(struct ucb1x00 *ucb, unsigned int div)
+{
+	mcp_set_telecom_divisor(ucb->mcp, div);
+}
+
+void ucb1x00_io_set_dir(struct ucb1x00 *ucb, unsigned int, unsigned int);
+void ucb1x00_io_write(struct ucb1x00 *ucb, unsigned int, unsigned int);
+unsigned int ucb1x00_io_read(struct ucb1x00 *ucb);
+
+#define UCB_NOSYNC	(0)
+#define UCB_SYNC	(1)
+
+unsigned int ucb1x00_adc_read(struct ucb1x00 *ucb, int adc_channel, int sync);
+void ucb1x00_adc_enable(struct ucb1x00 *ucb);
+void ucb1x00_adc_disable(struct ucb1x00 *ucb);
+
+/*
+ * Which edges of the IRQ do you want to control today?
+ */
+#define UCB_RISING	(1 << 0)
+#define UCB_FALLING	(1 << 1)
+
+int ucb1x00_hook_irq(struct ucb1x00 *ucb, unsigned int idx, void (*fn)(int, void *), void *devid);
+void ucb1x00_enable_irq(struct ucb1x00 *ucb, unsigned int idx, int edges);
+void ucb1x00_disable_irq(struct ucb1x00 *ucb, unsigned int idx, int edges);
+int ucb1x00_free_irq(struct ucb1x00 *ucb, unsigned int idx, void *devid);
+
+#endif
-- 
cgit v1.2.3-70-g09d2


From 9ca3dc805cd0d89c44f88b9a399061946781323a Mon Sep 17 00:00:00 2001
From: Thomas Kunze <thommycheck@gmx.de>
Date: Tue, 10 Feb 2009 14:50:56 +0100
Subject: add gpiolib support to ucb1x00

The old access methods to the gpios will be removed when
all users has been converted. (mainly ucb1x00-ts)
---
 arch/arm/mach-sa1100/include/mach/mcp.h |  1 +
 drivers/mfd/mcp-sa11x0.c                |  1 +
 drivers/mfd/ucb1x00-core.c              | 87 ++++++++++++++++++++++++++++++++-
 include/linux/mfd/mcp.h                 |  1 +
 include/linux/mfd/ucb1x00.h             |  3 ++
 5 files changed, 91 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/arch/arm/mach-sa1100/include/mach/mcp.h b/arch/arm/mach-sa1100/include/mach/mcp.h
index fb8b09a57ad7..ed1a331508a7 100644
--- a/arch/arm/mach-sa1100/include/mach/mcp.h
+++ b/arch/arm/mach-sa1100/include/mach/mcp.h
@@ -16,6 +16,7 @@ struct mcp_plat_data {
 	u32 mccr0;
 	u32 mccr1;
 	unsigned int sclk_rate;
+	int gpio_base;
 };
 
 #endif
diff --git a/drivers/mfd/mcp-sa11x0.c b/drivers/mfd/mcp-sa11x0.c
index 212189815c8e..258427232728 100644
--- a/drivers/mfd/mcp-sa11x0.c
+++ b/drivers/mfd/mcp-sa11x0.c
@@ -163,6 +163,7 @@ static int mcp_sa11x0_probe(struct platform_device *pdev)
 	mcp->dma_audio_wr	= DMA_Ser4MCP0Wr;
 	mcp->dma_telco_rd	= DMA_Ser4MCP1Rd;
 	mcp->dma_telco_wr	= DMA_Ser4MCP1Wr;
+	mcp->gpio_base		= data->gpio_base;
 
 	platform_set_drvdata(pdev, mcp);
 
diff --git a/drivers/mfd/ucb1x00-core.c b/drivers/mfd/ucb1x00-core.c
index f9de7891e57f..252b74188ec2 100644
--- a/drivers/mfd/ucb1x00-core.c
+++ b/drivers/mfd/ucb1x00-core.c
@@ -26,11 +26,11 @@
 #include <linux/device.h>
 #include <linux/mutex.h>
 #include <linux/mfd/ucb1x00.h>
+#include <linux/gpio.h>
 
 #include <mach/dma.h>
 #include <mach/hardware.h>
 
-
 static DEFINE_MUTEX(ucb1x00_mutex);
 static LIST_HEAD(ucb1x00_drivers);
 static LIST_HEAD(ucb1x00_devices);
@@ -108,6 +108,60 @@ unsigned int ucb1x00_io_read(struct ucb1x00 *ucb)
 	return ucb1x00_reg_read(ucb, UCB_IO_DATA);
 }
 
+static void ucb1x00_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
+{
+	struct ucb1x00 *ucb = container_of(chip, struct ucb1x00, gpio);
+	unsigned long flags;
+
+	spin_lock_irqsave(&ucb->io_lock, flags);
+	if (value)
+		ucb->io_out |= 1 << offset;
+	else
+		ucb->io_out &= ~(1 << offset);
+
+	ucb1x00_reg_write(ucb, UCB_IO_DATA, ucb->io_out);
+	spin_unlock_irqrestore(&ucb->io_lock, flags);
+}
+
+static int ucb1x00_gpio_get(struct gpio_chip *chip, unsigned offset)
+{
+	struct ucb1x00 *ucb = container_of(chip, struct ucb1x00, gpio);
+	return ucb1x00_reg_read(ucb, UCB_IO_DATA) & (1 << offset);
+}
+
+static int ucb1x00_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
+{
+	struct ucb1x00 *ucb = container_of(chip, struct ucb1x00, gpio);
+	unsigned long flags;
+
+	spin_lock_irqsave(&ucb->io_lock, flags);
+	ucb->io_dir &= ~(1 << offset);
+	ucb1x00_reg_write(ucb, UCB_IO_DIR, ucb->io_dir);
+	spin_unlock_irqrestore(&ucb->io_lock, flags);
+
+	return 0;
+}
+
+static int ucb1x00_gpio_direction_output(struct gpio_chip *chip, unsigned offset
+		, int value)
+{
+	struct ucb1x00 *ucb = container_of(chip, struct ucb1x00, gpio);
+	unsigned long flags;
+
+	spin_lock_irqsave(&ucb->io_lock, flags);
+	ucb->io_dir |= (1 << offset);
+	ucb1x00_reg_write(ucb, UCB_IO_DIR, ucb->io_dir);
+
+	if (value)
+		ucb->io_out |= 1 << offset;
+	else
+		ucb->io_out &= ~(1 << offset);
+	ucb1x00_reg_write(ucb, UCB_IO_DATA, ucb->io_out);
+	spin_unlock_irqrestore(&ucb->io_lock, flags);
+
+	return 0;
+}
+
 /*
  * UCB1300 data sheet says we must:
  *  1. enable ADC	=> 5us (including reference startup time)
@@ -476,6 +530,7 @@ static int ucb1x00_probe(struct mcp *mcp)
 	struct ucb1x00_driver *drv;
 	unsigned int id;
 	int ret = -ENODEV;
+	int temp;
 
 	mcp_enable(mcp);
 	id = mcp_reg_read(mcp, UCB_ID);
@@ -508,12 +563,27 @@ static int ucb1x00_probe(struct mcp *mcp)
 		goto err_free;
 	}
 
+	ucb->gpio.base = -1;
+	if (mcp->gpio_base != 0) {
+		ucb->gpio.label = dev_name(&ucb->dev);
+		ucb->gpio.base = mcp->gpio_base;
+		ucb->gpio.ngpio = 10;
+		ucb->gpio.set = ucb1x00_gpio_set;
+		ucb->gpio.get = ucb1x00_gpio_get;
+		ucb->gpio.direction_input = ucb1x00_gpio_direction_input;
+		ucb->gpio.direction_output = ucb1x00_gpio_direction_output;
+		ret = gpiochip_add(&ucb->gpio);
+		if (ret)
+			goto err_free;
+	} else
+		dev_info(&ucb->dev, "gpio_base not set so no gpiolib support");
+
 	ret = request_irq(ucb->irq, ucb1x00_irq, IRQF_TRIGGER_RISING,
 			  "UCB1x00", ucb);
 	if (ret) {
 		printk(KERN_ERR "ucb1x00: unable to grab irq%d: %d\n",
 			ucb->irq, ret);
-		goto err_free;
+		goto err_gpio;
 	}
 
 	mcp_set_drvdata(mcp, ucb);
@@ -522,6 +592,7 @@ static int ucb1x00_probe(struct mcp *mcp)
 	if (ret)
 		goto err_irq;
 
+
 	INIT_LIST_HEAD(&ucb->devs);
 	mutex_lock(&ucb1x00_mutex);
 	list_add(&ucb->node, &ucb1x00_devices);
@@ -529,10 +600,14 @@ static int ucb1x00_probe(struct mcp *mcp)
 		ucb1x00_add_dev(ucb, drv);
 	}
 	mutex_unlock(&ucb1x00_mutex);
+
 	goto out;
 
  err_irq:
 	free_irq(ucb->irq, ucb);
+ err_gpio:
+	if (ucb->gpio.base != -1)
+		temp = gpiochip_remove(&ucb->gpio);
  err_free:
 	kfree(ucb);
  err_disable:
@@ -545,6 +620,7 @@ static void ucb1x00_remove(struct mcp *mcp)
 {
 	struct ucb1x00 *ucb = mcp_get_drvdata(mcp);
 	struct list_head *l, *n;
+	int ret;
 
 	mutex_lock(&ucb1x00_mutex);
 	list_del(&ucb->node);
@@ -554,6 +630,12 @@ static void ucb1x00_remove(struct mcp *mcp)
 	}
 	mutex_unlock(&ucb1x00_mutex);
 
+	if (ucb->gpio.base != -1) {
+		ret = gpiochip_remove(&ucb->gpio);
+		if (ret)
+			dev_err(&ucb->dev, "Can't remove gpio chip: %d\n", ret);
+	}
+
 	free_irq(ucb->irq, ucb);
 	device_unregister(&ucb->dev);
 }
@@ -604,6 +686,7 @@ static int ucb1x00_resume(struct mcp *mcp)
 	struct ucb1x00 *ucb = mcp_get_drvdata(mcp);
 	struct ucb1x00_dev *dev;
 
+	ucb1x00_reg_write(ucb, UCB_IO_DIR, ucb->io_dir);
 	mutex_lock(&ucb1x00_mutex);
 	list_for_each_entry(dev, &ucb->devs, dev_node) {
 		if (dev->drv->resume)
diff --git a/include/linux/mfd/mcp.h b/include/linux/mfd/mcp.h
index be95e09fd746..ee496708e38b 100644
--- a/include/linux/mfd/mcp.h
+++ b/include/linux/mfd/mcp.h
@@ -26,6 +26,7 @@ struct mcp {
 	dma_device_t	dma_telco_rd;
 	dma_device_t	dma_telco_wr;
 	struct device	attached_device;
+	int		gpio_base;
 };
 
 struct mcp_ops {
diff --git a/include/linux/mfd/ucb1x00.h b/include/linux/mfd/ucb1x00.h
index eac346336382..aa9c3789bed4 100644
--- a/include/linux/mfd/ucb1x00.h
+++ b/include/linux/mfd/ucb1x00.h
@@ -11,6 +11,8 @@
 #define UCB1200_H
 
 #include <linux/mfd/mcp.h>
+#include <linux/gpio.h>
+
 #define UCB_IO_DATA	0x00
 #define UCB_IO_DIR	0x01
 
@@ -123,6 +125,7 @@ struct ucb1x00 {
 	struct device		dev;
 	struct list_head	node;
 	struct list_head	devs;
+	struct gpio_chip 	gpio;
 };
 
 struct ucb1x00_driver;
-- 
cgit v1.2.3-70-g09d2


From f5560da549ea2e32dd41e36548c0e7dee3d4aabb Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@opensource.se>
Date: Wed, 13 Dec 2006 19:46:38 +0900
Subject: pcmcia: Pass struct pcmcia_device to pcmcia_release_window()

No logic changes, just pass struct pcmcia_device to pcmcia_release_window().

[linux@dominikbrodowski.net: update to 2.6.31]
CC: netdev@vger.kernel.org
CC: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
 drivers/char/pcmcia/ipwireless/main.c | 14 +++++++-------
 drivers/mtd/maps/pcmciamtd.c          |  2 +-
 drivers/net/pcmcia/fmvj18x_cs.c       |  4 ++--
 drivers/net/pcmcia/ibmtr_cs.c         |  2 +-
 drivers/net/pcmcia/pcnet_cs.c         |  4 ++--
 drivers/net/wireless/ray_cs.c         |  4 ++--
 drivers/pcmcia/pcmcia_resource.c      |  5 +++--
 include/pcmcia/ds.h                   |  2 +-
 8 files changed, 19 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/drivers/char/pcmcia/ipwireless/main.c b/drivers/char/pcmcia/ipwireless/main.c
index 24bffa4ece49..91ca23d2b042 100644
--- a/drivers/char/pcmcia/ipwireless/main.c
+++ b/drivers/char/pcmcia/ipwireless/main.c
@@ -168,15 +168,15 @@ static int ipwireless_probe(struct pcmcia_device *p_dev,
 	return 0;
 
 exit3:
-	pcmcia_release_window(ipw->handle_attr_memory);
+	pcmcia_release_window(p_dev, ipw->handle_attr_memory);
 exit2:
 	if (ipw->common_memory) {
 		release_mem_region(ipw->request_common_memory.Base,
 				ipw->request_common_memory.Size);
 		iounmap(ipw->common_memory);
-		pcmcia_release_window(ipw->handle_common_memory);
+		pcmcia_release_window(p_dev, ipw->handle_common_memory);
 	} else
-		pcmcia_release_window(ipw->handle_common_memory);
+		pcmcia_release_window(p_dev, ipw->handle_common_memory);
 exit1:
 	release_resource(io_resource);
 	pcmcia_disable_device(p_dev);
@@ -260,13 +260,13 @@ exit:
 		release_mem_region(ipw->request_attr_memory.Base,
 				ipw->request_attr_memory.Size);
 		iounmap(ipw->attr_memory);
-		pcmcia_release_window(ipw->handle_attr_memory);
+		pcmcia_release_window(link, ipw->handle_attr_memory);
 	}
 	if (ipw->common_memory) {
 		release_mem_region(ipw->request_common_memory.Base,
 				ipw->request_common_memory.Size);
 		iounmap(ipw->common_memory);
-		pcmcia_release_window(ipw->handle_common_memory);
+		pcmcia_release_window(link, ipw->handle_common_memory);
 	}
 	pcmcia_disable_device(link);
 	return -1;
@@ -287,9 +287,9 @@ static void release_ipwireless(struct ipw_dev *ipw)
 		iounmap(ipw->attr_memory);
 	}
 	if (ipw->common_memory)
-		pcmcia_release_window(ipw->handle_common_memory);
+		pcmcia_release_window(ipw->link, ipw->handle_common_memory);
 	if (ipw->attr_memory)
-		pcmcia_release_window(ipw->handle_attr_memory);
+		pcmcia_release_window(ipw->link, ipw->handle_attr_memory);
 
 	/* Break the link with Card Services */
 	pcmcia_disable_device(ipw->link);
diff --git a/drivers/mtd/maps/pcmciamtd.c b/drivers/mtd/maps/pcmciamtd.c
index b698dbaaf9e6..16d9985aadcd 100644
--- a/drivers/mtd/maps/pcmciamtd.c
+++ b/drivers/mtd/maps/pcmciamtd.c
@@ -344,7 +344,7 @@ static void pcmciamtd_release(struct pcmcia_device *link)
 			iounmap(dev->win_base);
 			dev->win_base = NULL;
 		}
-		pcmcia_release_window(link->win);
+		pcmcia_release_window(link, link->win);
 	}
 	pcmcia_disable_device(link);
 }
diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c
index fdec5c333a2a..61726c82be46 100644
--- a/drivers/net/pcmcia/fmvj18x_cs.c
+++ b/drivers/net/pcmcia/fmvj18x_cs.c
@@ -599,7 +599,7 @@ static int fmvj18x_get_hwinfo(struct pcmcia_device *link, u_char *node_id)
     }
 
     iounmap(base);
-    j = pcmcia_release_window(link->win);
+    j = pcmcia_release_window(link, link->win);
     return (i != 0x200) ? 0 : -1;
 
 } /* fmvj18x_get_hwinfo */
@@ -666,7 +666,7 @@ static void fmvj18x_release(struct pcmcia_device *link)
 	tmp = lp->base;
 	lp->base = NULL;    /* set NULL before iounmap */
 	iounmap(tmp);
-	j = pcmcia_release_window(link->win);
+	j = pcmcia_release_window(link, link->win);
     }
 
     pcmcia_disable_device(link);
diff --git a/drivers/net/pcmcia/ibmtr_cs.c b/drivers/net/pcmcia/ibmtr_cs.c
index 0d914f3b2941..7a985ca079d7 100644
--- a/drivers/net/pcmcia/ibmtr_cs.c
+++ b/drivers/net/pcmcia/ibmtr_cs.c
@@ -328,7 +328,7 @@ static void ibmtr_release(struct pcmcia_device *link)
 	if (link->win) {
 		struct tok_info *ti = netdev_priv(dev);
 		iounmap(ti->mmio);
-		pcmcia_release_window(info->sram_win_handle);
+		pcmcia_release_window(link, info->sram_win_handle);
 	}
 	pcmcia_disable_device(link);
 }
diff --git a/drivers/net/pcmcia/pcnet_cs.c b/drivers/net/pcmcia/pcnet_cs.c
index 80ab9de1c4df..4156a6c5811e 100644
--- a/drivers/net/pcmcia/pcnet_cs.c
+++ b/drivers/net/pcmcia/pcnet_cs.c
@@ -337,7 +337,7 @@ static hw_info_t *get_hwinfo(struct pcmcia_device *link)
     }
 
     iounmap(virt);
-    j = pcmcia_release_window(link->win);
+    j = pcmcia_release_window(link, link->win);
     return (i < NR_INFO) ? hw_info+i : NULL;
 } /* get_hwinfo */
 
@@ -1513,7 +1513,7 @@ static int setup_shmem_window(struct pcmcia_device *link, int start_pg,
     pcnet_reset_8390(dev);
     if (i != (TX_PAGES<<8)) {
 	iounmap(info->base);
-	pcmcia_release_window(link->win);
+	pcmcia_release_window(link, link->win);
 	info->base = NULL; link->win = NULL;
 	goto failed;
     }
diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c
index 28db7914a5d1..ed328750de45 100644
--- a/drivers/net/wireless/ray_cs.c
+++ b/drivers/net/wireless/ray_cs.c
@@ -820,10 +820,10 @@ static void ray_release(struct pcmcia_device *link)
 	iounmap(local->rmem);
 	iounmap(local->amem);
 	/* Do bother checking to see if these succeed or not */
-	i = pcmcia_release_window(local->amem_handle);
+	i = pcmcia_release_window(link, local->amem_handle);
 	if (i != 0)
 		dev_dbg(&link->dev, "ReleaseWindow(local->amem) ret = %x\n", i);
-	i = pcmcia_release_window(local->rmem_handle);
+	i = pcmcia_release_window(link, local->rmem_handle);
 	if (i != 0)
 		dev_dbg(&link->dev, "ReleaseWindow(local->rmem) ret = %x\n", i);
 	pcmcia_disable_device(link);
diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c
index cda48ea9b6fd..820a6e5868e8 100644
--- a/drivers/pcmcia/pcmcia_resource.c
+++ b/drivers/pcmcia/pcmcia_resource.c
@@ -442,9 +442,10 @@ static int pcmcia_release_irq(struct pcmcia_device *p_dev, irq_req_t *req)
 } /* pcmcia_release_irq */
 
 
-int pcmcia_release_window(window_handle_t win)
+int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t wh)
 {
 	struct pcmcia_socket *s;
+	window_handle_t win = wh;
 
 	if ((win == NULL) || (win->magic != WINDOW_MAGIC))
 		return -EINVAL;
@@ -891,7 +892,7 @@ void pcmcia_disable_device(struct pcmcia_device *p_dev) {
 	pcmcia_release_io(p_dev, &p_dev->io);
 	pcmcia_release_irq(p_dev, &p_dev->irq);
 	if (p_dev->win)
-		pcmcia_release_window(p_dev->win);
+		pcmcia_release_window(p_dev, p_dev->win);
 }
 EXPORT_SYMBOL(pcmcia_disable_device);
 
diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h
index d82392de4e92..40b098d7aa41 100644
--- a/include/pcmcia/ds.h
+++ b/include/pcmcia/ds.h
@@ -201,7 +201,7 @@ int pcmcia_request_configuration(struct pcmcia_device *p_dev,
 
 int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req,
 			  window_handle_t *wh);
-int pcmcia_release_window(window_handle_t win);
+int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t win);
 
 int pcmcia_get_mem_page(window_handle_t win, memreq_t *req);
 int pcmcia_map_mem_page(window_handle_t win, memreq_t *req);
-- 
cgit v1.2.3-70-g09d2


From 868575d1e87ff2091800aea816972ddb46de60d5 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@opensource.se>
Date: Wed, 13 Dec 2006 19:46:43 +0900
Subject: pcmcia: Pass struct pcmcia_device to pcmcia_map_mem_page()

No logic changes, just pass struct pcmcia_device to pcmcia_map_mem_page()

[linux@dominikbrodowski.net: update to 2.6.31]
CC: netdev@vger.kernel.org
CC: linux-wireless@vger.kernel.org
CC: linux-scsi@vger.kernel.org
CC: Jiri Kosina <jkosina@suse.cz>
Acked-by: Karsten Keil <keil@b1-systems.de> (for ISDN)
Signed-off-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
 drivers/char/pcmcia/ipwireless/main.c         | 4 ++--
 drivers/isdn/hisax/sedlbauer_cs.c             | 2 +-
 drivers/mtd/maps/pcmciamtd.c                  | 2 +-
 drivers/net/pcmcia/fmvj18x_cs.c               | 4 ++--
 drivers/net/pcmcia/ibmtr_cs.c                 | 4 ++--
 drivers/net/pcmcia/pcnet_cs.c                 | 4 ++--
 drivers/net/pcmcia/smc91c92_cs.c              | 2 +-
 drivers/net/pcmcia/xirc2ps_cs.c               | 2 +-
 drivers/net/wireless/airo_cs.c                | 2 +-
 drivers/net/wireless/b43/pcmcia.c             | 2 +-
 drivers/net/wireless/netwave_cs.c             | 2 +-
 drivers/net/wireless/ray_cs.c                 | 6 +++---
 drivers/net/wireless/wavelan_cs.c             | 2 +-
 drivers/pcmcia/pcmcia_resource.c              | 5 ++++-
 drivers/scsi/pcmcia/nsp_cs.c                  | 2 +-
 drivers/staging/comedi/drivers/ni_daq_700.c   | 2 +-
 drivers/staging/comedi/drivers/ni_daq_dio24.c | 2 +-
 drivers/staging/comedi/drivers/ni_labpc_cs.c  | 2 +-
 include/pcmcia/ds.h                           | 3 ++-
 19 files changed, 29 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/drivers/char/pcmcia/ipwireless/main.c b/drivers/char/pcmcia/ipwireless/main.c
index 91ca23d2b042..10af1c0dc368 100644
--- a/drivers/char/pcmcia/ipwireless/main.c
+++ b/drivers/char/pcmcia/ipwireless/main.c
@@ -125,7 +125,7 @@ static int ipwireless_probe(struct pcmcia_device *p_dev,
 	memreq_common_memory.CardOffset = cfg->mem.win[0].card_addr;
 	memreq_common_memory.Page = 0;
 
-	ret = pcmcia_map_mem_page(ipw->handle_common_memory,
+	ret = pcmcia_map_mem_page(p_dev, ipw->handle_common_memory,
 				&memreq_common_memory);
 
 	if (ret != 0)
@@ -154,7 +154,7 @@ static int ipwireless_probe(struct pcmcia_device *p_dev,
 	memreq_attr_memory.CardOffset = 0;
 	memreq_attr_memory.Page = 0;
 
-	ret = pcmcia_map_mem_page(ipw->handle_attr_memory,
+	ret = pcmcia_map_mem_page(p_dev, ipw->handle_attr_memory,
 				&memreq_attr_memory);
 
 	if (ret != 0)
diff --git a/drivers/isdn/hisax/sedlbauer_cs.c b/drivers/isdn/hisax/sedlbauer_cs.c
index 33d7530527c7..598e17a551b7 100644
--- a/drivers/isdn/hisax/sedlbauer_cs.c
+++ b/drivers/isdn/hisax/sedlbauer_cs.c
@@ -276,7 +276,7 @@ static int sedlbauer_config_check(struct pcmcia_device *p_dev,
 			return -ENODEV;
 		map.Page = 0;
 		map.CardOffset = mem->win[0].card_addr;
-		if (pcmcia_map_mem_page(p_dev->win, &map) != 0)
+		if (pcmcia_map_mem_page(p_dev, p_dev->win, &map) != 0)
 			return -ENODEV;
 	}
 	return 0;
diff --git a/drivers/mtd/maps/pcmciamtd.c b/drivers/mtd/maps/pcmciamtd.c
index 16d9985aadcd..80b9005c3736 100644
--- a/drivers/mtd/maps/pcmciamtd.c
+++ b/drivers/mtd/maps/pcmciamtd.c
@@ -118,7 +118,7 @@ static caddr_t remap_window(struct map_info *map, unsigned long to)
 		DEBUG(2, "Remapping window from 0x%8.8x to 0x%8.8x",
 		      dev->offset, mrq.CardOffset);
 		mrq.Page = 0;
-		ret = pcmcia_map_mem_page(win, &mrq);
+		ret = pcmcia_map_mem_page(dev->p_dev, win, &mrq);
 		if (ret != 0)
 			return NULL;
 		dev->offset = mrq.CardOffset;
diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c
index 61726c82be46..6b9c79e4a2c0 100644
--- a/drivers/net/pcmcia/fmvj18x_cs.c
+++ b/drivers/net/pcmcia/fmvj18x_cs.c
@@ -574,7 +574,7 @@ static int fmvj18x_get_hwinfo(struct pcmcia_device *link, u_char *node_id)
     base = ioremap(req.Base, req.Size);
     mem.Page = 0;
     mem.CardOffset = 0;
-    pcmcia_map_mem_page(link->win, &mem);
+    pcmcia_map_mem_page(link, link->win, &mem);
 
     /*
      *  MBH10304 CISTPL_FUNCE_LAN_NODE_ID format
@@ -630,7 +630,7 @@ static int fmvj18x_setup_mfc(struct pcmcia_device *link)
 
     mem.Page = 0;
     mem.CardOffset = 0;
-    i = pcmcia_map_mem_page(link->win, &mem);
+    i = pcmcia_map_mem_page(link, link->win, &mem);
     if (i != 0) {
 	iounmap(lp->base);
 	lp->base = NULL;
diff --git a/drivers/net/pcmcia/ibmtr_cs.c b/drivers/net/pcmcia/ibmtr_cs.c
index 7a985ca079d7..6fc89eba9c8f 100644
--- a/drivers/net/pcmcia/ibmtr_cs.c
+++ b/drivers/net/pcmcia/ibmtr_cs.c
@@ -252,7 +252,7 @@ static int __devinit ibmtr_config(struct pcmcia_device *link)
 
     mem.CardOffset = mmiobase;
     mem.Page = 0;
-    ret = pcmcia_map_mem_page(link->win, &mem);
+    ret = pcmcia_map_mem_page(link, link->win, &mem);
     if (ret)
 	    goto failed;
     ti->mmio = ioremap(req.Base, req.Size);
@@ -269,7 +269,7 @@ static int __devinit ibmtr_config(struct pcmcia_device *link)
 
     mem.CardOffset = srambase;
     mem.Page = 0;
-    ret = pcmcia_map_mem_page(info->sram_win_handle, &mem);
+    ret = pcmcia_map_mem_page(link, info->sram_win_handle, &mem);
     if (ret)
 	    goto failed;
 
diff --git a/drivers/net/pcmcia/pcnet_cs.c b/drivers/net/pcmcia/pcnet_cs.c
index 4156a6c5811e..75e68946455e 100644
--- a/drivers/net/pcmcia/pcnet_cs.c
+++ b/drivers/net/pcmcia/pcnet_cs.c
@@ -325,7 +325,7 @@ static hw_info_t *get_hwinfo(struct pcmcia_device *link)
     mem.Page = 0;
     for (i = 0; i < NR_INFO; i++) {
 	mem.CardOffset = hw_info[i].offset & ~(req.Size-1);
-	pcmcia_map_mem_page(link->win, &mem);
+	pcmcia_map_mem_page(link, link->win, &mem);
 	base = &virt[hw_info[i].offset & (req.Size-1)];
 	if ((readb(base+0) == hw_info[i].a0) &&
 	    (readb(base+2) == hw_info[i].a1) &&
@@ -1499,7 +1499,7 @@ static int setup_shmem_window(struct pcmcia_device *link, int start_pg,
     offset = mem.CardOffset % window_size;
     mem.CardOffset -= offset;
     mem.Page = 0;
-    ret = pcmcia_map_mem_page(link->win, &mem);
+    ret = pcmcia_map_mem_page(link, link->win, &mem);
     if (ret)
 	    goto failed;
 
diff --git a/drivers/net/pcmcia/smc91c92_cs.c b/drivers/net/pcmcia/smc91c92_cs.c
index 580ec444a654..4ceaa45d7d81 100644
--- a/drivers/net/pcmcia/smc91c92_cs.c
+++ b/drivers/net/pcmcia/smc91c92_cs.c
@@ -480,7 +480,7 @@ static int mhz_mfc_config(struct pcmcia_device *link)
     mem.CardOffset = mem.Page = 0;
     if (smc->manfid == MANFID_MOTOROLA)
 	mem.CardOffset = link->conf.ConfigBase;
-    i = pcmcia_map_mem_page(link->win, &mem);
+    i = pcmcia_map_mem_page(link, link->win, &mem);
 
     if ((i == 0)
 	&& (smc->manfid == MANFID_MEGAHERTZ)
diff --git a/drivers/net/pcmcia/xirc2ps_cs.c b/drivers/net/pcmcia/xirc2ps_cs.c
index fbf926539285..9bc4d6002ae0 100644
--- a/drivers/net/pcmcia/xirc2ps_cs.c
+++ b/drivers/net/pcmcia/xirc2ps_cs.c
@@ -932,7 +932,7 @@ xirc2ps_config(struct pcmcia_device * link)
 	local->dingo_ccr = ioremap(req.Base,0x1000) + 0x0800;
 	mem.CardOffset = 0x0;
 	mem.Page = 0;
-	if ((err = pcmcia_map_mem_page(link->win, &mem)))
+	if ((err = pcmcia_map_mem_page(link, link->win, &mem)))
 	    goto config_error;
 
 	/* Setup the CCRs; there are no infos in the CIS about the Ethernet
diff --git a/drivers/net/wireless/airo_cs.c b/drivers/net/wireless/airo_cs.c
index a1b84fc48afd..2aa6978d76ca 100644
--- a/drivers/net/wireless/airo_cs.c
+++ b/drivers/net/wireless/airo_cs.c
@@ -261,7 +261,7 @@ static int airo_cs_config_check(struct pcmcia_device *p_dev,
 			return -ENODEV;
 		map.Page = 0;
 		map.CardOffset = mem->win[0].card_addr;
-		if (pcmcia_map_mem_page(p_dev->win, &map) != 0)
+		if (pcmcia_map_mem_page(p_dev, p_dev->win, &map) != 0)
 			return -ENODEV;
 	}
 	/* If we got this far, we're cool! */
diff --git a/drivers/net/wireless/b43/pcmcia.c b/drivers/net/wireless/b43/pcmcia.c
index cd14b7e8ba80..c0a34bbcb8d5 100644
--- a/drivers/net/wireless/b43/pcmcia.c
+++ b/drivers/net/wireless/b43/pcmcia.c
@@ -93,7 +93,7 @@ static int __devinit b43_pcmcia_probe(struct pcmcia_device *dev)
 
 	mem.CardOffset = 0;
 	mem.Page = 0;
-	res = pcmcia_map_mem_page(dev->win, &mem);
+	res = pcmcia_map_mem_page(dev, dev->win, &mem);
 	if (res != 0)
 		goto err_disable;
 
diff --git a/drivers/net/wireless/netwave_cs.c b/drivers/net/wireless/netwave_cs.c
index cbd85de0c601..4a8c62126c55 100644
--- a/drivers/net/wireless/netwave_cs.c
+++ b/drivers/net/wireless/netwave_cs.c
@@ -763,7 +763,7 @@ static int netwave_pcmcia_config(struct pcmcia_device *link) {
     if (ret)
 	    goto failed;
     mem.CardOffset = 0x20000; mem.Page = 0; 
-    ret = pcmcia_map_mem_page(link->win, &mem);
+    ret = pcmcia_map_mem_page(link, link->win, &mem);
     if (ret)
 	    goto failed;
 
diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c
index ed328750de45..f719ffcb3c74 100644
--- a/drivers/net/wireless/ray_cs.c
+++ b/drivers/net/wireless/ray_cs.c
@@ -443,7 +443,7 @@ static int ray_config(struct pcmcia_device *link)
 		goto failed;
 	mem.CardOffset = 0x0000;
 	mem.Page = 0;
-	ret = pcmcia_map_mem_page(link->win, &mem);
+	ret = pcmcia_map_mem_page(link, link->win, &mem);
 	if (ret)
 		goto failed;
 	local->sram = ioremap(req.Base, req.Size);
@@ -459,7 +459,7 @@ static int ray_config(struct pcmcia_device *link)
 		goto failed;
 	mem.CardOffset = 0x8000;
 	mem.Page = 0;
-	ret = pcmcia_map_mem_page(local->rmem_handle, &mem);
+	ret = pcmcia_map_mem_page(link, local->rmem_handle, &mem);
 	if (ret)
 		goto failed;
 	local->rmem = ioremap(req.Base, req.Size);
@@ -475,7 +475,7 @@ static int ray_config(struct pcmcia_device *link)
 		goto failed;
 	mem.CardOffset = 0x0000;
 	mem.Page = 0;
-	ret = pcmcia_map_mem_page(local->amem_handle, &mem);
+	ret = pcmcia_map_mem_page(link, local->amem_handle, &mem);
 	if (ret)
 		goto failed;
 	local->amem = ioremap(req.Base, req.Size);
diff --git a/drivers/net/wireless/wavelan_cs.c b/drivers/net/wireless/wavelan_cs.c
index 2fad4ac89827..d4df25d329d4 100644
--- a/drivers/net/wireless/wavelan_cs.c
+++ b/drivers/net/wireless/wavelan_cs.c
@@ -3885,7 +3885,7 @@ wv_pcmcia_config(struct pcmcia_device *	link)
       dev->mem_end = dev->mem_start + req.Size;
 
       mem.CardOffset = 0; mem.Page = 0;
-      i = pcmcia_map_mem_page(link->win, &mem);
+      i = pcmcia_map_mem_page(link, link->win, &mem);
       if (i != 0)
 	  break;
 
diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c
index 820a6e5868e8..b60952a3df93 100644
--- a/drivers/pcmcia/pcmcia_resource.c
+++ b/drivers/pcmcia/pcmcia_resource.c
@@ -234,9 +234,12 @@ int pcmcia_get_mem_page(window_handle_t win, memreq_t *req)
 EXPORT_SYMBOL(pcmcia_get_mem_page);
 
 
-int pcmcia_map_mem_page(window_handle_t win, memreq_t *req)
+int pcmcia_map_mem_page(struct pcmcia_device *p_dev, window_handle_t wh,
+			memreq_t *req)
 {
 	struct pcmcia_socket *s;
+	window_handle_t win = wh;
+
 	if ((win == NULL) || (win->magic != WINDOW_MAGIC))
 		return -EINVAL;
 	s = win->sock;
diff --git a/drivers/scsi/pcmcia/nsp_cs.c b/drivers/scsi/pcmcia/nsp_cs.c
index e32c344d7ad8..c54108fc89a1 100644
--- a/drivers/scsi/pcmcia/nsp_cs.c
+++ b/drivers/scsi/pcmcia/nsp_cs.c
@@ -1687,7 +1687,7 @@ static int nsp_cs_config_check(struct pcmcia_device *p_dev,
 			if (pcmcia_request_window(&p_dev, &cfg_mem->req, &p_dev->win) != 0)
 				goto next_entry;
 			map.Page = 0; map.CardOffset = mem->win[0].card_addr;
-			if (pcmcia_map_mem_page(p_dev->win, &map) != 0)
+			if (pcmcia_map_mem_page(p_dev, p_dev->win, &map) != 0)
 				goto next_entry;
 
 			cfg_mem->data->MmioAddress = (unsigned long) ioremap_nocache(cfg_mem->req.Base, cfg_mem->req.Size);
diff --git a/drivers/staging/comedi/drivers/ni_daq_700.c b/drivers/staging/comedi/drivers/ni_daq_700.c
index e06d5b2bc336..aeb85b3ec7a7 100644
--- a/drivers/staging/comedi/drivers/ni_daq_700.c
+++ b/drivers/staging/comedi/drivers/ni_daq_700.c
@@ -616,7 +616,7 @@ static int dio700_pcmcia_config_loop(struct pcmcia_device *p_dev,
 			return -ENODEV;
 		map.Page = 0;
 		map.CardOffset = mem->win[0].card_addr;
-		if (pcmcia_map_mem_page(p_dev->win, &map))
+		if (pcmcia_map_mem_page(p_dev, p_dev->win, &map))
 			return -ENODEV;
 	}
 	/* If we got this far, we're cool! */
diff --git a/drivers/staging/comedi/drivers/ni_daq_dio24.c b/drivers/staging/comedi/drivers/ni_daq_dio24.c
index 9257a4224eab..0968a67b97cb 100644
--- a/drivers/staging/comedi/drivers/ni_daq_dio24.c
+++ b/drivers/staging/comedi/drivers/ni_daq_dio24.c
@@ -367,7 +367,7 @@ static int dio24_pcmcia_config_loop(struct pcmcia_device *p_dev,
 			return -ENODEV;
 		map.Page = 0;
 		map.CardOffset = mem->win[0].card_addr;
-		if (pcmcia_map_mem_page(p_dev->win, &map))
+		if (pcmcia_map_mem_page(p_dev, p_dev->win, &map))
 			return -ENODEV;
 	}
 	/* If we got this far, we're cool! */
diff --git a/drivers/staging/comedi/drivers/ni_labpc_cs.c b/drivers/staging/comedi/drivers/ni_labpc_cs.c
index 28ff4a68cab3..b4c7dfa54638 100644
--- a/drivers/staging/comedi/drivers/ni_labpc_cs.c
+++ b/drivers/staging/comedi/drivers/ni_labpc_cs.c
@@ -346,7 +346,7 @@ static int labpc_pcmcia_config_loop(struct pcmcia_device *p_dev,
 			return -ENODEV;
 		map.Page = 0;
 		map.CardOffset = mem->win[0].card_addr;
-		if (pcmcia_map_mem_page(p_dev->win, &map))
+		if (pcmcia_map_mem_page(p_dev, p_dev->win, &map))
 			return -ENODEV;
 	}
 	/* If we got this far, we're cool! */
diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h
index 40b098d7aa41..f240bfa454f8 100644
--- a/include/pcmcia/ds.h
+++ b/include/pcmcia/ds.h
@@ -204,7 +204,8 @@ int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req,
 int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t win);
 
 int pcmcia_get_mem_page(window_handle_t win, memreq_t *req);
-int pcmcia_map_mem_page(window_handle_t win, memreq_t *req);
+int pcmcia_map_mem_page(struct pcmcia_device *p_dev, window_handle_t win,
+			memreq_t *req);
 
 int pcmcia_modify_configuration(struct pcmcia_device *p_dev, modconf_t *mod);
 void pcmcia_disable_device(struct pcmcia_device *p_dev);
-- 
cgit v1.2.3-70-g09d2


From 16456ebabfec3f8f509fc18b45f256d066a1b360 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@opensource.se>
Date: Wed, 13 Dec 2006 19:46:48 +0900
Subject: pcmcia: Pass struct pcmcia_socket to pcmcia_get_mem_page()

No logic changes, just pass struct pcmcia_socket to pcmcia_get_mem_page()

[linux@dominikbrodowski.net: update to 2.6.31]
Signed-off-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
 drivers/pcmcia/cs_internal.h     | 2 ++
 drivers/pcmcia/pcmcia_ioctl.c    | 2 +-
 drivers/pcmcia/pcmcia_resource.c | 5 ++++-
 include/pcmcia/ds.h              | 2 --
 4 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/pcmcia/cs_internal.h b/drivers/pcmcia/cs_internal.h
index 0a3ada970bf8..8e09a4998c84 100644
--- a/drivers/pcmcia/cs_internal.h
+++ b/drivers/pcmcia/cs_internal.h
@@ -227,6 +227,8 @@ extern void pcmcia_put_dev(struct pcmcia_device *p_dev);
 
 struct pcmcia_device *pcmcia_device_add(struct pcmcia_socket *s,
 					unsigned int function);
+int pcmcia_get_mem_page(struct pcmcia_socket *skt, window_handle_t win,
+			memreq_t *req);
 
 /* pcmcia_ioctl.c */
 extern void __init pcmcia_setup_ioctl(void);
diff --git a/drivers/pcmcia/pcmcia_ioctl.c b/drivers/pcmcia/pcmcia_ioctl.c
index 056fd131c89c..c829ead5cdb7 100644
--- a/drivers/pcmcia/pcmcia_ioctl.c
+++ b/drivers/pcmcia/pcmcia_ioctl.c
@@ -924,7 +924,7 @@ static int ds_ioctl(struct inode * inode, struct file * file,
 			buf->win_info.handle->index + 1, &buf->win_info.window);
 	break;
     case DS_GET_MEM_PAGE:
-	ret = pcmcia_get_mem_page(buf->win_info.handle,
+	ret = pcmcia_get_mem_page(s, buf->win_info.handle,
 			   &buf->win_info.map);
 	break;
     case DS_REPLACE_CIS:
diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c
index b60952a3df93..a092749f79cb 100644
--- a/drivers/pcmcia/pcmcia_resource.c
+++ b/drivers/pcmcia/pcmcia_resource.c
@@ -223,8 +223,11 @@ EXPORT_SYMBOL(pcmcia_get_window);
  *
  * Change the card address of an already open memory window.
  */
-int pcmcia_get_mem_page(window_handle_t win, memreq_t *req)
+int pcmcia_get_mem_page(struct pcmcia_socket *skt, window_handle_t wh,
+			memreq_t *req)
 {
+	window_handle_t win = wh;
+
 	if ((win == NULL) || (win->magic != WINDOW_MAGIC))
 		return -EINVAL;
 	req->Page = 0;
diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h
index f240bfa454f8..cbf5f05745f2 100644
--- a/include/pcmcia/ds.h
+++ b/include/pcmcia/ds.h
@@ -202,8 +202,6 @@ int pcmcia_request_configuration(struct pcmcia_device *p_dev,
 int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req,
 			  window_handle_t *wh);
 int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t win);
-
-int pcmcia_get_mem_page(window_handle_t win, memreq_t *req);
 int pcmcia_map_mem_page(struct pcmcia_device *p_dev, window_handle_t win,
 			memreq_t *req);
 
-- 
cgit v1.2.3-70-g09d2


From 0bdf9b3dd3cfa5cbd5d55172c19f5dd166208e17 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@opensource.se>
Date: Wed, 13 Dec 2006 19:46:53 +0900
Subject: pcmcia: Change window_handle_t logic to unsigned long

Logic changes based on top of the other patches:

This set of patches changed window_handle_t from being a pointer to an
unsigned long. The unsigned long is now a simple index into socket->win[].
Going from a pointer to unsigned long should leave the user space interface
unchanged unless I'm mistaken.

This change results in code that is less error prone and a user space
interface which is much cleaner and safer. A nice side effect is that we
are also are able to remove all members except one from window_t.

[ linux@dominikbrodowski.net:
	Update to 2.6.31. Also, a plain "index" to socket->win[] does not
	work, as several codepaths rely on "window_handle_t" being
	non-zero if used. Therefore, set the window_handle_t to the
	socket->win[] index + 1. ]

CC: netdev@vger.kernel.org
Signed-off-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
 drivers/net/pcmcia/pcnet_cs.c    |  2 +-
 drivers/pcmcia/cs_internal.h     |  4 +--
 drivers/pcmcia/pcmcia_ioctl.c    |  4 +--
 drivers/pcmcia/pcmcia_resource.c | 59 ++++++++++++++++++++--------------------
 include/pcmcia/cs_types.h        |  3 +-
 include/pcmcia/ss.h              |  5 ----
 6 files changed, 36 insertions(+), 41 deletions(-)

(limited to 'include')

diff --git a/drivers/net/pcmcia/pcnet_cs.c b/drivers/net/pcmcia/pcnet_cs.c
index 75e68946455e..518b094c9f6d 100644
--- a/drivers/net/pcmcia/pcnet_cs.c
+++ b/drivers/net/pcmcia/pcnet_cs.c
@@ -1514,7 +1514,7 @@ static int setup_shmem_window(struct pcmcia_device *link, int start_pg,
     if (i != (TX_PAGES<<8)) {
 	iounmap(info->base);
 	pcmcia_release_window(link, link->win);
-	info->base = NULL; link->win = NULL;
+	info->base = NULL; link->win = 0;
 	goto failed;
     }
 
diff --git a/drivers/pcmcia/cs_internal.h b/drivers/pcmcia/cs_internal.h
index 8e09a4998c84..6df41de39d16 100644
--- a/drivers/pcmcia/cs_internal.h
+++ b/drivers/pcmcia/cs_internal.h
@@ -149,8 +149,8 @@ extern struct list_head pcmcia_socket_list;
 extern struct class pcmcia_socket_class;
 
 int pcmcia_get_window(struct pcmcia_socket *s,
-		      window_handle_t *handle,
-		      int idx,
+		      window_handle_t *wh_out,
+		      window_handle_t wh,
 		      win_req_t *req);
 int pccard_register_pcmcia(struct pcmcia_socket *s, struct pcmcia_callback *c);
 struct pcmcia_socket *pcmcia_get_socket_by_nr(unsigned int nr);
diff --git a/drivers/pcmcia/pcmcia_ioctl.c b/drivers/pcmcia/pcmcia_ioctl.c
index c829ead5cdb7..6245fde02b79 100644
--- a/drivers/pcmcia/pcmcia_ioctl.c
+++ b/drivers/pcmcia/pcmcia_ioctl.c
@@ -916,12 +916,12 @@ static int ds_ioctl(struct inode * inode, struct file * file,
 	goto free_out;
 	break;
     case DS_GET_FIRST_WINDOW:
-	ret = pcmcia_get_window(s, &buf->win_info.handle, 0,
+	ret = pcmcia_get_window(s, &buf->win_info.handle, 1,
 			&buf->win_info.window);
 	break;
     case DS_GET_NEXT_WINDOW:
 	ret = pcmcia_get_window(s, &buf->win_info.handle,
-			buf->win_info.handle->index + 1, &buf->win_info.window);
+			buf->win_info.handle + 1, &buf->win_info.window);
 	break;
     case DS_GET_MEM_PAGE:
 	ret = pcmcia_get_mem_page(s, buf->win_info.handle,
diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c
index a092749f79cb..ae6abc7833d4 100644
--- a/drivers/pcmcia/pcmcia_resource.c
+++ b/drivers/pcmcia/pcmcia_resource.c
@@ -187,15 +187,19 @@ EXPORT_SYMBOL(pcmcia_access_configuration_register);
 
 /** pcmcia_get_window
  */
-int pcmcia_get_window(struct pcmcia_socket *s, window_handle_t *handle,
-		      int idx, win_req_t *req)
+int pcmcia_get_window(struct pcmcia_socket *s, window_handle_t *wh_out,
+		window_handle_t wh, win_req_t *req)
 {
 	window_t *win;
-	int w;
+	window_handle_t w;
 
 	if (!s || !(s->state & SOCKET_PRESENT))
 		return -ENODEV;
-	for (w = idx; w < MAX_WIN; w++)
+
+	wh--;
+	if (wh >= MAX_WIN)
+		return -EINVAL;
+	for (w = wh; w < MAX_WIN; w++)
 		if (s->state & SOCKET_WIN_REQ(w))
 			break;
 	if (w == MAX_WIN)
@@ -213,7 +217,8 @@ int pcmcia_get_window(struct pcmcia_socket *s, window_handle_t *handle,
 		req->Attributes |= WIN_DATA_WIDTH_16;
 	if (win->ctl.flags & MAP_USE_WAIT)
 		req->Attributes |= WIN_USE_WAIT;
-	*handle = win;
+
+	*wh_out = w++;
 	return 0;
 } /* pcmcia_get_window */
 EXPORT_SYMBOL(pcmcia_get_window);
@@ -226,12 +231,12 @@ EXPORT_SYMBOL(pcmcia_get_window);
 int pcmcia_get_mem_page(struct pcmcia_socket *skt, window_handle_t wh,
 			memreq_t *req)
 {
-	window_handle_t win = wh;
-
-	if ((win == NULL) || (win->magic != WINDOW_MAGIC))
+	wh--;
+	if (wh >= MAX_WIN)
 		return -EINVAL;
+
 	req->Page = 0;
-	req->CardOffset = win->ctl.card_start;
+	req->CardOffset = skt->win[wh].ctl.card_start;
 	return 0;
 } /* pcmcia_get_mem_page */
 EXPORT_SYMBOL(pcmcia_get_mem_page);
@@ -240,18 +245,17 @@ EXPORT_SYMBOL(pcmcia_get_mem_page);
 int pcmcia_map_mem_page(struct pcmcia_device *p_dev, window_handle_t wh,
 			memreq_t *req)
 {
-	struct pcmcia_socket *s;
-	window_handle_t win = wh;
+	struct pcmcia_socket *s = p_dev->socket;
 
-	if ((win == NULL) || (win->magic != WINDOW_MAGIC))
+	wh--;
+	if (wh >= MAX_WIN)
 		return -EINVAL;
-	s = win->sock;
 	if (req->Page != 0) {
 		dev_dbg(&s->dev, "failure: requested page is zero\n");
 		return -EINVAL;
 	}
-	win->ctl.card_start = req->CardOffset;
-	if (s->ops->set_mem_map(s, &win->ctl) != 0) {
+	s->win[wh].ctl.card_start = req->CardOffset;
+	if (s->ops->set_mem_map(s, &s->win[wh].ctl) != 0) {
 		dev_dbg(&s->dev, "failed to set_mem_map\n");
 		return -EIO;
 	}
@@ -450,13 +454,16 @@ static int pcmcia_release_irq(struct pcmcia_device *p_dev, irq_req_t *req)
 
 int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t wh)
 {
-	struct pcmcia_socket *s;
-	window_handle_t win = wh;
+	struct pcmcia_socket *s = p_dev->socket;
+	window_t *win;
 
-	if ((win == NULL) || (win->magic != WINDOW_MAGIC))
+	wh--;
+	if (wh >= MAX_WIN)
 		return -EINVAL;
-	s = win->sock;
-	if (!(win->handle->_win & CLIENT_WIN_REQ(win->index))) {
+
+	win = &s->win[wh];
+
+	if (!(p_dev->_win & CLIENT_WIN_REQ(wh))) {
 		dev_dbg(&s->dev, "not releasing unknown window\n");
 		return -EINVAL;
 	}
@@ -464,7 +471,7 @@ int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t wh)
 	/* Shut down memory window */
 	win->ctl.flags &= ~MAP_ACTIVE;
 	s->ops->set_mem_map(s, &win->ctl);
-	s->state &= ~SOCKET_WIN_REQ(win->index);
+	s->state &= ~SOCKET_WIN_REQ(wh);
 
 	/* Release system memory */
 	if (win->ctl.res) {
@@ -472,9 +479,7 @@ int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t wh)
 		kfree(win->ctl.res);
 		win->ctl.res = NULL;
 	}
-	win->handle->_win &= ~CLIENT_WIN_REQ(win->index);
-
-	win->magic = 0;
+	p_dev->_win &= ~CLIENT_WIN_REQ(wh);
 
 	return 0;
 } /* pcmcia_release_window */
@@ -847,10 +852,6 @@ int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, window_h
 	}
 
 	win = &s->win[w];
-	win->magic = WINDOW_MAGIC;
-	win->index = w;
-	win->handle = *p_dev;
-	win->sock = s;
 
 	if (!(s->features & SS_CAP_STATIC_MAP)) {
 		win->ctl.res = pcmcia_find_mem_region(req->Base, req->Size, align,
@@ -887,7 +888,7 @@ int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, window_h
 	} else {
 		req->Base = win->ctl.res->start;
 	}
-	*wh = win;
+	*wh = w + 1;
 
 	return 0;
 } /* pcmcia_request_window */
diff --git a/include/pcmcia/cs_types.h b/include/pcmcia/cs_types.h
index 315965a37930..f5e3b8386c8f 100644
--- a/include/pcmcia/cs_types.h
+++ b/include/pcmcia/cs_types.h
@@ -26,8 +26,7 @@ typedef u_int	event_t;
 typedef u_char	cisdata_t;
 typedef u_short	page_t;
 
-struct window_t;
-typedef struct window_t *window_handle_t;
+typedef unsigned long window_handle_t;
 
 struct region_t;
 typedef struct region_t *memory_handle_t;
diff --git a/include/pcmcia/ss.h b/include/pcmcia/ss.h
index 753da9b087d3..6301c3f4f19e 100644
--- a/include/pcmcia/ss.h
+++ b/include/pcmcia/ss.h
@@ -107,12 +107,7 @@ typedef struct io_window_t {
 	struct resource		*res;
 } io_window_t;
 
-#define WINDOW_MAGIC	0xB35C
 typedef struct window_t {
-	u_short			magic;
-	u_short			index;
-	struct pcmcia_device	*handle;
-	struct pcmcia_socket 	*sock;
 	pccard_mem_map		ctl;
 } window_t;
 
-- 
cgit v1.2.3-70-g09d2


From 82f88e36004162f49a9340ffbbaebe89016e4835 Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Tue, 3 Nov 2009 01:16:12 +0100
Subject: pcmcia: remove unused "window_t" typedef

Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
 drivers/pcmcia/pcmcia_ioctl.c    | 18 ++++++++--------
 drivers/pcmcia/pcmcia_resource.c | 46 ++++++++++++++++++++--------------------
 include/pcmcia/ss.h              |  6 +-----
 3 files changed, 33 insertions(+), 37 deletions(-)

(limited to 'include')

diff --git a/drivers/pcmcia/pcmcia_ioctl.c b/drivers/pcmcia/pcmcia_ioctl.c
index 38b3a26a3ff3..c4d7908fa37f 100644
--- a/drivers/pcmcia/pcmcia_ioctl.c
+++ b/drivers/pcmcia/pcmcia_ioctl.c
@@ -224,7 +224,7 @@ static int pcmcia_adjust_resource_info(adjust_t *adj)
 static int pcmcia_get_window(struct pcmcia_socket *s, window_handle_t *wh_out,
 			window_handle_t wh, win_req_t *req)
 {
-	window_t *win;
+	pccard_mem_map *win;
 	window_handle_t w;
 
 	wh--;
@@ -238,17 +238,17 @@ static int pcmcia_get_window(struct pcmcia_socket *s, window_handle_t *wh_out,
 	if (w == MAX_WIN)
 		return -EINVAL;
 	win = &s->win[w];
-	req->Base = win->ctl.res->start;
-	req->Size = win->ctl.res->end - win->ctl.res->start + 1;
-	req->AccessSpeed = win->ctl.speed;
+	req->Base = win->res->start;
+	req->Size = win->res->end - win->res->start + 1;
+	req->AccessSpeed = win->speed;
 	req->Attributes = 0;
-	if (win->ctl.flags & MAP_ATTRIB)
+	if (win->flags & MAP_ATTRIB)
 		req->Attributes |= WIN_MEMORY_TYPE_AM;
-	if (win->ctl.flags & MAP_ACTIVE)
+	if (win->flags & MAP_ACTIVE)
 		req->Attributes |= WIN_ENABLE;
-	if (win->ctl.flags & MAP_16BIT)
+	if (win->flags & MAP_16BIT)
 		req->Attributes |= WIN_DATA_WIDTH_16;
-	if (win->ctl.flags & MAP_USE_WAIT)
+	if (win->flags & MAP_USE_WAIT)
 		req->Attributes |= WIN_USE_WAIT;
 
 	*wh_out = w + 1;
@@ -268,7 +268,7 @@ static int pcmcia_get_mem_page(struct pcmcia_socket *skt, window_handle_t wh,
 		return -EINVAL;
 
 	req->Page = 0;
-	req->CardOffset = skt->win[wh].ctl.card_start;
+	req->CardOffset = skt->win[wh].card_start;
 	return 0;
 } /* pcmcia_get_mem_page */
 
diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c
index ae68b26a7050..50468543d607 100644
--- a/drivers/pcmcia/pcmcia_resource.c
+++ b/drivers/pcmcia/pcmcia_resource.c
@@ -197,8 +197,8 @@ int pcmcia_map_mem_page(struct pcmcia_device *p_dev, window_handle_t wh,
 		dev_dbg(&s->dev, "failure: requested page is zero\n");
 		return -EINVAL;
 	}
-	s->win[wh].ctl.card_start = req->CardOffset;
-	if (s->ops->set_mem_map(s, &s->win[wh].ctl) != 0) {
+	s->win[wh].card_start = req->CardOffset;
+	if (s->ops->set_mem_map(s, &s->win[wh]) != 0) {
 		dev_dbg(&s->dev, "failed to set_mem_map\n");
 		return -EIO;
 	}
@@ -398,7 +398,7 @@ static int pcmcia_release_irq(struct pcmcia_device *p_dev, irq_req_t *req)
 int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t wh)
 {
 	struct pcmcia_socket *s = p_dev->socket;
-	window_t *win;
+	pccard_mem_map *win;
 
 	wh--;
 	if (wh >= MAX_WIN)
@@ -412,15 +412,15 @@ int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t wh)
 	}
 
 	/* Shut down memory window */
-	win->ctl.flags &= ~MAP_ACTIVE;
-	s->ops->set_mem_map(s, &win->ctl);
+	win->flags &= ~MAP_ACTIVE;
+	s->ops->set_mem_map(s, win);
 	s->state &= ~SOCKET_WIN_REQ(wh);
 
 	/* Release system memory */
-	if (win->ctl.res) {
-		release_resource(win->ctl.res);
-		kfree(win->ctl.res);
-		win->ctl.res = NULL;
+	if (win->res) {
+		release_resource(win->res);
+		kfree(win->res);
+		win->res = NULL;
 	}
 	p_dev->_win &= ~CLIENT_WIN_REQ(wh);
 
@@ -755,7 +755,7 @@ EXPORT_SYMBOL(pcmcia_request_irq);
 int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, window_handle_t *wh)
 {
 	struct pcmcia_socket *s = (*p_dev)->socket;
-	window_t *win;
+	pccard_mem_map *win;
 	u_long align;
 	int w;
 
@@ -797,9 +797,9 @@ int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, window_h
 	win = &s->win[w];
 
 	if (!(s->features & SS_CAP_STATIC_MAP)) {
-		win->ctl.res = pcmcia_find_mem_region(req->Base, req->Size, align,
+		win->res = pcmcia_find_mem_region(req->Base, req->Size, align,
 						      (req->Attributes & WIN_MAP_BELOW_1MB), s);
-		if (!win->ctl.res) {
+		if (!win->res) {
 			dev_dbg(&s->dev, "allocating mem region failed\n");
 			return -EINVAL;
 		}
@@ -807,19 +807,19 @@ int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, window_h
 	(*p_dev)->_win |= CLIENT_WIN_REQ(w);
 
 	/* Configure the socket controller */
-	win->ctl.map = w+1;
-	win->ctl.flags = 0;
-	win->ctl.speed = req->AccessSpeed;
+	win->map = w+1;
+	win->flags = 0;
+	win->speed = req->AccessSpeed;
 	if (req->Attributes & WIN_MEMORY_TYPE)
-		win->ctl.flags |= MAP_ATTRIB;
+		win->flags |= MAP_ATTRIB;
 	if (req->Attributes & WIN_ENABLE)
-		win->ctl.flags |= MAP_ACTIVE;
+		win->flags |= MAP_ACTIVE;
 	if (req->Attributes & WIN_DATA_WIDTH_16)
-		win->ctl.flags |= MAP_16BIT;
+		win->flags |= MAP_16BIT;
 	if (req->Attributes & WIN_USE_WAIT)
-		win->ctl.flags |= MAP_USE_WAIT;
-	win->ctl.card_start = 0;
-	if (s->ops->set_mem_map(s, &win->ctl) != 0) {
+		win->flags |= MAP_USE_WAIT;
+	win->card_start = 0;
+	if (s->ops->set_mem_map(s, win) != 0) {
 		dev_dbg(&s->dev, "failed to set memory mapping\n");
 		return -EIO;
 	}
@@ -827,9 +827,9 @@ int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, window_h
 
 	/* Return window handle */
 	if (s->features & SS_CAP_STATIC_MAP) {
-		req->Base = win->ctl.static_start;
+		req->Base = win->static_start;
 	} else {
-		req->Base = win->ctl.res->start;
+		req->Base = win->res->start;
 	}
 	*wh = w + 1;
 
diff --git a/include/pcmcia/ss.h b/include/pcmcia/ss.h
index 6301c3f4f19e..d85f725be7e3 100644
--- a/include/pcmcia/ss.h
+++ b/include/pcmcia/ss.h
@@ -107,10 +107,6 @@ typedef struct io_window_t {
 	struct resource		*res;
 } io_window_t;
 
-typedef struct window_t {
-	pccard_mem_map		ctl;
-} window_t;
-
 /* Maximum number of IO windows per socket */
 #define MAX_IO_WIN 2
 
@@ -150,7 +146,7 @@ struct pcmcia_socket {
 		u_int			Config;
 	} irq;
 	io_window_t			io[MAX_IO_WIN];
-	window_t			win[MAX_WIN];
+	pccard_mem_map			win[MAX_WIN];
 	struct list_head		cis_cache;
 	size_t				fake_cis_len;
 	u8				*fake_cis;
-- 
cgit v1.2.3-70-g09d2


From 6838b03fc6564ea07d0cd87ea6e198d90ab1fc3e Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Tue, 3 Nov 2009 01:31:52 +0100
Subject: pcmcia: pcmcia_request_window() doesn't need a pointer to a pointer

pcmcia_request_window() only needs a pointer to struct pcmcia_device, not
a pointer to a pointer.

CC: netdev@vger.kernel.org
CC: linux-wireless@vger.kernel.org
CC: linux-scsi@vger.kernel.org
CC: Jiri Kosina <jkosina@suse.cz>
Acked-by: Karsten Keil <keil@b1-systems.de> (for ISDN)
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
 drivers/char/pcmcia/ipwireless/main.c         | 4 ++--
 drivers/isdn/hisax/sedlbauer_cs.c             | 2 +-
 drivers/mtd/maps/pcmciamtd.c                  | 2 +-
 drivers/net/pcmcia/fmvj18x_cs.c               | 4 ++--
 drivers/net/pcmcia/ibmtr_cs.c                 | 4 ++--
 drivers/net/pcmcia/pcnet_cs.c                 | 4 ++--
 drivers/net/pcmcia/smc91c92_cs.c              | 2 +-
 drivers/net/pcmcia/xirc2ps_cs.c               | 2 +-
 drivers/net/wireless/airo_cs.c                | 2 +-
 drivers/net/wireless/b43/pcmcia.c             | 2 +-
 drivers/net/wireless/netwave_cs.c             | 2 +-
 drivers/net/wireless/ray_cs.c                 | 6 +++---
 drivers/net/wireless/wavelan_cs.c             | 2 +-
 drivers/pcmcia/pcmcia_resource.c              | 6 +++---
 drivers/scsi/pcmcia/nsp_cs.c                  | 2 +-
 drivers/staging/comedi/drivers/ni_daq_700.c   | 2 +-
 drivers/staging/comedi/drivers/ni_daq_dio24.c | 2 +-
 drivers/staging/comedi/drivers/ni_labpc_cs.c  | 2 +-
 include/pcmcia/ds.h                           | 2 +-
 19 files changed, 27 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/drivers/char/pcmcia/ipwireless/main.c b/drivers/char/pcmcia/ipwireless/main.c
index 10af1c0dc368..082146a26c87 100644
--- a/drivers/char/pcmcia/ipwireless/main.c
+++ b/drivers/char/pcmcia/ipwireless/main.c
@@ -116,7 +116,7 @@ static int ipwireless_probe(struct pcmcia_device *p_dev,
 		ipw->request_common_memory.Size = 0x1000;
 	ipw->request_common_memory.AccessSpeed = 0;
 
-	ret = pcmcia_request_window(&p_dev, &ipw->request_common_memory,
+	ret = pcmcia_request_window(p_dev, &ipw->request_common_memory,
 				&ipw->handle_common_memory);
 
 	if (ret != 0)
@@ -145,7 +145,7 @@ static int ipwireless_probe(struct pcmcia_device *p_dev,
 	ipw->request_attr_memory.Size = 0;	/* this used to be 0x1000 */
 	ipw->request_attr_memory.AccessSpeed = 0;
 
-	ret = pcmcia_request_window(&p_dev, &ipw->request_attr_memory,
+	ret = pcmcia_request_window(p_dev, &ipw->request_attr_memory,
 				&ipw->handle_attr_memory);
 
 	if (ret != 0)
diff --git a/drivers/isdn/hisax/sedlbauer_cs.c b/drivers/isdn/hisax/sedlbauer_cs.c
index 598e17a551b7..331716fc6b30 100644
--- a/drivers/isdn/hisax/sedlbauer_cs.c
+++ b/drivers/isdn/hisax/sedlbauer_cs.c
@@ -272,7 +272,7 @@ static int sedlbauer_config_check(struct pcmcia_device *p_dev,
 		req->Base = mem->win[0].host_addr;
 		req->Size = mem->win[0].len;
 		req->AccessSpeed = 0;
-		if (pcmcia_request_window(&p_dev, req, &p_dev->win) != 0)
+		if (pcmcia_request_window(p_dev, req, &p_dev->win) != 0)
 			return -ENODEV;
 		map.Page = 0;
 		map.CardOffset = mem->win[0].card_addr;
diff --git a/drivers/mtd/maps/pcmciamtd.c b/drivers/mtd/maps/pcmciamtd.c
index 80b9005c3736..689d6a79ffc0 100644
--- a/drivers/mtd/maps/pcmciamtd.c
+++ b/drivers/mtd/maps/pcmciamtd.c
@@ -530,7 +530,7 @@ static int pcmciamtd_config(struct pcmcia_device *link)
 		int ret;
 		DEBUG(2, "requesting window with size = %dKiB memspeed = %d",
 		      req.Size >> 10, req.AccessSpeed);
-		ret = pcmcia_request_window(&link, &req, &link->win);
+		ret = pcmcia_request_window(link, &req, &link->win);
 		DEBUG(2, "ret = %d dev->win_size = %d", ret, dev->win_size);
 		if(ret) {
 			req.Size >>= 1;
diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c
index 6b9c79e4a2c0..85f7c4562707 100644
--- a/drivers/net/pcmcia/fmvj18x_cs.c
+++ b/drivers/net/pcmcia/fmvj18x_cs.c
@@ -567,7 +567,7 @@ static int fmvj18x_get_hwinfo(struct pcmcia_device *link, u_char *node_id)
     req.Attributes = WIN_DATA_WIDTH_8|WIN_MEMORY_TYPE_AM|WIN_ENABLE;
     req.Base = 0; req.Size = 0;
     req.AccessSpeed = 0;
-    i = pcmcia_request_window(&link, &req, &link->win);
+    i = pcmcia_request_window(link, &req, &link->win);
     if (i != 0)
 	return -1;
 
@@ -618,7 +618,7 @@ static int fmvj18x_setup_mfc(struct pcmcia_device *link)
     req.Attributes = WIN_DATA_WIDTH_8|WIN_MEMORY_TYPE_AM|WIN_ENABLE;
     req.Base = 0; req.Size = 0;
     req.AccessSpeed = 0;
-    i = pcmcia_request_window(&link, &req, &link->win);
+    i = pcmcia_request_window(link, &req, &link->win);
     if (i != 0)
 	return -1;
 
diff --git a/drivers/net/pcmcia/ibmtr_cs.c b/drivers/net/pcmcia/ibmtr_cs.c
index 6fc89eba9c8f..14fe5fa35d86 100644
--- a/drivers/net/pcmcia/ibmtr_cs.c
+++ b/drivers/net/pcmcia/ibmtr_cs.c
@@ -246,7 +246,7 @@ static int __devinit ibmtr_config(struct pcmcia_device *link)
     req.Base = 0; 
     req.Size = 0x2000;
     req.AccessSpeed = 250;
-    ret = pcmcia_request_window(&link, &req, &link->win);
+    ret = pcmcia_request_window(link, &req, &link->win);
     if (ret)
 	    goto failed;
 
@@ -263,7 +263,7 @@ static int __devinit ibmtr_config(struct pcmcia_device *link)
     req.Base = 0;
     req.Size = sramsize * 1024;
     req.AccessSpeed = 250;
-    ret = pcmcia_request_window(&link, &req, &info->sram_win_handle);
+    ret = pcmcia_request_window(link, &req, &info->sram_win_handle);
     if (ret)
 	    goto failed;
 
diff --git a/drivers/net/pcmcia/pcnet_cs.c b/drivers/net/pcmcia/pcnet_cs.c
index 518b094c9f6d..d06a0ce7cb89 100644
--- a/drivers/net/pcmcia/pcnet_cs.c
+++ b/drivers/net/pcmcia/pcnet_cs.c
@@ -317,7 +317,7 @@ static hw_info_t *get_hwinfo(struct pcmcia_device *link)
     req.Attributes = WIN_DATA_WIDTH_8|WIN_MEMORY_TYPE_AM|WIN_ENABLE;
     req.Base = 0; req.Size = 0;
     req.AccessSpeed = 0;
-    i = pcmcia_request_window(&link, &req, &link->win);
+    i = pcmcia_request_window(link, &req, &link->win);
     if (i != 0)
 	return NULL;
 
@@ -1491,7 +1491,7 @@ static int setup_shmem_window(struct pcmcia_device *link, int start_pg,
     req.Attributes |= WIN_USE_WAIT;
     req.Base = 0; req.Size = window_size;
     req.AccessSpeed = mem_speed;
-    ret = pcmcia_request_window(&link, &req, &link->win);
+    ret = pcmcia_request_window(link, &req, &link->win);
     if (ret)
 	    goto failed;
 
diff --git a/drivers/net/pcmcia/smc91c92_cs.c b/drivers/net/pcmcia/smc91c92_cs.c
index 4ceaa45d7d81..c6ca3764ab6d 100644
--- a/drivers/net/pcmcia/smc91c92_cs.c
+++ b/drivers/net/pcmcia/smc91c92_cs.c
@@ -472,7 +472,7 @@ static int mhz_mfc_config(struct pcmcia_device *link)
     req.Attributes = WIN_DATA_WIDTH_8|WIN_MEMORY_TYPE_AM|WIN_ENABLE;
     req.Base = req.Size = 0;
     req.AccessSpeed = 0;
-    i = pcmcia_request_window(&link, &req, &link->win);
+    i = pcmcia_request_window(link, &req, &link->win);
     if (i != 0)
 	    return -ENODEV;
 
diff --git a/drivers/net/pcmcia/xirc2ps_cs.c b/drivers/net/pcmcia/xirc2ps_cs.c
index 9bc4d6002ae0..265852ab1a2d 100644
--- a/drivers/net/pcmcia/xirc2ps_cs.c
+++ b/drivers/net/pcmcia/xirc2ps_cs.c
@@ -926,7 +926,7 @@ xirc2ps_config(struct pcmcia_device * link)
 	req.Attributes = WIN_DATA_WIDTH_8|WIN_MEMORY_TYPE_AM|WIN_ENABLE;
 	req.Base = req.Size = 0;
 	req.AccessSpeed = 0;
-	if ((err = pcmcia_request_window(&link, &req, &link->win)))
+	if ((err = pcmcia_request_window(link, &req, &link->win)))
 	    goto config_error;
 
 	local->dingo_ccr = ioremap(req.Base,0x1000) + 0x0800;
diff --git a/drivers/net/wireless/airo_cs.c b/drivers/net/wireless/airo_cs.c
index 2aa6978d76ca..34d775c18a31 100644
--- a/drivers/net/wireless/airo_cs.c
+++ b/drivers/net/wireless/airo_cs.c
@@ -257,7 +257,7 @@ static int airo_cs_config_check(struct pcmcia_device *p_dev,
 		req->Base = mem->win[0].host_addr;
 		req->Size = mem->win[0].len;
 		req->AccessSpeed = 0;
-		if (pcmcia_request_window(&p_dev, req, &p_dev->win) != 0)
+		if (pcmcia_request_window(p_dev, req, &p_dev->win) != 0)
 			return -ENODEV;
 		map.Page = 0;
 		map.CardOffset = mem->win[0].card_addr;
diff --git a/drivers/net/wireless/b43/pcmcia.c b/drivers/net/wireless/b43/pcmcia.c
index c0a34bbcb8d5..2588358294be 100644
--- a/drivers/net/wireless/b43/pcmcia.c
+++ b/drivers/net/wireless/b43/pcmcia.c
@@ -87,7 +87,7 @@ static int __devinit b43_pcmcia_probe(struct pcmcia_device *dev)
 	win.Base = 0;
 	win.Size = SSB_CORE_SIZE;
 	win.AccessSpeed = 250;
-	res = pcmcia_request_window(&dev, &win, &dev->win);
+	res = pcmcia_request_window(dev, &win, &dev->win);
 	if (res != 0)
 		goto err_kfree_ssb;
 
diff --git a/drivers/net/wireless/netwave_cs.c b/drivers/net/wireless/netwave_cs.c
index 4a8c62126c55..bd4eff79bd13 100644
--- a/drivers/net/wireless/netwave_cs.c
+++ b/drivers/net/wireless/netwave_cs.c
@@ -759,7 +759,7 @@ static int netwave_pcmcia_config(struct pcmcia_device *link) {
     req.Attributes = WIN_DATA_WIDTH_8|WIN_MEMORY_TYPE_CM|WIN_ENABLE;
     req.Base = 0; req.Size = 0x8000;
     req.AccessSpeed = mem_speed;
-    ret = pcmcia_request_window(&link, &req, &link->win);
+    ret = pcmcia_request_window(link, &req, &link->win);
     if (ret)
 	    goto failed;
     mem.CardOffset = 0x20000; mem.Page = 0; 
diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c
index f719ffcb3c74..66e2d10cd754 100644
--- a/drivers/net/wireless/ray_cs.c
+++ b/drivers/net/wireless/ray_cs.c
@@ -438,7 +438,7 @@ static int ray_config(struct pcmcia_device *link)
 	req.Base = 0;
 	req.Size = 0x8000;
 	req.AccessSpeed = ray_mem_speed;
-	ret = pcmcia_request_window(&link, &req, &link->win);
+	ret = pcmcia_request_window(link, &req, &link->win);
 	if (ret)
 		goto failed;
 	mem.CardOffset = 0x0000;
@@ -454,7 +454,7 @@ static int ray_config(struct pcmcia_device *link)
 	req.Base = 0;
 	req.Size = 0x4000;
 	req.AccessSpeed = ray_mem_speed;
-	ret = pcmcia_request_window(&link, &req, &local->rmem_handle);
+	ret = pcmcia_request_window(link, &req, &local->rmem_handle);
 	if (ret)
 		goto failed;
 	mem.CardOffset = 0x8000;
@@ -470,7 +470,7 @@ static int ray_config(struct pcmcia_device *link)
 	req.Base = 0;
 	req.Size = 0x1000;
 	req.AccessSpeed = ray_mem_speed;
-	ret = pcmcia_request_window(&link, &req, &local->amem_handle);
+	ret = pcmcia_request_window(link, &req, &local->amem_handle);
 	if (ret)
 		goto failed;
 	mem.CardOffset = 0x0000;
diff --git a/drivers/net/wireless/wavelan_cs.c b/drivers/net/wireless/wavelan_cs.c
index d4df25d329d4..f8c5166fffe0 100644
--- a/drivers/net/wireless/wavelan_cs.c
+++ b/drivers/net/wireless/wavelan_cs.c
@@ -3876,7 +3876,7 @@ wv_pcmcia_config(struct pcmcia_device *	link)
       req.Attributes = WIN_DATA_WIDTH_8|WIN_MEMORY_TYPE_AM|WIN_ENABLE;
       req.Base = req.Size = 0;
       req.AccessSpeed = mem_speed;
-      i = pcmcia_request_window(&link, &req, &link->win);
+      i = pcmcia_request_window(link, &req, &link->win);
       if (i != 0)
 	  break;
 
diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c
index 50468543d607..1cf7d54fb7e2 100644
--- a/drivers/pcmcia/pcmcia_resource.c
+++ b/drivers/pcmcia/pcmcia_resource.c
@@ -752,9 +752,9 @@ EXPORT_SYMBOL(pcmcia_request_irq);
  * Request_window() establishes a mapping between card memory space
  * and system memory space.
  */
-int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, window_handle_t *wh)
+int pcmcia_request_window(struct pcmcia_device *p_dev, win_req_t *req, window_handle_t *wh)
 {
-	struct pcmcia_socket *s = (*p_dev)->socket;
+	struct pcmcia_socket *s = p_dev->socket;
 	pccard_mem_map *win;
 	u_long align;
 	int w;
@@ -804,7 +804,7 @@ int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, window_h
 			return -EINVAL;
 		}
 	}
-	(*p_dev)->_win |= CLIENT_WIN_REQ(w);
+	p_dev->_win |= CLIENT_WIN_REQ(w);
 
 	/* Configure the socket controller */
 	win->map = w+1;
diff --git a/drivers/scsi/pcmcia/nsp_cs.c b/drivers/scsi/pcmcia/nsp_cs.c
index c54108fc89a1..9dfd6f510b65 100644
--- a/drivers/scsi/pcmcia/nsp_cs.c
+++ b/drivers/scsi/pcmcia/nsp_cs.c
@@ -1684,7 +1684,7 @@ static int nsp_cs_config_check(struct pcmcia_device *p_dev,
 			if (cfg_mem->req.Size < 0x1000)
 				cfg_mem->req.Size = 0x1000;
 			cfg_mem->req.AccessSpeed = 0;
-			if (pcmcia_request_window(&p_dev, &cfg_mem->req, &p_dev->win) != 0)
+			if (pcmcia_request_window(p_dev, &cfg_mem->req, &p_dev->win) != 0)
 				goto next_entry;
 			map.Page = 0; map.CardOffset = mem->win[0].card_addr;
 			if (pcmcia_map_mem_page(p_dev, p_dev->win, &map) != 0)
diff --git a/drivers/staging/comedi/drivers/ni_daq_700.c b/drivers/staging/comedi/drivers/ni_daq_700.c
index aeb85b3ec7a7..7328a84ac500 100644
--- a/drivers/staging/comedi/drivers/ni_daq_700.c
+++ b/drivers/staging/comedi/drivers/ni_daq_700.c
@@ -612,7 +612,7 @@ static int dio700_pcmcia_config_loop(struct pcmcia_device *p_dev,
 		if (req->Size < 0x1000)
 			req->Size = 0x1000;
 		req->AccessSpeed = 0;
-		if (pcmcia_request_window(&p_dev, req, &p_dev->win))
+		if (pcmcia_request_window(p_dev, req, &p_dev->win))
 			return -ENODEV;
 		map.Page = 0;
 		map.CardOffset = mem->win[0].card_addr;
diff --git a/drivers/staging/comedi/drivers/ni_daq_dio24.c b/drivers/staging/comedi/drivers/ni_daq_dio24.c
index 0968a67b97cb..505631553ef7 100644
--- a/drivers/staging/comedi/drivers/ni_daq_dio24.c
+++ b/drivers/staging/comedi/drivers/ni_daq_dio24.c
@@ -363,7 +363,7 @@ static int dio24_pcmcia_config_loop(struct pcmcia_device *p_dev,
 		if (req->Size < 0x1000)
 			req->Size = 0x1000;
 		req->AccessSpeed = 0;
-		if (pcmcia_request_window(&p_dev, req, &p_dev->win))
+		if (pcmcia_request_window(p_dev, req, &p_dev->win))
 			return -ENODEV;
 		map.Page = 0;
 		map.CardOffset = mem->win[0].card_addr;
diff --git a/drivers/staging/comedi/drivers/ni_labpc_cs.c b/drivers/staging/comedi/drivers/ni_labpc_cs.c
index b4c7dfa54638..7fb5058ad43c 100644
--- a/drivers/staging/comedi/drivers/ni_labpc_cs.c
+++ b/drivers/staging/comedi/drivers/ni_labpc_cs.c
@@ -342,7 +342,7 @@ static int labpc_pcmcia_config_loop(struct pcmcia_device *p_dev,
 		if (req->Size < 0x1000)
 			req->Size = 0x1000;
 		req->AccessSpeed = 0;
-		if (pcmcia_request_window(&p_dev, req, &p_dev->win))
+		if (pcmcia_request_window(p_dev, req, &p_dev->win))
 			return -ENODEV;
 		map.Page = 0;
 		map.CardOffset = mem->win[0].card_addr;
diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h
index cbf5f05745f2..d6c55fdf8d01 100644
--- a/include/pcmcia/ds.h
+++ b/include/pcmcia/ds.h
@@ -199,7 +199,7 @@ int pcmcia_request_irq(struct pcmcia_device *p_dev, irq_req_t *req);
 int pcmcia_request_configuration(struct pcmcia_device *p_dev,
 				 config_req_t *req);
 
-int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req,
+int pcmcia_request_window(struct pcmcia_device *p_dev, win_req_t *req,
 			  window_handle_t *wh);
 int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t win);
 int pcmcia_map_mem_page(struct pcmcia_device *p_dev, window_handle_t win,
-- 
cgit v1.2.3-70-g09d2


From dd2e5a156525f11754d9b1e0583f6bb49c253d62 Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Tue, 3 Nov 2009 10:27:34 +0100
Subject: pcmcia: remove deprecated handle_to_dev() macro

Update remaining users and remove deprecated handle_to_dev() macro

CC: Harald Welte <laforge@gnumonks.org>
CC: netdev@vger.kernel.org
CC: linux-wireless@vger.kernel.org
CC: linux-serial@vger.kernel.org
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
 drivers/char/pcmcia/cm4000_cs.c            | 2 +-
 drivers/char/pcmcia/cm4040_cs.c            | 6 +++---
 drivers/net/pcmcia/3c574_cs.c              | 2 +-
 drivers/net/pcmcia/3c589_cs.c              | 2 +-
 drivers/net/pcmcia/axnet_cs.c              | 2 +-
 drivers/net/pcmcia/com20020_cs.c           | 2 +-
 drivers/net/pcmcia/fmvj18x_cs.c            | 2 +-
 drivers/net/pcmcia/ibmtr_cs.c              | 2 +-
 drivers/net/pcmcia/nmclan_cs.c             | 2 +-
 drivers/net/pcmcia/pcnet_cs.c              | 2 +-
 drivers/net/pcmcia/smc91c92_cs.c           | 2 +-
 drivers/net/pcmcia/xirc2ps_cs.c            | 2 +-
 drivers/net/wireless/airo_cs.c             | 2 +-
 drivers/net/wireless/atmel_cs.c            | 4 ++--
 drivers/net/wireless/hostap/hostap_cs.c    | 2 +-
 drivers/net/wireless/libertas/if_cs.c      | 4 ++--
 drivers/net/wireless/netwave_cs.c          | 2 +-
 drivers/net/wireless/orinoco/orinoco_cs.c  | 2 +-
 drivers/net/wireless/orinoco/spectrum_cs.c | 2 +-
 drivers/net/wireless/ray_cs.c              | 2 +-
 drivers/net/wireless/wavelan_cs.c          | 2 +-
 drivers/net/wireless/wl3501_cs.c           | 2 +-
 drivers/serial/serial_cs.c                 | 2 +-
 drivers/usb/host/sl811_cs.c                | 2 +-
 include/pcmcia/ds.h                        | 3 ---
 sound/pcmcia/pdaudiocf/pdaudiocf.c         | 2 +-
 sound/pcmcia/vx/vxpocket.c                 | 2 +-
 27 files changed, 30 insertions(+), 33 deletions(-)

(limited to 'include')

diff --git a/drivers/char/pcmcia/cm4000_cs.c b/drivers/char/pcmcia/cm4000_cs.c
index 1611c4fe97fc..2db4c0a29b05 100644
--- a/drivers/char/pcmcia/cm4000_cs.c
+++ b/drivers/char/pcmcia/cm4000_cs.c
@@ -45,7 +45,7 @@
 
 /* #define ATR_CSUM */
 
-#define reader_to_dev(x)	(&handle_to_dev(x->p_dev))
+#define reader_to_dev(x)	(&x->p_dev->dev)
 
 /* n (debug level) is ignored */
 /* additional debug output may be enabled by re-compiling with
diff --git a/drivers/char/pcmcia/cm4040_cs.c b/drivers/char/pcmcia/cm4040_cs.c
index 38790db561a1..a6a70e476bea 100644
--- a/drivers/char/pcmcia/cm4040_cs.c
+++ b/drivers/char/pcmcia/cm4040_cs.c
@@ -39,7 +39,7 @@
 #include "cm4040_cs.h"
 
 
-#define reader_to_dev(x)	(&handle_to_dev(x->p_dev))
+#define reader_to_dev(x)	(&x->p_dev->dev)
 
 /* n (debug level) is ignored */
 /* additional debug output may be enabled by re-compiling with
@@ -539,7 +539,7 @@ static int cm4040_config_check(struct pcmcia_device *p_dev,
 	p_dev->io.IOAddrLines = cfg->io.flags & CISTPL_IO_LINES_MASK;
 
 	rc = pcmcia_request_io(p_dev, &p_dev->io);
-	dev_printk(KERN_INFO, &handle_to_dev(p_dev),
+	dev_printk(KERN_INFO, &p_dev->dev,
 		   "pcmcia_request_io returned 0x%x\n", rc);
 	return rc;
 }
@@ -561,7 +561,7 @@ static int reader_config(struct pcmcia_device *link, int devno)
 
 	fail_rc = pcmcia_request_configuration(link, &link->conf);
 	if (fail_rc != 0) {
-		dev_printk(KERN_INFO, &handle_to_dev(link),
+		dev_printk(KERN_INFO, &link->dev,
 			   "pcmcia_request_configuration failed 0x%x\n",
 			   fail_rc);
 		goto cs_release;
diff --git a/drivers/net/pcmcia/3c574_cs.c b/drivers/net/pcmcia/3c574_cs.c
index dbef5d9cc9db..8b65e18ab230 100644
--- a/drivers/net/pcmcia/3c574_cs.c
+++ b/drivers/net/pcmcia/3c574_cs.c
@@ -449,7 +449,7 @@ static int tc574_config(struct pcmcia_device *link)
 	}
 
 	link->dev_node = &lp->node;
-	SET_NETDEV_DEV(dev, &handle_to_dev(link));
+	SET_NETDEV_DEV(dev, &link->dev);
 
 	if (register_netdev(dev) != 0) {
 		printk(KERN_NOTICE "3c574_cs: register_netdev() failed\n");
diff --git a/drivers/net/pcmcia/3c589_cs.c b/drivers/net/pcmcia/3c589_cs.c
index 6eac62b7143c..c43c21ddb2d1 100644
--- a/drivers/net/pcmcia/3c589_cs.c
+++ b/drivers/net/pcmcia/3c589_cs.c
@@ -316,7 +316,7 @@ static int tc589_config(struct pcmcia_device *link)
 	printk(KERN_ERR "3c589_cs: invalid if_port requested\n");
     
     link->dev_node = &lp->node;
-    SET_NETDEV_DEV(dev, &handle_to_dev(link));
+    SET_NETDEV_DEV(dev, &link->dev);
 
     if (register_netdev(dev) != 0) {
 	printk(KERN_ERR "3c589_cs: register_netdev() failed\n");
diff --git a/drivers/net/pcmcia/axnet_cs.c b/drivers/net/pcmcia/axnet_cs.c
index 5af2ccfdb52d..0552dddd587f 100644
--- a/drivers/net/pcmcia/axnet_cs.c
+++ b/drivers/net/pcmcia/axnet_cs.c
@@ -400,7 +400,7 @@ static int axnet_config(struct pcmcia_device *link)
 
     info->phy_id = (i < 32) ? i : -1;
     link->dev_node = &info->node;
-    SET_NETDEV_DEV(dev, &handle_to_dev(link));
+    SET_NETDEV_DEV(dev, &link->dev);
 
     if (register_netdev(dev) != 0) {
 	printk(KERN_NOTICE "axnet_cs: register_netdev() failed\n");
diff --git a/drivers/net/pcmcia/com20020_cs.c b/drivers/net/pcmcia/com20020_cs.c
index 9a2e5006570b..51e9cb0a6d1e 100644
--- a/drivers/net/pcmcia/com20020_cs.c
+++ b/drivers/net/pcmcia/com20020_cs.c
@@ -302,7 +302,7 @@ static int com20020_config(struct pcmcia_device *link)
     lp->card_flags = ARC_CAN_10MBIT; /* pretend all of them can 10Mbit */
 
     link->dev_node = &info->node;
-    SET_NETDEV_DEV(dev, &handle_to_dev(link));
+    SET_NETDEV_DEV(dev, &link->dev);
 
     i = com20020_found(dev, 0);	/* calls register_netdev */
     
diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c
index 85f7c4562707..9b5ca37c6684 100644
--- a/drivers/net/pcmcia/fmvj18x_cs.c
+++ b/drivers/net/pcmcia/fmvj18x_cs.c
@@ -532,7 +532,7 @@ static int fmvj18x_config(struct pcmcia_device *link)
 
     lp->cardtype = cardtype;
     link->dev_node = &lp->node;
-    SET_NETDEV_DEV(dev, &handle_to_dev(link));
+    SET_NETDEV_DEV(dev, &link->dev);
 
     if (register_netdev(dev) != 0) {
 	printk(KERN_NOTICE "fmvj18x_cs: register_netdev() failed\n");
diff --git a/drivers/net/pcmcia/ibmtr_cs.c b/drivers/net/pcmcia/ibmtr_cs.c
index 14fe5fa35d86..76706e12d731 100644
--- a/drivers/net/pcmcia/ibmtr_cs.c
+++ b/drivers/net/pcmcia/ibmtr_cs.c
@@ -287,7 +287,7 @@ static int __devinit ibmtr_config(struct pcmcia_device *link)
     ibmtr_hw_setup(dev, mmiobase);
 
     link->dev_node = &info->node;
-    SET_NETDEV_DEV(dev, &handle_to_dev(link));
+    SET_NETDEV_DEV(dev, &link->dev);
 
     i = ibmtr_probe_card(dev);
     if (i != 0) {
diff --git a/drivers/net/pcmcia/nmclan_cs.c b/drivers/net/pcmcia/nmclan_cs.c
index a5363483ec6f..2d0c6f93ed8b 100644
--- a/drivers/net/pcmcia/nmclan_cs.c
+++ b/drivers/net/pcmcia/nmclan_cs.c
@@ -701,7 +701,7 @@ static int nmclan_config(struct pcmcia_device *link)
     printk(KERN_NOTICE "nmclan_cs: invalid if_port requested\n");
 
   link->dev_node = &lp->node;
-  SET_NETDEV_DEV(dev, &handle_to_dev(link));
+  SET_NETDEV_DEV(dev, &link->dev);
 
   i = register_netdev(dev);
   if (i != 0) {
diff --git a/drivers/net/pcmcia/pcnet_cs.c b/drivers/net/pcmcia/pcnet_cs.c
index d06a0ce7cb89..30baee7b86a2 100644
--- a/drivers/net/pcmcia/pcnet_cs.c
+++ b/drivers/net/pcmcia/pcnet_cs.c
@@ -646,7 +646,7 @@ static int pcnet_config(struct pcmcia_device *link)
 	mii_phy_probe(dev);
 
     link->dev_node = &info->node;
-    SET_NETDEV_DEV(dev, &handle_to_dev(link));
+    SET_NETDEV_DEV(dev, &link->dev);
 
     if (register_netdev(dev) != 0) {
 	printk(KERN_NOTICE "pcnet_cs: register_netdev() failed\n");
diff --git a/drivers/net/pcmcia/smc91c92_cs.c b/drivers/net/pcmcia/smc91c92_cs.c
index c6ca3764ab6d..2e795de06cb3 100644
--- a/drivers/net/pcmcia/smc91c92_cs.c
+++ b/drivers/net/pcmcia/smc91c92_cs.c
@@ -964,7 +964,7 @@ static int smc91c92_config(struct pcmcia_device *link)
     }
 
     link->dev_node = &smc->node;
-    SET_NETDEV_DEV(dev, &handle_to_dev(link));
+    SET_NETDEV_DEV(dev, &link->dev);
 
     if (register_netdev(dev) != 0) {
 	printk(KERN_ERR "smc91c92_cs: register_netdev() failed\n");
diff --git a/drivers/net/pcmcia/xirc2ps_cs.c b/drivers/net/pcmcia/xirc2ps_cs.c
index 265852ab1a2d..f75ac716390d 100644
--- a/drivers/net/pcmcia/xirc2ps_cs.c
+++ b/drivers/net/pcmcia/xirc2ps_cs.c
@@ -992,7 +992,7 @@ xirc2ps_config(struct pcmcia_device * link)
 	do_reset(dev, 1); /* a kludge to make the cem56 work */
 
     link->dev_node = &local->node;
-    SET_NETDEV_DEV(dev, &handle_to_dev(link));
+    SET_NETDEV_DEV(dev, &link->dev);
 
     if ((err=register_netdev(dev))) {
 	printk(KNOT_XIRC "register_netdev() failed\n");
diff --git a/drivers/net/wireless/airo_cs.c b/drivers/net/wireless/airo_cs.c
index 34d775c18a31..7d3a96fdf5a3 100644
--- a/drivers/net/wireless/airo_cs.c
+++ b/drivers/net/wireless/airo_cs.c
@@ -322,7 +322,7 @@ static int airo_config(struct pcmcia_device *link)
 		goto failed;
 	((local_info_t *)link->priv)->eth_dev =
 		init_airo_card(link->irq.AssignedIRQ,
-			       link->io.BasePort1, 1, &handle_to_dev(link));
+			       link->io.BasePort1, 1, &link->dev);
 	if (!((local_info_t *)link->priv)->eth_dev)
 		goto failed;
 
diff --git a/drivers/net/wireless/atmel_cs.c b/drivers/net/wireless/atmel_cs.c
index 78385722a768..18a58b859223 100644
--- a/drivers/net/wireless/atmel_cs.c
+++ b/drivers/net/wireless/atmel_cs.c
@@ -260,7 +260,7 @@ static int atmel_config(struct pcmcia_device *link)
 	struct pcmcia_device_id *did;
 
 	dev = link->priv;
-	did = dev_get_drvdata(&handle_to_dev(link));
+	did = dev_get_drvdata(&link->dev);
 
 	dev_dbg(&link->dev, "atmel_config\n");
 
@@ -309,7 +309,7 @@ static int atmel_config(struct pcmcia_device *link)
 		init_atmel_card(link->irq.AssignedIRQ,
 				link->io.BasePort1,
 				did ? did->driver_info : ATMEL_FW_TYPE_NONE,
-				&handle_to_dev(link),
+				&link->dev,
 				card_present,
 				link);
 	if (!((local_info_t*)link->priv)->eth_dev)
diff --git a/drivers/net/wireless/hostap/hostap_cs.c b/drivers/net/wireless/hostap/hostap_cs.c
index ca3ab849ac03..243e912729b9 100644
--- a/drivers/net/wireless/hostap/hostap_cs.c
+++ b/drivers/net/wireless/hostap/hostap_cs.c
@@ -623,7 +623,7 @@ static int prism2_config(struct pcmcia_device *link)
 
 	/* Need to allocate net_device before requesting IRQ handler */
 	dev = prism2_init_local_data(&prism2_pccard_funcs, 0,
-				     &handle_to_dev(link));
+				     &link->dev);
 	if (dev == NULL)
 		goto failed;
 	link->priv = dev;
diff --git a/drivers/net/wireless/libertas/if_cs.c b/drivers/net/wireless/libertas/if_cs.c
index cb40c386fc77..f2b16559b686 100644
--- a/drivers/net/wireless/libertas/if_cs.c
+++ b/drivers/net/wireless/libertas/if_cs.c
@@ -590,7 +590,7 @@ static int if_cs_prog_helper(struct if_cs_card *card)
 
 	/* TODO: make firmware file configurable */
 	ret = request_firmware(&fw, "libertas_cs_helper.fw",
-		&handle_to_dev(card->p_dev));
+		&card->p_dev->dev);
 	if (ret) {
 		lbs_pr_err("can't load helper firmware\n");
 		ret = -ENODEV;
@@ -663,7 +663,7 @@ static int if_cs_prog_real(struct if_cs_card *card)
 
 	/* TODO: make firmware file configurable */
 	ret = request_firmware(&fw, "libertas_cs.fw",
-		&handle_to_dev(card->p_dev));
+		&card->p_dev->dev);
 	if (ret) {
 		lbs_pr_err("can't load firmware\n");
 		ret = -ENODEV;
diff --git a/drivers/net/wireless/netwave_cs.c b/drivers/net/wireless/netwave_cs.c
index bd4eff79bd13..f5333b7d2226 100644
--- a/drivers/net/wireless/netwave_cs.c
+++ b/drivers/net/wireless/netwave_cs.c
@@ -773,7 +773,7 @@ static int netwave_pcmcia_config(struct pcmcia_device *link) {
 
     dev->irq = link->irq.AssignedIRQ;
     dev->base_addr = link->io.BasePort1;
-    SET_NETDEV_DEV(dev, &handle_to_dev(link));
+    SET_NETDEV_DEV(dev, &link->dev);
 
     if (register_netdev(dev) != 0) {
 	printk(KERN_DEBUG "netwave_cs: register_netdev() failed\n");
diff --git a/drivers/net/wireless/orinoco/orinoco_cs.c b/drivers/net/wireless/orinoco/orinoco_cs.c
index da626ec663d0..688b39823459 100644
--- a/drivers/net/wireless/orinoco/orinoco_cs.c
+++ b/drivers/net/wireless/orinoco/orinoco_cs.c
@@ -109,7 +109,7 @@ orinoco_cs_probe(struct pcmcia_device *link)
 	struct orinoco_private *priv;
 	struct orinoco_pccard *card;
 
-	priv = alloc_orinocodev(sizeof(*card), &handle_to_dev(link),
+	priv = alloc_orinocodev(sizeof(*card), &link->dev,
 				orinoco_cs_hard_reset, NULL);
 	if (!priv)
 		return -ENOMEM;
diff --git a/drivers/net/wireless/orinoco/spectrum_cs.c b/drivers/net/wireless/orinoco/spectrum_cs.c
index 700010e9e346..c609371d1525 100644
--- a/drivers/net/wireless/orinoco/spectrum_cs.c
+++ b/drivers/net/wireless/orinoco/spectrum_cs.c
@@ -182,7 +182,7 @@ spectrum_cs_probe(struct pcmcia_device *link)
 	struct orinoco_private *priv;
 	struct orinoco_pccard *card;
 
-	priv = alloc_orinocodev(sizeof(*card), &handle_to_dev(link),
+	priv = alloc_orinocodev(sizeof(*card), &link->dev,
 				spectrum_cs_hard_reset,
 				spectrum_cs_stop_firmware);
 	if (!priv)
diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c
index 66e2d10cd754..5e0f4c3eac38 100644
--- a/drivers/net/wireless/ray_cs.c
+++ b/drivers/net/wireless/ray_cs.c
@@ -488,7 +488,7 @@ static int ray_config(struct pcmcia_device *link)
 		return -ENODEV;
 	}
 
-	SET_NETDEV_DEV(dev, &handle_to_dev(link));
+	SET_NETDEV_DEV(dev, &link->dev);
 	i = register_netdev(dev);
 	if (i != 0) {
 		printk("ray_config register_netdev() failed\n");
diff --git a/drivers/net/wireless/wavelan_cs.c b/drivers/net/wireless/wavelan_cs.c
index f8c5166fffe0..df3579aef505 100644
--- a/drivers/net/wireless/wavelan_cs.c
+++ b/drivers/net/wireless/wavelan_cs.c
@@ -3899,7 +3899,7 @@ wv_pcmcia_config(struct pcmcia_device *	link)
 	     lp->mem, dev->irq, (u_int) dev->base_addr);
 #endif
 
-      SET_NETDEV_DEV(dev, &handle_to_dev(link));
+      SET_NETDEV_DEV(dev, &link->dev);
       i = register_netdev(dev);
       if(i != 0)
 	{
diff --git a/drivers/net/wireless/wl3501_cs.c b/drivers/net/wireless/wl3501_cs.c
index 7e8e269b4cb6..9a956c786738 100644
--- a/drivers/net/wireless/wl3501_cs.c
+++ b/drivers/net/wireless/wl3501_cs.c
@@ -1975,7 +1975,7 @@ static int wl3501_config(struct pcmcia_device *link)
 
 	dev->irq = link->irq.AssignedIRQ;
 	dev->base_addr = link->io.BasePort1;
-	SET_NETDEV_DEV(dev, &handle_to_dev(link));
+	SET_NETDEV_DEV(dev, &link->dev);
 	if (register_netdev(dev)) {
 		printk(KERN_NOTICE "wl3501_cs: register_netdev() failed\n");
 		goto failed;
diff --git a/drivers/serial/serial_cs.c b/drivers/serial/serial_cs.c
index 3b31bee85761..8d651a618455 100644
--- a/drivers/serial/serial_cs.c
+++ b/drivers/serial/serial_cs.c
@@ -387,7 +387,7 @@ static int setup_serial(struct pcmcia_device *handle, struct serial_info * info,
 	port.irq = irq;
 	port.flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_SHARE_IRQ;
 	port.uartclk = 1843200;
-	port.dev = &handle_to_dev(handle);
+	port.dev = &handle->dev;
 	if (buggy_uart)
 		port.flags |= UPF_BUGGY_UART;
 
diff --git a/drivers/usb/host/sl811_cs.c b/drivers/usb/host/sl811_cs.c
index 46077420dea5..f87aba6db4e8 100644
--- a/drivers/usb/host/sl811_cs.c
+++ b/drivers/usb/host/sl811_cs.c
@@ -185,7 +185,7 @@ static int sl811_cs_config_check(struct pcmcia_device *p_dev,
 
 static int sl811_cs_config(struct pcmcia_device *link)
 {
-	struct device		*parent = &handle_to_dev(link);
+	struct device		*parent = &link->dev;
 	local_info_t		*dev = link->priv;
 	int			ret;
 
diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h
index d6c55fdf8d01..d403c12f7978 100644
--- a/include/pcmcia/ds.h
+++ b/include/pcmcia/ds.h
@@ -138,9 +138,6 @@ struct pcmcia_device {
 #define to_pcmcia_dev(n) container_of(n, struct pcmcia_device, dev)
 #define to_pcmcia_drv(n) container_of(n, struct pcmcia_driver, drv)
 
-/* deprecated -- don't use! */
-#define handle_to_dev(handle) (handle->dev)
-
 
 /*
  * CIS access.
diff --git a/sound/pcmcia/pdaudiocf/pdaudiocf.c b/sound/pcmcia/pdaudiocf/pdaudiocf.c
index 64b859925c0b..447aaaee3be6 100644
--- a/sound/pcmcia/pdaudiocf/pdaudiocf.c
+++ b/sound/pcmcia/pdaudiocf/pdaudiocf.c
@@ -131,7 +131,7 @@ static int snd_pdacf_probe(struct pcmcia_device *link)
 		return err;
 	}
 
-	snd_card_set_dev(card, &handle_to_dev(link));
+	snd_card_set_dev(card, &link->dev);
 
 	pdacf->index = i;
 	card_list[i] = card;
diff --git a/sound/pcmcia/vx/vxpocket.c b/sound/pcmcia/vx/vxpocket.c
index 1492744ad67f..5a5db48a91a9 100644
--- a/sound/pcmcia/vx/vxpocket.c
+++ b/sound/pcmcia/vx/vxpocket.c
@@ -244,7 +244,7 @@ static int vxpocket_config(struct pcmcia_device *link)
 	if (ret)
 		goto failed;
 
-	chip->dev = &handle_to_dev(link);
+	chip->dev = &link->dev;
 	snd_card_set_dev(chip->card, chip->dev);
 
 	if (snd_vxpocket_assign_resources(chip, link->io.BasePort1, link->irq.AssignedIRQ) < 0)
-- 
cgit v1.2.3-70-g09d2


From 5fa9167a1bf5f5a4b7282f5e7ac56a4a5a1fa044 Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Sun, 8 Nov 2009 17:24:46 +0100
Subject: pcmcia: rework the irq_req_t typedef

Most of the irq_req_t typedef'd struct can be re-worked quite
easily:

(1) IRQInfo2 was unused in any case, so drop it.

(2) IRQInfo1 was used write-only, so drop it.

(3) Instance (private data to be passed to the IRQ handler):
	Most PCMCIA drivers using pcmcia_request_irq() to actually
	register an IRQ handler set the "dev_id" to the same pointer
	as the "priv" pointer in struct pcmcia_device. Modify the two
	exceptions (ipwireless, ibmtr_cs) to also work this waym and
	set the IRQ handler's "dev_id" to p_dev->priv unconditionally.

(4) Handler is to be of type irq_handler_t.

(5) Handler != NULL already tells whether an IRQ handler is present.
	Therefore, we do not need the IRQ_HANDLER_PRESENT flag in
	irq_req_t.Attributes.

CC: netdev@vger.kernel.org
CC: linux-bluetooth@vger.kernel.org
CC: linux-ide@vger.kernel.org
CC: linux-wireless@vger.kernel.org
CC: linux-scsi@vger.kernel.org
CC: alsa-devel@alsa-project.org
CC: Jaroslav Kysela <perex@perex.cz>
CC: Jiri Kosina <jkosina@suse.cz>
CC: Karsten Keil <isdn@linux-pingi.de>
for the Bluetooth parts: Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
 drivers/ata/pata_pcmcia.c                        |  1 -
 drivers/bluetooth/bluecard_cs.c                  |  4 +---
 drivers/bluetooth/bt3c_cs.c                      |  4 +---
 drivers/bluetooth/btuart_cs.c                    |  4 +---
 drivers/bluetooth/dtl1_cs.c                      |  4 +---
 drivers/char/pcmcia/ipwireless/hardware.c        |  8 ++++----
 drivers/char/pcmcia/ipwireless/main.c            |  6 +-----
 drivers/char/pcmcia/synclink_cs.c                |  3 ---
 drivers/ide/ide-cs.c                             |  1 -
 drivers/isdn/hardware/avm/avm_cs.c               |  2 --
 drivers/isdn/hisax/avma1_cs.c                    |  2 --
 drivers/isdn/hisax/elsa_cs.c                     |  1 -
 drivers/isdn/hisax/sedlbauer_cs.c                |  1 -
 drivers/isdn/hisax/teles_cs.c                    |  1 -
 drivers/net/pcmcia/3c574_cs.c                    |  4 +---
 drivers/net/pcmcia/3c589_cs.c                    |  4 +---
 drivers/net/pcmcia/axnet_cs.c                    |  1 -
 drivers/net/pcmcia/com20020_cs.c                 |  8 +++-----
 drivers/net/pcmcia/fmvj18x_cs.c                  |  6 ++----
 drivers/net/pcmcia/ibmtr_cs.c                    | 13 +++++++++----
 drivers/net/pcmcia/nmclan_cs.c                   |  4 +---
 drivers/net/pcmcia/pcnet_cs.c                    |  1 -
 drivers/net/pcmcia/smc91c92_cs.c                 |  8 +++-----
 drivers/net/pcmcia/xirc2ps_cs.c                  |  3 ---
 drivers/net/wireless/airo_cs.c                   |  1 -
 drivers/net/wireless/atmel_cs.c                  |  1 -
 drivers/net/wireless/b43/pcmcia.c                |  2 --
 drivers/net/wireless/hostap/hostap_cs.c          |  5 +----
 drivers/net/wireless/libertas/if_cs.c            |  1 -
 drivers/net/wireless/netwave_cs.c                |  5 +----
 drivers/net/wireless/orinoco/orinoco_cs.c        |  4 +---
 drivers/net/wireless/orinoco/spectrum_cs.c       |  4 +---
 drivers/net/wireless/ray_cs.c                    |  4 +---
 drivers/net/wireless/wavelan_cs.c                |  5 ++---
 drivers/net/wireless/wl3501_cs.c                 |  5 ++---
 drivers/parport/parport_cs.c                     |  1 -
 drivers/pcmcia/pcmcia_resource.c                 | 18 +++++++++---------
 drivers/scsi/pcmcia/aha152x_stub.c               |  1 -
 drivers/scsi/pcmcia/fdomain_stub.c               |  1 -
 drivers/scsi/pcmcia/nsp_cs.c                     |  4 +---
 drivers/scsi/pcmcia/qlogic_stub.c                |  1 -
 drivers/scsi/pcmcia/sym53c500_cs.c               |  1 -
 drivers/serial/serial_cs.c                       |  1 -
 drivers/staging/comedi/drivers/cb_das16_cs.c     |  1 -
 drivers/staging/comedi/drivers/das08_cs.c        |  1 -
 drivers/staging/comedi/drivers/ni_daq_700.c      |  1 -
 drivers/staging/comedi/drivers/ni_daq_dio24.c    |  1 -
 drivers/staging/comedi/drivers/ni_labpc_cs.c     |  1 -
 drivers/staging/comedi/drivers/ni_mio_cs.c       |  4 ----
 drivers/staging/comedi/drivers/quatech_daqp_cs.c |  4 +---
 drivers/usb/host/sl811_cs.c                      |  1 -
 include/pcmcia/cs.h                              | 14 ++++++++------
 sound/pcmcia/pdaudiocf/pdaudiocf.c               |  4 +---
 sound/pcmcia/vx/vxpocket.c                       |  4 +---
 54 files changed, 59 insertions(+), 136 deletions(-)

(limited to 'include')

diff --git a/drivers/ata/pata_pcmcia.c b/drivers/ata/pata_pcmcia.c
index 5f94e214e17d..1b392c9e8531 100644
--- a/drivers/ata/pata_pcmcia.c
+++ b/drivers/ata/pata_pcmcia.c
@@ -268,7 +268,6 @@ static int pcmcia_init_one(struct pcmcia_device *pdev)
 	pdev->io.Attributes2 = IO_DATA_PATH_WIDTH_8;
 	pdev->io.IOAddrLines = 3;
 	pdev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING;
-	pdev->irq.IRQInfo1 = IRQ_LEVEL_ID;
 	pdev->conf.Attributes = CONF_ENABLE_IRQ;
 	pdev->conf.IntType = INT_MEMORY_AND_IO;
 
diff --git a/drivers/bluetooth/bluecard_cs.c b/drivers/bluetooth/bluecard_cs.c
index 1e0c4d822972..2acdc605cb4b 100644
--- a/drivers/bluetooth/bluecard_cs.c
+++ b/drivers/bluetooth/bluecard_cs.c
@@ -867,11 +867,9 @@ static int bluecard_probe(struct pcmcia_device *link)
 
 	link->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
 	link->io.NumPorts1 = 8;
-	link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT;
-	link->irq.IRQInfo1 = IRQ_LEVEL_ID;
+	link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING;
 
 	link->irq.Handler = bluecard_interrupt;
-	link->irq.Instance = info;
 
 	link->conf.Attributes = CONF_ENABLE_IRQ;
 	link->conf.IntType = INT_MEMORY_AND_IO;
diff --git a/drivers/bluetooth/bt3c_cs.c b/drivers/bluetooth/bt3c_cs.c
index 9787fda45d84..d814a2755ccb 100644
--- a/drivers/bluetooth/bt3c_cs.c
+++ b/drivers/bluetooth/bt3c_cs.c
@@ -659,11 +659,9 @@ static int bt3c_probe(struct pcmcia_device *link)
 
 	link->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
 	link->io.NumPorts1 = 8;
-	link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT;
-	link->irq.IRQInfo1 = IRQ_LEVEL_ID;
+	link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING;
 
 	link->irq.Handler = bt3c_interrupt;
-	link->irq.Instance = info;
 
 	link->conf.Attributes = CONF_ENABLE_IRQ;
 	link->conf.IntType = INT_MEMORY_AND_IO;
diff --git a/drivers/bluetooth/btuart_cs.c b/drivers/bluetooth/btuart_cs.c
index f44d75217b2b..d339464dc15e 100644
--- a/drivers/bluetooth/btuart_cs.c
+++ b/drivers/bluetooth/btuart_cs.c
@@ -588,11 +588,9 @@ static int btuart_probe(struct pcmcia_device *link)
 
 	link->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
 	link->io.NumPorts1 = 8;
-	link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT;
-	link->irq.IRQInfo1 = IRQ_LEVEL_ID;
+	link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING;
 
 	link->irq.Handler = btuart_interrupt;
-	link->irq.Instance = info;
 
 	link->conf.Attributes = CONF_ENABLE_IRQ;
 	link->conf.IntType = INT_MEMORY_AND_IO;
diff --git a/drivers/bluetooth/dtl1_cs.c b/drivers/bluetooth/dtl1_cs.c
index 7cd8614a8ea9..4f02a6f3c980 100644
--- a/drivers/bluetooth/dtl1_cs.c
+++ b/drivers/bluetooth/dtl1_cs.c
@@ -573,11 +573,9 @@ static int dtl1_probe(struct pcmcia_device *link)
 
 	link->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
 	link->io.NumPorts1 = 8;
-	link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT;
-	link->irq.IRQInfo1 = IRQ_LEVEL_ID;
+	link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING;
 
 	link->irq.Handler = dtl1_interrupt;
-	link->irq.Instance = info;
 
 	link->conf.Attributes = CONF_ENABLE_IRQ;
 	link->conf.IntType = INT_MEMORY_AND_IO;
diff --git a/drivers/char/pcmcia/ipwireless/hardware.c b/drivers/char/pcmcia/ipwireless/hardware.c
index 4c1820cad712..99cffdab1056 100644
--- a/drivers/char/pcmcia/ipwireless/hardware.c
+++ b/drivers/char/pcmcia/ipwireless/hardware.c
@@ -1213,12 +1213,12 @@ static irqreturn_t ipwireless_handle_v2_v3_interrupt(int irq,
 
 irqreturn_t ipwireless_interrupt(int irq, void *dev_id)
 {
-	struct ipw_hardware *hw = dev_id;
+	struct ipw_dev *ipw = dev_id;
 
-	if (hw->hw_version == HW_VERSION_1)
-		return ipwireless_handle_v1_interrupt(irq, hw);
+	if (ipw->hardware->hw_version == HW_VERSION_1)
+		return ipwireless_handle_v1_interrupt(irq, ipw->hardware);
 	else
-		return ipwireless_handle_v2_v3_interrupt(irq, hw);
+		return ipwireless_handle_v2_v3_interrupt(irq, ipw->hardware);
 }
 
 static void flush_packets_to_hw(struct ipw_hardware *hw)
diff --git a/drivers/char/pcmcia/ipwireless/main.c b/drivers/char/pcmcia/ipwireless/main.c
index 082146a26c87..dff24dae1485 100644
--- a/drivers/char/pcmcia/ipwireless/main.c
+++ b/drivers/char/pcmcia/ipwireless/main.c
@@ -93,8 +93,6 @@ static int ipwireless_probe(struct pcmcia_device *p_dev,
 	p_dev->io.NumPorts1 = cfg->io.win[0].len;
 	p_dev->io.IOAddrLines = 16;
 
-	p_dev->irq.IRQInfo1 = cfg->irq.IRQInfo1;
-
 	/* 0x40 causes it to generate level mode interrupts. */
 	/* 0x04 enables IREQ pin. */
 	p_dev->conf.ConfigIndex = cfg->index | 0x44;
@@ -197,9 +195,8 @@ static int config_ipwireless(struct ipw_dev *ipw)
 	link->conf.Attributes = CONF_ENABLE_IRQ;
 	link->conf.IntType = INT_MEMORY_AND_IO;
 
-	link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT;
+	link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING;
 	link->irq.Handler = ipwireless_interrupt;
-	link->irq.Instance = ipw->hardware;
 
 	INIT_WORK(&ipw->work_reboot, signalled_reboot_work);
 
@@ -315,7 +312,6 @@ static int ipwireless_attach(struct pcmcia_device *link)
 
 	ipw->link = link;
 	link->priv = ipw;
-	link->irq.Instance = ipw;
 
 	/* Link this device into our device list. */
 	link->dev_node = &ipw->nodes[0];
diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c
index 09b2590adb8b..c31a0d913d37 100644
--- a/drivers/char/pcmcia/synclink_cs.c
+++ b/drivers/char/pcmcia/synclink_cs.c
@@ -554,7 +554,6 @@ static int mgslpc_probe(struct pcmcia_device *link)
 
     /* Interrupt setup */
     link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING;
-    link->irq.IRQInfo1   = IRQ_LEVEL_ID;
     link->irq.Handler = NULL;
 
     link->conf.Attributes = 0;
@@ -609,9 +608,7 @@ static int mgslpc_config(struct pcmcia_device *link)
     link->conf.ConfigIndex = 8;
     link->conf.Present = PRESENT_OPTION;
 
-    link->irq.Attributes |= IRQ_HANDLE_PRESENT;
     link->irq.Handler     = mgslpc_isr;
-    link->irq.Instance    = info;
 
     ret = pcmcia_request_irq(link, &link->irq);
     if (ret)
diff --git a/drivers/ide/ide-cs.c b/drivers/ide/ide-cs.c
index 6cee6c8d0782..dd6396384c25 100644
--- a/drivers/ide/ide-cs.c
+++ b/drivers/ide/ide-cs.c
@@ -103,7 +103,6 @@ static int ide_probe(struct pcmcia_device *link)
     link->io.Attributes2 = IO_DATA_PATH_WIDTH_8;
     link->io.IOAddrLines = 3;
     link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING;
-    link->irq.IRQInfo1 = IRQ_LEVEL_ID;
     link->conf.Attributes = CONF_ENABLE_IRQ;
     link->conf.IntType = INT_MEMORY_AND_IO;
 
diff --git a/drivers/isdn/hardware/avm/avm_cs.c b/drivers/isdn/hardware/avm/avm_cs.c
index d388eadb9fdb..5a6ae646a636 100644
--- a/drivers/isdn/hardware/avm/avm_cs.c
+++ b/drivers/isdn/hardware/avm/avm_cs.c
@@ -111,8 +111,6 @@ static int avmcs_probe(struct pcmcia_device *p_dev)
     p_dev->irq.Attributes = IRQ_TYPE_EXCLUSIVE;
     p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED;
 
-    p_dev->irq.IRQInfo1 = IRQ_LEVEL_ID;
-
     /* General socket configuration */
     p_dev->conf.Attributes = CONF_ENABLE_IRQ;
     p_dev->conf.IntType = INT_MEMORY_AND_IO;
diff --git a/drivers/isdn/hisax/avma1_cs.c b/drivers/isdn/hisax/avma1_cs.c
index 6d963f9a09ce..f9bdff39cf4a 100644
--- a/drivers/isdn/hisax/avma1_cs.c
+++ b/drivers/isdn/hisax/avma1_cs.c
@@ -123,8 +123,6 @@ static int avma1cs_probe(struct pcmcia_device *p_dev)
     p_dev->irq.Attributes = IRQ_TYPE_EXCLUSIVE;
     p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED;
 
-    p_dev->irq.IRQInfo1 = IRQ_LEVEL_ID;
-
     /* General socket configuration */
     p_dev->conf.Attributes = CONF_ENABLE_IRQ;
     p_dev->conf.IntType = INT_MEMORY_AND_IO;
diff --git a/drivers/isdn/hisax/elsa_cs.c b/drivers/isdn/hisax/elsa_cs.c
index cdcd2979fcd4..a2f709f53974 100644
--- a/drivers/isdn/hisax/elsa_cs.c
+++ b/drivers/isdn/hisax/elsa_cs.c
@@ -138,7 +138,6 @@ static int elsa_cs_probe(struct pcmcia_device *link)
 
     /* Interrupt setup */
     link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED;
-    link->irq.IRQInfo1 = IRQ_LEVEL_ID|IRQ_SHARE_ID;
     link->irq.Handler = NULL;
 
     /*
diff --git a/drivers/isdn/hisax/sedlbauer_cs.c b/drivers/isdn/hisax/sedlbauer_cs.c
index 331716fc6b30..af5d393cc2d0 100644
--- a/drivers/isdn/hisax/sedlbauer_cs.c
+++ b/drivers/isdn/hisax/sedlbauer_cs.c
@@ -145,7 +145,6 @@ static int sedlbauer_probe(struct pcmcia_device *link)
 
     /* Interrupt setup */
     link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED;
-    link->irq.IRQInfo1 = IRQ_LEVEL_ID;
     link->irq.Handler = NULL;
 
     /*
diff --git a/drivers/isdn/hisax/teles_cs.c b/drivers/isdn/hisax/teles_cs.c
index 7b11c15b3a97..ea705394ce2b 100644
--- a/drivers/isdn/hisax/teles_cs.c
+++ b/drivers/isdn/hisax/teles_cs.c
@@ -128,7 +128,6 @@ static int teles_probe(struct pcmcia_device *link)
 
     /* Interrupt setup */
     link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED;
-    link->irq.IRQInfo1 = IRQ_LEVEL_ID|IRQ_SHARE_ID;
     link->irq.Handler = NULL;
 
     /*
diff --git a/drivers/net/pcmcia/3c574_cs.c b/drivers/net/pcmcia/3c574_cs.c
index 8b65e18ab230..17a27225cc98 100644
--- a/drivers/net/pcmcia/3c574_cs.c
+++ b/drivers/net/pcmcia/3c574_cs.c
@@ -283,10 +283,8 @@ static int tc574_probe(struct pcmcia_device *link)
 	spin_lock_init(&lp->window_lock);
 	link->io.NumPorts1 = 32;
 	link->io.Attributes1 = IO_DATA_PATH_WIDTH_16;
-	link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_HANDLE_PRESENT;
-	link->irq.IRQInfo1 = IRQ_LEVEL_ID;
+	link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING;
 	link->irq.Handler = &el3_interrupt;
-	link->irq.Instance = dev;
 	link->conf.Attributes = CONF_ENABLE_IRQ;
 	link->conf.IntType = INT_MEMORY_AND_IO;
 	link->conf.ConfigIndex = 1;
diff --git a/drivers/net/pcmcia/3c589_cs.c b/drivers/net/pcmcia/3c589_cs.c
index c43c21ddb2d1..6f8d7e2e5922 100644
--- a/drivers/net/pcmcia/3c589_cs.c
+++ b/drivers/net/pcmcia/3c589_cs.c
@@ -194,10 +194,8 @@ static int tc589_probe(struct pcmcia_device *link)
     spin_lock_init(&lp->lock);
     link->io.NumPorts1 = 16;
     link->io.Attributes1 = IO_DATA_PATH_WIDTH_16;
-    link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_HANDLE_PRESENT;
-    link->irq.IRQInfo1 = IRQ_LEVEL_ID;
+    link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING;
     link->irq.Handler = &el3_interrupt;
-    link->irq.Instance = dev;
     link->conf.Attributes = CONF_ENABLE_IRQ;
     link->conf.IntType = INT_MEMORY_AND_IO;
     link->conf.ConfigIndex = 1;
diff --git a/drivers/net/pcmcia/axnet_cs.c b/drivers/net/pcmcia/axnet_cs.c
index 0552dddd587f..800597b82d18 100644
--- a/drivers/net/pcmcia/axnet_cs.c
+++ b/drivers/net/pcmcia/axnet_cs.c
@@ -170,7 +170,6 @@ static int axnet_probe(struct pcmcia_device *link)
     info->p_dev = link;
     link->priv = dev;
     link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING;
-    link->irq.IRQInfo1 = IRQ_LEVEL_ID;
     link->conf.Attributes = CONF_ENABLE_IRQ;
     link->conf.IntType = INT_MEMORY_AND_IO;
 
diff --git a/drivers/net/pcmcia/com20020_cs.c b/drivers/net/pcmcia/com20020_cs.c
index 51e9cb0a6d1e..21d9c9d815d1 100644
--- a/drivers/net/pcmcia/com20020_cs.c
+++ b/drivers/net/pcmcia/com20020_cs.c
@@ -164,11 +164,10 @@ static int com20020_probe(struct pcmcia_device *p_dev)
     p_dev->io.NumPorts1 = 16;
     p_dev->io.IOAddrLines = 16;
     p_dev->irq.Attributes = IRQ_TYPE_EXCLUSIVE;
-    p_dev->irq.IRQInfo1 = IRQ_LEVEL_ID;
     p_dev->conf.Attributes = CONF_ENABLE_IRQ;
     p_dev->conf.IntType = INT_MEMORY_AND_IO;
 
-    p_dev->irq.Instance = info->dev = dev;
+    info->dev = dev;
     p_dev->priv = info;
 
     return com20020_config(p_dev);
@@ -275,9 +274,8 @@ static int com20020_config(struct pcmcia_device *link)
     ioaddr = dev->base_addr = link->io.BasePort1;
     dev_dbg(&link->dev, "got ioaddr %Xh\n", ioaddr);
 
-    dev_dbg(&link->dev, "request IRQ %d (%Xh/%Xh)\n",
-	   link->irq.AssignedIRQ,
-	   link->irq.IRQInfo1, link->irq.IRQInfo2);
+    dev_dbg(&link->dev, "request IRQ %d\n",
+	    link->irq.AssignedIRQ);
     i = pcmcia_request_irq(link, &link->irq);
     if (i != 0)
     {
diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c
index 9b5ca37c6684..6e3e1ced6db4 100644
--- a/drivers/net/pcmcia/fmvj18x_cs.c
+++ b/drivers/net/pcmcia/fmvj18x_cs.c
@@ -255,10 +255,8 @@ static int fmvj18x_probe(struct pcmcia_device *link)
     link->io.IOAddrLines = 5;
 
     /* Interrupt setup */
-    link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_HANDLE_PRESENT;
-    link->irq.IRQInfo1 = IRQ_LEVEL_ID;
+    link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING;
     link->irq.Handler = &fjn_interrupt;
-    link->irq.Instance = dev;
 
     /* General socket configuration */
     link->conf.Attributes = CONF_ENABLE_IRQ;
@@ -428,7 +426,7 @@ static int fmvj18x_config(struct pcmcia_device *link)
 
     if (link->io.NumPorts2 != 0) {
     	link->irq.Attributes =
-		IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED|IRQ_HANDLE_PRESENT;
+		IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED;
 	ret = mfc_try_io_port(link);
 	if (ret != 0) goto failed;
     } else if (cardtype == UNGERMANN) {
diff --git a/drivers/net/pcmcia/ibmtr_cs.c b/drivers/net/pcmcia/ibmtr_cs.c
index 76706e12d731..37f4a6fdc3ef 100644
--- a/drivers/net/pcmcia/ibmtr_cs.c
+++ b/drivers/net/pcmcia/ibmtr_cs.c
@@ -119,6 +119,12 @@ static const struct ethtool_ops netdev_ethtool_ops = {
 	.get_drvinfo		= netdev_get_drvinfo,
 };
 
+static irqreturn_t ibmtr_interrupt(int irq, void *dev_id) {
+	ibmtr_dev_t *info = dev_id;
+	struct net_device *dev = info->dev;
+	return tok_interrupt(irq, dev);
+};
+
 /*======================================================================
 
     ibmtr_attach() creates an "instance" of the driver, allocating
@@ -150,14 +156,13 @@ static int __devinit ibmtr_attach(struct pcmcia_device *link)
     link->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
     link->io.NumPorts1 = 4;
     link->io.IOAddrLines = 16;
-    link->irq.Attributes = IRQ_TYPE_EXCLUSIVE | IRQ_HANDLE_PRESENT;
-    link->irq.IRQInfo1 = IRQ_LEVEL_ID;
-    link->irq.Handler = &tok_interrupt;
+    link->irq.Attributes = IRQ_TYPE_EXCLUSIVE;
+    link->irq.Handler = ibmtr_interrupt;
     link->conf.Attributes = CONF_ENABLE_IRQ;
     link->conf.IntType = INT_MEMORY_AND_IO;
     link->conf.Present = PRESENT_OPTION;
 
-    link->irq.Instance = info->dev = dev;
+    info->dev = dev;
 
     SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops);
 
diff --git a/drivers/net/pcmcia/nmclan_cs.c b/drivers/net/pcmcia/nmclan_cs.c
index 2d0c6f93ed8b..dae5ef6b2609 100644
--- a/drivers/net/pcmcia/nmclan_cs.c
+++ b/drivers/net/pcmcia/nmclan_cs.c
@@ -463,10 +463,8 @@ static int nmclan_probe(struct pcmcia_device *link)
     link->io.NumPorts1 = 32;
     link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
     link->io.IOAddrLines = 5;
-    link->irq.Attributes = IRQ_TYPE_EXCLUSIVE | IRQ_HANDLE_PRESENT;
-    link->irq.IRQInfo1 = IRQ_LEVEL_ID;
+    link->irq.Attributes = IRQ_TYPE_EXCLUSIVE;
     link->irq.Handler = &mace_interrupt;
-    link->irq.Instance = dev;
     link->conf.Attributes = CONF_ENABLE_IRQ;
     link->conf.IntType = INT_MEMORY_AND_IO;
     link->conf.ConfigIndex = 1;
diff --git a/drivers/net/pcmcia/pcnet_cs.c b/drivers/net/pcmcia/pcnet_cs.c
index 30baee7b86a2..de2d10085635 100644
--- a/drivers/net/pcmcia/pcnet_cs.c
+++ b/drivers/net/pcmcia/pcnet_cs.c
@@ -266,7 +266,6 @@ static int pcnet_probe(struct pcmcia_device *link)
     link->priv = dev;
 
     link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING;
-    link->irq.IRQInfo1 = IRQ_LEVEL_ID;
     link->conf.Attributes = CONF_ENABLE_IRQ;
     link->conf.IntType = INT_MEMORY_AND_IO;
 
diff --git a/drivers/net/pcmcia/smc91c92_cs.c b/drivers/net/pcmcia/smc91c92_cs.c
index 2e795de06cb3..9e0da370912e 100644
--- a/drivers/net/pcmcia/smc91c92_cs.c
+++ b/drivers/net/pcmcia/smc91c92_cs.c
@@ -329,10 +329,8 @@ static int smc91c92_probe(struct pcmcia_device *link)
     link->io.NumPorts1 = 16;
     link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
     link->io.IOAddrLines = 4;
-    link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_HANDLE_PRESENT;
-    link->irq.IRQInfo1 = IRQ_LEVEL_ID;
+    link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING;
     link->irq.Handler = &smc_interrupt;
-    link->irq.Instance = dev;
     link->conf.Attributes = CONF_ENABLE_IRQ;
     link->conf.IntType = INT_MEMORY_AND_IO;
 
@@ -456,7 +454,7 @@ static int mhz_mfc_config(struct pcmcia_device *link)
     link->conf.Attributes |= CONF_ENABLE_SPKR;
     link->conf.Status = CCSR_AUDIO_ENA;
     link->irq.Attributes =
-	IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED|IRQ_HANDLE_PRESENT;
+	IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED;
     link->io.IOAddrLines = 16;
     link->io.Attributes2 = IO_DATA_PATH_WIDTH_8;
     link->io.NumPorts2 = 8;
@@ -655,7 +653,7 @@ static int osi_config(struct pcmcia_device *link)
     link->conf.Attributes |= CONF_ENABLE_SPKR;
     link->conf.Status = CCSR_AUDIO_ENA;
     link->irq.Attributes =
-	IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED|IRQ_HANDLE_PRESENT;
+	IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED;
     link->io.NumPorts1 = 64;
     link->io.Attributes2 = IO_DATA_PATH_WIDTH_8;
     link->io.NumPorts2 = 8;
diff --git a/drivers/net/pcmcia/xirc2ps_cs.c b/drivers/net/pcmcia/xirc2ps_cs.c
index f75ac716390d..fe504b7f369f 100644
--- a/drivers/net/pcmcia/xirc2ps_cs.c
+++ b/drivers/net/pcmcia/xirc2ps_cs.c
@@ -556,7 +556,6 @@ xirc2ps_probe(struct pcmcia_device *link)
     link->conf.IntType = INT_MEMORY_AND_IO;
     link->conf.ConfigIndex = 1;
     link->irq.Handler = xirc2ps_interrupt;
-    link->irq.Instance = dev;
 
     /* Fill in card specific entries */
     dev->netdev_ops = &netdev_ops;
@@ -835,8 +834,6 @@ xirc2ps_config(struct pcmcia_device * link)
 
     link->io.IOAddrLines =10;
     link->io.Attributes1 = IO_DATA_PATH_WIDTH_16;
-    link->irq.Attributes = IRQ_HANDLE_PRESENT;
-    link->irq.IRQInfo1 = IRQ_LEVEL_ID;
     if (local->modem) {
 	int pass;
 
diff --git a/drivers/net/wireless/airo_cs.c b/drivers/net/wireless/airo_cs.c
index 7d3a96fdf5a3..f6036fb42319 100644
--- a/drivers/net/wireless/airo_cs.c
+++ b/drivers/net/wireless/airo_cs.c
@@ -134,7 +134,6 @@ static int airo_probe(struct pcmcia_device *p_dev)
 
 	/* Interrupt setup */
 	p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING;
-	p_dev->irq.IRQInfo1 = IRQ_LEVEL_ID;
 	p_dev->irq.Handler = NULL;
 
 	/*
diff --git a/drivers/net/wireless/atmel_cs.c b/drivers/net/wireless/atmel_cs.c
index 18a58b859223..32407911842f 100644
--- a/drivers/net/wireless/atmel_cs.c
+++ b/drivers/net/wireless/atmel_cs.c
@@ -143,7 +143,6 @@ static int atmel_probe(struct pcmcia_device *p_dev)
 
 	/* Interrupt setup */
 	p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING;
-	p_dev->irq.IRQInfo1 = IRQ_LEVEL_ID;
 	p_dev->irq.Handler = NULL;
 
 	/*
diff --git a/drivers/net/wireless/b43/pcmcia.c b/drivers/net/wireless/b43/pcmcia.c
index 2588358294be..984174bc7b0f 100644
--- a/drivers/net/wireless/b43/pcmcia.c
+++ b/drivers/net/wireless/b43/pcmcia.c
@@ -98,9 +98,7 @@ static int __devinit b43_pcmcia_probe(struct pcmcia_device *dev)
 		goto err_disable;
 
 	dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING;
-	dev->irq.IRQInfo1 = IRQ_LEVEL_ID;
 	dev->irq.Handler = NULL; /* The handler is registered later. */
-	dev->irq.Instance = NULL;
 	res = pcmcia_request_irq(dev, &dev->irq);
 	if (res != 0)
 		goto err_disable;
diff --git a/drivers/net/wireless/hostap/hostap_cs.c b/drivers/net/wireless/hostap/hostap_cs.c
index 243e912729b9..c9640a3e02c9 100644
--- a/drivers/net/wireless/hostap/hostap_cs.c
+++ b/drivers/net/wireless/hostap/hostap_cs.c
@@ -641,11 +641,8 @@ static int prism2_config(struct pcmcia_device *link)
 	 * irq structure is initialized.
 	 */
 	if (link->conf.Attributes & CONF_ENABLE_IRQ) {
-		link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING |
-				       IRQ_HANDLE_PRESENT;
-		link->irq.IRQInfo1 = IRQ_LEVEL_ID;
+		link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING;
 		link->irq.Handler = prism2_interrupt;
-		link->irq.Instance = dev;
 		ret = pcmcia_request_irq(link, &link->irq);
 		if (ret)
 			goto failed;
diff --git a/drivers/net/wireless/libertas/if_cs.c b/drivers/net/wireless/libertas/if_cs.c
index f2b16559b686..b1d84592b959 100644
--- a/drivers/net/wireless/libertas/if_cs.c
+++ b/drivers/net/wireless/libertas/if_cs.c
@@ -837,7 +837,6 @@ static int if_cs_probe(struct pcmcia_device *p_dev)
 
 	p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING;
 	p_dev->irq.Handler = NULL;
-	p_dev->irq.IRQInfo1 = IRQ_INFO2_VALID | IRQ_LEVEL_ID;
 
 	p_dev->conf.Attributes = 0;
 	p_dev->conf.IntType = INT_MEMORY_AND_IO;
diff --git a/drivers/net/wireless/netwave_cs.c b/drivers/net/wireless/netwave_cs.c
index f5333b7d2226..e61e6b9440ab 100644
--- a/drivers/net/wireless/netwave_cs.c
+++ b/drivers/net/wireless/netwave_cs.c
@@ -384,8 +384,7 @@ static int netwave_probe(struct pcmcia_device *link)
     link->io.IOAddrLines = 5;
     
     /* Interrupt setup */
-    link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT;
-    link->irq.IRQInfo1 = IRQ_LEVEL_ID;
+    link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING;
     link->irq.Handler = &netwave_interrupt;
     
     /* General socket configuration */
@@ -404,8 +403,6 @@ static int netwave_probe(struct pcmcia_device *link)
 
     dev->watchdog_timeo = TX_TIMEOUT;
 
-    link->irq.Instance = dev;
-
     return netwave_pcmcia_config( link);
 } /* netwave_attach */
 
diff --git a/drivers/net/wireless/orinoco/orinoco_cs.c b/drivers/net/wireless/orinoco/orinoco_cs.c
index 688b39823459..f27bb8367c98 100644
--- a/drivers/net/wireless/orinoco/orinoco_cs.c
+++ b/drivers/net/wireless/orinoco/orinoco_cs.c
@@ -120,10 +120,8 @@ orinoco_cs_probe(struct pcmcia_device *link)
 	link->priv = priv;
 
 	/* Interrupt setup */
-	link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT;
-	link->irq.IRQInfo1 = IRQ_LEVEL_ID;
+	link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING;
 	link->irq.Handler = orinoco_interrupt;
-	link->irq.Instance = priv;
 
 	/* General socket configuration defaults can go here.  In this
 	 * client, we assume very little, and rely on the CIS for
diff --git a/drivers/net/wireless/orinoco/spectrum_cs.c b/drivers/net/wireless/orinoco/spectrum_cs.c
index c609371d1525..59bda240fdc2 100644
--- a/drivers/net/wireless/orinoco/spectrum_cs.c
+++ b/drivers/net/wireless/orinoco/spectrum_cs.c
@@ -194,10 +194,8 @@ spectrum_cs_probe(struct pcmcia_device *link)
 	link->priv = priv;
 
 	/* Interrupt setup */
-	link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT;
-	link->irq.IRQInfo1 = IRQ_LEVEL_ID;
+	link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING;
 	link->irq.Handler = orinoco_interrupt;
-	link->irq.Instance = priv;
 
 	/* General socket configuration defaults can go here.  In this
 	 * client, we assume very little, and rely on the CIS for
diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c
index 5e0f4c3eac38..91213f9e2c4f 100644
--- a/drivers/net/wireless/ray_cs.c
+++ b/drivers/net/wireless/ray_cs.c
@@ -323,8 +323,7 @@ static int ray_probe(struct pcmcia_device *p_dev)
 	p_dev->io.IOAddrLines = 5;
 
 	/* Interrupt setup. For PCMCIA, driver takes what's given */
-	p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT;
-	p_dev->irq.IRQInfo1 = IRQ_LEVEL_ID;
+	p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING;
 	p_dev->irq.Handler = &ray_interrupt;
 
 	/* General socket configuration */
@@ -333,7 +332,6 @@ static int ray_probe(struct pcmcia_device *p_dev)
 	p_dev->conf.ConfigIndex = 1;
 
 	p_dev->priv = dev;
-	p_dev->irq.Instance = dev;
 
 	local->finder = p_dev;
 	local->card_status = CARD_INSERTED;
diff --git a/drivers/net/wireless/wavelan_cs.c b/drivers/net/wireless/wavelan_cs.c
index df3579aef505..33918fd5b231 100644
--- a/drivers/net/wireless/wavelan_cs.c
+++ b/drivers/net/wireless/wavelan_cs.c
@@ -4438,8 +4438,7 @@ wavelan_probe(struct pcmcia_device *p_dev)
   p_dev->io.IOAddrLines = 3;
 
   /* Interrupt setup */
-  p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT;
-  p_dev->irq.IRQInfo1 = IRQ_LEVEL_ID;
+  p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING;
   p_dev->irq.Handler = wavelan_interrupt;
 
   /* General socket configuration */
@@ -4451,7 +4450,7 @@ wavelan_probe(struct pcmcia_device *p_dev)
   if (!dev)
       return -ENOMEM;
 
-  p_dev->priv = p_dev->irq.Instance = dev;
+  p_dev->priv = dev;
 
   lp = netdev_priv(dev);
 
diff --git a/drivers/net/wireless/wl3501_cs.c b/drivers/net/wireless/wl3501_cs.c
index 9a956c786738..5f0401a52cff 100644
--- a/drivers/net/wireless/wl3501_cs.c
+++ b/drivers/net/wireless/wl3501_cs.c
@@ -1898,8 +1898,7 @@ static int wl3501_probe(struct pcmcia_device *p_dev)
 	p_dev->io.IOAddrLines	= 5;
 
 	/* Interrupt setup */
-	p_dev->irq.Attributes	= IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT;
-	p_dev->irq.IRQInfo1	= IRQ_LEVEL_ID;
+	p_dev->irq.Attributes	= IRQ_TYPE_DYNAMIC_SHARING;
 	p_dev->irq.Handler = wl3501_interrupt;
 
 	/* General socket configuration */
@@ -1922,7 +1921,7 @@ static int wl3501_probe(struct pcmcia_device *p_dev)
 	dev->wireless_handlers	= &wl3501_handler_def;
 	SET_ETHTOOL_OPS(dev, &ops);
 	netif_stop_queue(dev);
-	p_dev->priv = p_dev->irq.Instance = dev;
+	p_dev->priv = dev;
 
 	return wl3501_config(p_dev);
 out_link:
diff --git a/drivers/parport/parport_cs.c b/drivers/parport/parport_cs.c
index e56a4dea6717..7dd370fa3439 100644
--- a/drivers/parport/parport_cs.c
+++ b/drivers/parport/parport_cs.c
@@ -106,7 +106,6 @@ static int parport_probe(struct pcmcia_device *link)
     link->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
     link->io.Attributes2 = IO_DATA_PATH_WIDTH_8;
     link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING;
-    link->irq.IRQInfo1 = IRQ_LEVEL_ID;
     link->conf.Attributes = CONF_ENABLE_IRQ;
     link->conf.IntType = INT_MEMORY_AND_IO;
 
diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c
index 1cf7d54fb7e2..a8bf8c1b45ed 100644
--- a/drivers/pcmcia/pcmcia_resource.c
+++ b/drivers/pcmcia/pcmcia_resource.c
@@ -383,8 +383,8 @@ static int pcmcia_release_irq(struct pcmcia_device *p_dev, irq_req_t *req)
 		s->irq.AssignedIRQ = 0;
 	}
 
-	if (req->Attributes & IRQ_HANDLE_PRESENT) {
-		free_irq(req->AssignedIRQ, req->Instance);
+	if (req->Handler) {
+		free_irq(req->AssignedIRQ, p_dev->priv);
 	}
 
 #ifdef CONFIG_PCMCIA_PROBE
@@ -664,7 +664,7 @@ int pcmcia_request_irq(struct pcmcia_device *p_dev, irq_req_t *req)
 	/* if the underlying IRQ infrastructure allows for it, only allocate
 	 * the IRQ, but do not enable it
 	 */
-	if (!(req->Attributes & IRQ_HANDLE_PRESENT))
+	if (!(req->Handler))
 		type |= IRQ_NOAUTOEN;
 #endif /* IRQ_NOAUTOEN */
 
@@ -674,7 +674,7 @@ int pcmcia_request_irq(struct pcmcia_device *p_dev, irq_req_t *req)
 	} else {
 		int try;
 		u32 mask = s->irq_mask;
-		void *data = &p_dev->dev.driver; /* something unique to this device */
+		void *data = p_dev; /* something unique to this device */
 
 		for (try = 0; try < 64; try++) {
 			irq = try % 32;
@@ -691,12 +691,12 @@ int pcmcia_request_irq(struct pcmcia_device *p_dev, irq_req_t *req)
 			 * registering a dummy handle works, i.e. if the IRQ isn't
 			 * marked as used by the kernel resource management core */
 			ret = request_irq(irq,
-					  (req->Attributes & IRQ_HANDLE_PRESENT) ? req->Handler : test_action,
+					  (req->Handler) ? req->Handler : test_action,
 					  type,
 					  p_dev->devname,
-					  (req->Attributes & IRQ_HANDLE_PRESENT) ? req->Instance : data);
+					  (req->Handler) ? p_dev->priv : data);
 			if (!ret) {
-				if (!(req->Attributes & IRQ_HANDLE_PRESENT))
+				if (!req->Handler)
 					free_irq(irq, data);
 				break;
 			}
@@ -713,9 +713,9 @@ int pcmcia_request_irq(struct pcmcia_device *p_dev, irq_req_t *req)
 		irq = s->pci_irq;
 	}
 
-	if (ret && (req->Attributes & IRQ_HANDLE_PRESENT)) {
+	if (ret && req->Handler) {
 		ret = request_irq(irq, req->Handler, type,
-				  p_dev->devname, req->Instance);
+				  p_dev->devname, p_dev->priv);
 		if (ret) {
 			dev_printk(KERN_INFO, &s->dev,
 				"request_irq() failed\n");
diff --git a/drivers/scsi/pcmcia/aha152x_stub.c b/drivers/scsi/pcmcia/aha152x_stub.c
index 4329e4e5043d..528733b4a392 100644
--- a/drivers/scsi/pcmcia/aha152x_stub.c
+++ b/drivers/scsi/pcmcia/aha152x_stub.c
@@ -106,7 +106,6 @@ static int aha152x_probe(struct pcmcia_device *link)
     link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
     link->io.IOAddrLines = 10;
     link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING;
-    link->irq.IRQInfo1 = IRQ_LEVEL_ID;
     link->conf.Attributes = CONF_ENABLE_IRQ;
     link->conf.IntType = INT_MEMORY_AND_IO;
     link->conf.Present = PRESENT_OPTION;
diff --git a/drivers/scsi/pcmcia/fdomain_stub.c b/drivers/scsi/pcmcia/fdomain_stub.c
index 5792b55c9d3c..914040684079 100644
--- a/drivers/scsi/pcmcia/fdomain_stub.c
+++ b/drivers/scsi/pcmcia/fdomain_stub.c
@@ -89,7 +89,6 @@ static int fdomain_probe(struct pcmcia_device *link)
 	link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
 	link->io.IOAddrLines = 10;
 	link->irq.Attributes = IRQ_TYPE_EXCLUSIVE;
-	link->irq.IRQInfo1 = IRQ_LEVEL_ID;
 	link->conf.Attributes = CONF_ENABLE_IRQ;
 	link->conf.IntType = INT_MEMORY_AND_IO;
 	link->conf.Present = PRESENT_OPTION;
diff --git a/drivers/scsi/pcmcia/nsp_cs.c b/drivers/scsi/pcmcia/nsp_cs.c
index 9dfd6f510b65..c2341af587a3 100644
--- a/drivers/scsi/pcmcia/nsp_cs.c
+++ b/drivers/scsi/pcmcia/nsp_cs.c
@@ -1564,12 +1564,10 @@ static int nsp_cs_probe(struct pcmcia_device *link)
 	link->io.IOAddrLines	 = 10;	/* not used */
 
 	/* Interrupt setup */
-	link->irq.Attributes	 = IRQ_TYPE_EXCLUSIVE | IRQ_HANDLE_PRESENT;
-	link->irq.IRQInfo1	 = IRQ_LEVEL_ID;
+	link->irq.Attributes	 = IRQ_TYPE_EXCLUSIVE;
 
 	/* Interrupt handler */
 	link->irq.Handler	 = &nspintr;
-	link->irq.Instance       = info;
 	link->irq.Attributes     |= IRQF_SHARED;
 
 	/* General socket configuration */
diff --git a/drivers/scsi/pcmcia/qlogic_stub.c b/drivers/scsi/pcmcia/qlogic_stub.c
index 65d7ad58433f..f85f094870b4 100644
--- a/drivers/scsi/pcmcia/qlogic_stub.c
+++ b/drivers/scsi/pcmcia/qlogic_stub.c
@@ -162,7 +162,6 @@ static int qlogic_probe(struct pcmcia_device *link)
 	link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
 	link->io.IOAddrLines = 10;
 	link->irq.Attributes = IRQ_TYPE_EXCLUSIVE;
-	link->irq.IRQInfo1 = IRQ_LEVEL_ID;
 	link->conf.Attributes = CONF_ENABLE_IRQ;
 	link->conf.IntType = INT_MEMORY_AND_IO;
 	link->conf.Present = PRESENT_OPTION;
diff --git a/drivers/scsi/pcmcia/sym53c500_cs.c b/drivers/scsi/pcmcia/sym53c500_cs.c
index 851a41ce4f06..e7564d8f0cbf 100644
--- a/drivers/scsi/pcmcia/sym53c500_cs.c
+++ b/drivers/scsi/pcmcia/sym53c500_cs.c
@@ -867,7 +867,6 @@ SYM53C500_probe(struct pcmcia_device *link)
 	link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
 	link->io.IOAddrLines = 10;
 	link->irq.Attributes = IRQ_TYPE_EXCLUSIVE;
-	link->irq.IRQInfo1 = IRQ_LEVEL_ID;
 	link->conf.Attributes = CONF_ENABLE_IRQ;
 	link->conf.IntType = INT_MEMORY_AND_IO;
 
diff --git a/drivers/serial/serial_cs.c b/drivers/serial/serial_cs.c
index 8d651a618455..5d6f947ef7ff 100644
--- a/drivers/serial/serial_cs.c
+++ b/drivers/serial/serial_cs.c
@@ -334,7 +334,6 @@ static int serial_probe(struct pcmcia_device *link)
 	link->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
 	link->io.NumPorts1 = 8;
 	link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING;
-	link->irq.IRQInfo1 = IRQ_LEVEL_ID;
 	link->conf.Attributes = CONF_ENABLE_IRQ;
 	if (do_sound) {
 		link->conf.Attributes |= CONF_ENABLE_SPKR;
diff --git a/drivers/staging/comedi/drivers/cb_das16_cs.c b/drivers/staging/comedi/drivers/cb_das16_cs.c
index 9e758027efee..39923cb388be 100644
--- a/drivers/staging/comedi/drivers/cb_das16_cs.c
+++ b/drivers/staging/comedi/drivers/cb_das16_cs.c
@@ -703,7 +703,6 @@ static int das16cs_pcmcia_attach(struct pcmcia_device *link)
 	/* Initialize the pcmcia_device structure */
 	/* Interrupt setup */
 	link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING;
-	link->irq.IRQInfo1 = IRQ_LEVEL_ID;
 	link->irq.Handler = NULL;
 
 	link->conf.Attributes = 0;
diff --git a/drivers/staging/comedi/drivers/das08_cs.c b/drivers/staging/comedi/drivers/das08_cs.c
index 384a77a37c26..9b945e5fdd32 100644
--- a/drivers/staging/comedi/drivers/das08_cs.c
+++ b/drivers/staging/comedi/drivers/das08_cs.c
@@ -173,7 +173,6 @@ static int das08_pcmcia_attach(struct pcmcia_device *link)
 
 	/* Interrupt setup */
 	link->irq.Attributes = IRQ_TYPE_EXCLUSIVE;
-	link->irq.IRQInfo1 = IRQ_LEVEL_ID;
 	link->irq.Handler = NULL;
 
 	/*
diff --git a/drivers/staging/comedi/drivers/ni_daq_700.c b/drivers/staging/comedi/drivers/ni_daq_700.c
index 7328a84ac500..ef5e1183d47d 100644
--- a/drivers/staging/comedi/drivers/ni_daq_700.c
+++ b/drivers/staging/comedi/drivers/ni_daq_700.c
@@ -503,7 +503,6 @@ static int dio700_cs_attach(struct pcmcia_device *link)
 
 	/* Interrupt setup */
 	link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING;
-	link->irq.IRQInfo1 = IRQ_LEVEL_ID;
 	link->irq.Handler = NULL;
 
 	/*
diff --git a/drivers/staging/comedi/drivers/ni_daq_dio24.c b/drivers/staging/comedi/drivers/ni_daq_dio24.c
index 505631553ef7..9017be3a92f1 100644
--- a/drivers/staging/comedi/drivers/ni_daq_dio24.c
+++ b/drivers/staging/comedi/drivers/ni_daq_dio24.c
@@ -254,7 +254,6 @@ static int dio24_cs_attach(struct pcmcia_device *link)
 
 	/* Interrupt setup */
 	link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING;
-	link->irq.IRQInfo1 = IRQ_LEVEL_ID;
 	link->irq.Handler = NULL;
 
 	/*
diff --git a/drivers/staging/comedi/drivers/ni_labpc_cs.c b/drivers/staging/comedi/drivers/ni_labpc_cs.c
index 7fb5058ad43c..7d514b3ee754 100644
--- a/drivers/staging/comedi/drivers/ni_labpc_cs.c
+++ b/drivers/staging/comedi/drivers/ni_labpc_cs.c
@@ -230,7 +230,6 @@ static int labpc_cs_attach(struct pcmcia_device *link)
 
 	/* Interrupt setup */
 	link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_FORCED_PULSE;
-	link->irq.IRQInfo1 = IRQ_INFO2_VALID | IRQ_PULSE_ID;
 	link->irq.Handler = NULL;
 
 	/*
diff --git a/drivers/staging/comedi/drivers/ni_mio_cs.c b/drivers/staging/comedi/drivers/ni_mio_cs.c
index ca7ab4abdc2e..d692f4bb47ea 100644
--- a/drivers/staging/comedi/drivers/ni_mio_cs.c
+++ b/drivers/staging/comedi/drivers/ni_mio_cs.c
@@ -274,7 +274,6 @@ static int cs_attach(struct pcmcia_device *link)
 	link->io.Attributes1 = IO_DATA_PATH_WIDTH_16;
 	link->io.NumPorts1 = 16;
 	link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING;
-	link->irq.IRQInfo1 = IRQ_LEVEL_ID;
 	link->conf.Attributes = CONF_ENABLE_IRQ;
 	link->conf.IntType = INT_MEMORY_AND_IO;
 
@@ -325,9 +324,6 @@ static int mio_pcmcia_config_loop(struct pcmcia_device *p_dev,
 	p_dev->io.IOAddrLines = cfg->io.flags & CISTPL_IO_LINES_MASK;
 	p_dev->io.NumPorts2 = 0;
 
-	p_dev->irq.IRQInfo1 = cfg->irq.IRQInfo1;
-	p_dev->irq.IRQInfo2 = cfg->irq.IRQInfo2;
-
 	for (base = 0x000; base < 0x400; base += 0x20) {
 		p_dev->io.BasePort1 = base;
 		ret = pcmcia_request_io(p_dev, &p_dev->io);
diff --git a/drivers/staging/comedi/drivers/quatech_daqp_cs.c b/drivers/staging/comedi/drivers/quatech_daqp_cs.c
index 48e7c27ed87a..5256fd933162 100644
--- a/drivers/staging/comedi/drivers/quatech_daqp_cs.c
+++ b/drivers/staging/comedi/drivers/quatech_daqp_cs.c
@@ -1041,10 +1041,8 @@ static int daqp_cs_attach(struct pcmcia_device *link)
 	link->priv = local;
 
 	/* Interrupt setup */
-	link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT;
-	link->irq.IRQInfo1 = IRQ_LEVEL_ID;
+	link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING;
 	link->irq.Handler = daqp_interrupt;
-	link->irq.Instance = local;
 
 	/*
 	   General socket configuration defaults can go here.  In this
diff --git a/drivers/usb/host/sl811_cs.c b/drivers/usb/host/sl811_cs.c
index f87aba6db4e8..39d253e841f6 100644
--- a/drivers/usb/host/sl811_cs.c
+++ b/drivers/usb/host/sl811_cs.c
@@ -243,7 +243,6 @@ static int sl811_cs_probe(struct pcmcia_device *link)
 
 	/* Initialize */
 	link->irq.Attributes = IRQ_TYPE_EXCLUSIVE;
-	link->irq.IRQInfo1 = IRQ_INFO2_VALID|IRQ_LEVEL_ID;
 	link->irq.Handler = NULL;
 
 	link->conf.Attributes = 0;
diff --git a/include/pcmcia/cs.h b/include/pcmcia/cs.h
index 904468a191ef..afc2bfb9e917 100644
--- a/include/pcmcia/cs.h
+++ b/include/pcmcia/cs.h
@@ -15,6 +15,10 @@
 #ifndef _LINUX_CS_H
 #define _LINUX_CS_H
 
+#ifdef __KERNEL__
+#include <linux/interrupt.h>
+#endif
+
 /* For AccessConfigurationRegister */
 typedef struct conf_reg_t {
     u_char	Function;
@@ -111,11 +115,9 @@ typedef struct io_req_t {
 
 /* For RequestIRQ and ReleaseIRQ */
 typedef struct irq_req_t {
-    u_int	Attributes;
-    u_int	AssignedIRQ;
-    u_int	IRQInfo1, IRQInfo2; /* IRQInfo2 is ignored */
-    void	*Handler;
-    void	*Instance;
+	u_int		Attributes;
+	u_int		AssignedIRQ;
+	irq_handler_t	Handler;
 } irq_req_t;
 
 /* Attributes for RequestIRQ and ReleaseIRQ */
@@ -125,7 +127,7 @@ typedef struct irq_req_t {
 #define IRQ_TYPE_DYNAMIC_SHARING	0x02
 #define IRQ_FORCED_PULSE		0x04
 #define IRQ_FIRST_SHARED		0x08
-#define IRQ_HANDLE_PRESENT		0x10
+//#define IRQ_HANDLE_PRESENT		0x10
 #define IRQ_PULSE_ALLOCATED		0x100
 
 /* Bits in IRQInfo1 field */
diff --git a/sound/pcmcia/pdaudiocf/pdaudiocf.c b/sound/pcmcia/pdaudiocf/pdaudiocf.c
index 447aaaee3be6..7717e01fc071 100644
--- a/sound/pcmcia/pdaudiocf/pdaudiocf.c
+++ b/sound/pcmcia/pdaudiocf/pdaudiocf.c
@@ -142,12 +142,10 @@ static int snd_pdacf_probe(struct pcmcia_device *link)
 	link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
 	link->io.NumPorts1 = 16;
 
-	link->irq.Attributes = IRQ_TYPE_EXCLUSIVE | IRQ_HANDLE_PRESENT | IRQ_FORCED_PULSE;
+	link->irq.Attributes = IRQ_TYPE_EXCLUSIVE | IRQ_FORCED_PULSE;
 	// link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED;
 
-	link->irq.IRQInfo1 = 0 /* | IRQ_LEVEL_ID */;
 	link->irq.Handler = pdacf_interrupt;
-	link->irq.Instance = pdacf;
 	link->conf.Attributes = CONF_ENABLE_IRQ;
 	link->conf.IntType = INT_MEMORY_AND_IO;
 	link->conf.ConfigIndex = 1;
diff --git a/sound/pcmcia/vx/vxpocket.c b/sound/pcmcia/vx/vxpocket.c
index 5a5db48a91a9..7be3b3357045 100644
--- a/sound/pcmcia/vx/vxpocket.c
+++ b/sound/pcmcia/vx/vxpocket.c
@@ -161,11 +161,9 @@ static int snd_vxpocket_new(struct snd_card *card, int ibl,
 	link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
 	link->io.NumPorts1 = 16;
 
-	link->irq.Attributes = IRQ_TYPE_EXCLUSIVE | IRQ_HANDLE_PRESENT;
+	link->irq.Attributes = IRQ_TYPE_EXCLUSIVE;
 
-	link->irq.IRQInfo1 = IRQ_LEVEL_ID;
 	link->irq.Handler = &snd_vx_irq_handler;
-	link->irq.Instance = chip;
 
 	link->conf.Attributes = CONF_ENABLE_IRQ;
 	link->conf.IntType = INT_MEMORY_AND_IO;
-- 
cgit v1.2.3-70-g09d2


From 7716977b6ae5a0cdd0afab5c6035c4d0ce53f599 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 30 Nov 2009 13:24:18 +0000
Subject: mfd: Correct WM831X_MAX_ISEL_VALUE

There was confusion between the array size and the highest ISEL
value possible.

Reported-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/wm831x-core.c            | 2 +-
 include/linux/mfd/wm831x/regulator.h | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/mfd/wm831x-core.c b/drivers/mfd/wm831x-core.c
index 49b7885c2702..7f27576ca046 100644
--- a/drivers/mfd/wm831x-core.c
+++ b/drivers/mfd/wm831x-core.c
@@ -29,7 +29,7 @@
 /* Current settings - values are 2*2^(reg_val/4) microamps.  These are
  * exported since they are used by multiple drivers.
  */
-int wm831x_isinkv_values[WM831X_ISINK_MAX_ISEL] = {
+int wm831x_isinkv_values[WM831X_ISINK_MAX_ISEL + 1] = {
 	2,
 	2,
 	3,
diff --git a/include/linux/mfd/wm831x/regulator.h b/include/linux/mfd/wm831x/regulator.h
index f95466343fb2..955d30fc6a27 100644
--- a/include/linux/mfd/wm831x/regulator.h
+++ b/include/linux/mfd/wm831x/regulator.h
@@ -1212,7 +1212,7 @@
 #define WM831X_LDO1_OK_SHIFT                         0  /* LDO1_OK */
 #define WM831X_LDO1_OK_WIDTH                         1  /* LDO1_OK */
 
-#define WM831X_ISINK_MAX_ISEL 56
-extern int wm831x_isinkv_values[WM831X_ISINK_MAX_ISEL];
+#define WM831X_ISINK_MAX_ISEL 55
+extern int wm831x_isinkv_values[WM831X_ISINK_MAX_ISEL + 1];
 
 #endif
-- 
cgit v1.2.3-70-g09d2


From f13a48bd798a159291ca583b95453171b88b7448 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 1 Dec 2009 15:36:11 +0000
Subject: SLOW_WORK: Move slow_work's proc file to debugfs

Move slow_work's debugging proc file to debugfs.

Signed-off-by: David Howells <dhowells@redhat.com>
Requested-and-acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/slow-work.txt |   4 +-
 include/linux/slow-work.h   |   8 +-
 init/Kconfig                |   8 +-
 kernel/Makefile             |   2 +-
 kernel/slow-work-debugfs.c  | 227 ++++++++++++++++++++++++++++++++++++++++++++
 kernel/slow-work-proc.c     | 227 --------------------------------------------
 kernel/slow-work.c          |  18 ++--
 kernel/slow-work.h          |   6 +-
 8 files changed, 253 insertions(+), 247 deletions(-)
 create mode 100644 kernel/slow-work-debugfs.c
 delete mode 100644 kernel/slow-work-proc.c

(limited to 'include')

diff --git a/Documentation/slow-work.txt b/Documentation/slow-work.txt
index 52bc31433723..9dbf4470c7e1 100644
--- a/Documentation/slow-work.txt
+++ b/Documentation/slow-work.txt
@@ -279,9 +279,9 @@ The slow-work thread pool has a number of configurables:
 VIEWING EXECUTING AND QUEUED ITEMS
 ==================================
 
-If CONFIG_SLOW_WORK_PROC is enabled, a proc file is made available:
+If CONFIG_SLOW_WORK_DEBUG is enabled, a debugfs file is made available:
 
-	/proc/slow_work_rq
+	/sys/kernel/debug/slow_work/runqueue
 
 through which the list of work items being executed and the queues of items to
 be executed may be viewed.  The owner of a work item is given the chance to
diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h
index 5035a2691739..13337bf6c3f5 100644
--- a/include/linux/slow-work.h
+++ b/include/linux/slow-work.h
@@ -20,7 +20,7 @@
 #include <linux/timer.h>
 
 struct slow_work;
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
 struct seq_file;
 #endif
 
@@ -42,8 +42,8 @@ struct slow_work_ops {
 	/* execute a work item */
 	void (*execute)(struct slow_work *work);
 
-#ifdef CONFIG_SLOW_WORK_PROC
-	/* describe a work item for /proc */
+#ifdef CONFIG_SLOW_WORK_DEBUG
+	/* describe a work item for debugfs */
 	void (*desc)(struct slow_work *work, struct seq_file *m);
 #endif
 };
@@ -64,7 +64,7 @@ struct slow_work {
 #define SLOW_WORK_DELAYED	5	/* item is struct delayed_slow_work with active timer */
 	const struct slow_work_ops *ops; /* operations table for this item */
 	struct list_head	link;	/* link in queue */
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
 	struct timespec		mark;	/* jiffies at which queued or exec begun */
 #endif
 };
diff --git a/init/Kconfig b/init/Kconfig
index ab5c64801fe5..39923ccc287b 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1098,12 +1098,12 @@ config SLOW_WORK
 
 	  See Documentation/slow-work.txt.
 
-config SLOW_WORK_PROC
-	bool "Slow work debugging through /proc"
+config SLOW_WORK_DEBUG
+	bool "Slow work debugging through debugfs"
 	default n
-	depends on SLOW_WORK && PROC_FS
+	depends on SLOW_WORK && DEBUG_FS
 	help
-	  Display the contents of the slow work run queue through /proc,
+	  Display the contents of the slow work run queue through debugfs,
 	  including items currently executing.
 
 	  See Documentation/slow-work.txt.
diff --git a/kernel/Makefile b/kernel/Makefile
index 776ffed1556d..d7c13d249b2d 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -94,7 +94,7 @@ obj-$(CONFIG_X86_DS) += trace/
 obj-$(CONFIG_RING_BUFFER) += trace/
 obj-$(CONFIG_SMP) += sched_cpupri.o
 obj-$(CONFIG_SLOW_WORK) += slow-work.o
-obj-$(CONFIG_SLOW_WORK_PROC) += slow-work-proc.o
+obj-$(CONFIG_SLOW_WORK_DEBUG) += slow-work-debugfs.o
 obj-$(CONFIG_PERF_EVENTS) += perf_event.o
 
 ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
diff --git a/kernel/slow-work-debugfs.c b/kernel/slow-work-debugfs.c
new file mode 100644
index 000000000000..e45c43645298
--- /dev/null
+++ b/kernel/slow-work-debugfs.c
@@ -0,0 +1,227 @@
+/* Slow work debugging
+ *
+ * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/slow-work.h>
+#include <linux/fs.h>
+#include <linux/time.h>
+#include <linux/seq_file.h>
+#include "slow-work.h"
+
+#define ITERATOR_SHIFT		(BITS_PER_LONG - 4)
+#define ITERATOR_SELECTOR	(0xfUL << ITERATOR_SHIFT)
+#define ITERATOR_COUNTER	(~ITERATOR_SELECTOR)
+
+void slow_work_new_thread_desc(struct slow_work *work, struct seq_file *m)
+{
+	seq_puts(m, "Slow-work: New thread");
+}
+
+/*
+ * Render the time mark field on a work item into a 5-char time with units plus
+ * a space
+ */
+static void slow_work_print_mark(struct seq_file *m, struct slow_work *work)
+{
+	struct timespec now, diff;
+
+	now = CURRENT_TIME;
+	diff = timespec_sub(now, work->mark);
+
+	if (diff.tv_sec < 0)
+		seq_puts(m, "  -ve ");
+	else if (diff.tv_sec == 0 && diff.tv_nsec < 1000)
+		seq_printf(m, "%3luns ", diff.tv_nsec);
+	else if (diff.tv_sec == 0 && diff.tv_nsec < 1000000)
+		seq_printf(m, "%3luus ", diff.tv_nsec / 1000);
+	else if (diff.tv_sec == 0 && diff.tv_nsec < 1000000000)
+		seq_printf(m, "%3lums ", diff.tv_nsec / 1000000);
+	else if (diff.tv_sec <= 1)
+		seq_puts(m, "   1s ");
+	else if (diff.tv_sec < 60)
+		seq_printf(m, "%4lus ", diff.tv_sec);
+	else if (diff.tv_sec < 60 * 60)
+		seq_printf(m, "%4lum ", diff.tv_sec / 60);
+	else if (diff.tv_sec < 60 * 60 * 24)
+		seq_printf(m, "%4luh ", diff.tv_sec / 3600);
+	else
+		seq_puts(m, "exces ");
+}
+
+/*
+ * Describe a slow work item for debugfs
+ */
+static int slow_work_runqueue_show(struct seq_file *m, void *v)
+{
+	struct slow_work *work;
+	struct list_head *p = v;
+	unsigned long id;
+
+	switch ((unsigned long) v) {
+	case 1:
+		seq_puts(m, "THR PID   ITEM ADDR        FL MARK  DESC\n");
+		return 0;
+	case 2:
+		seq_puts(m, "=== ===== ================ == ===== ==========\n");
+		return 0;
+
+	case 3 ... 3 + SLOW_WORK_THREAD_LIMIT - 1:
+		id = (unsigned long) v - 3;
+
+		read_lock(&slow_work_execs_lock);
+		work = slow_work_execs[id];
+		if (work) {
+			smp_read_barrier_depends();
+
+			seq_printf(m, "%3lu %5d %16p %2lx ",
+				   id, slow_work_pids[id], work, work->flags);
+			slow_work_print_mark(m, work);
+
+			if (work->ops->desc)
+				work->ops->desc(work, m);
+			seq_putc(m, '\n');
+		}
+		read_unlock(&slow_work_execs_lock);
+		return 0;
+
+	default:
+		work = list_entry(p, struct slow_work, link);
+		seq_printf(m, "%3s     - %16p %2lx ",
+			   work->flags & SLOW_WORK_VERY_SLOW ? "vsq" : "sq",
+			   work, work->flags);
+		slow_work_print_mark(m, work);
+
+		if (work->ops->desc)
+			work->ops->desc(work, m);
+		seq_putc(m, '\n');
+		return 0;
+	}
+}
+
+/*
+ * map the iterator to a work item
+ */
+static void *slow_work_runqueue_index(struct seq_file *m, loff_t *_pos)
+{
+	struct list_head *p;
+	unsigned long count, id;
+
+	switch (*_pos >> ITERATOR_SHIFT) {
+	case 0x0:
+		if (*_pos == 0)
+			*_pos = 1;
+		if (*_pos < 3)
+			return (void *)(unsigned long) *_pos;
+		if (*_pos < 3 + SLOW_WORK_THREAD_LIMIT)
+			for (id = *_pos - 3;
+			     id < SLOW_WORK_THREAD_LIMIT;
+			     id++, (*_pos)++)
+				if (slow_work_execs[id])
+					return (void *)(unsigned long) *_pos;
+		*_pos = 0x1UL << ITERATOR_SHIFT;
+
+	case 0x1:
+		count = *_pos & ITERATOR_COUNTER;
+		list_for_each(p, &slow_work_queue) {
+			if (count == 0)
+				return p;
+			count--;
+		}
+		*_pos = 0x2UL << ITERATOR_SHIFT;
+
+	case 0x2:
+		count = *_pos & ITERATOR_COUNTER;
+		list_for_each(p, &vslow_work_queue) {
+			if (count == 0)
+				return p;
+			count--;
+		}
+		*_pos = 0x3UL << ITERATOR_SHIFT;
+
+	default:
+		return NULL;
+	}
+}
+
+/*
+ * set up the iterator to start reading from the first line
+ */
+static void *slow_work_runqueue_start(struct seq_file *m, loff_t *_pos)
+{
+	spin_lock_irq(&slow_work_queue_lock);
+	return slow_work_runqueue_index(m, _pos);
+}
+
+/*
+ * move to the next line
+ */
+static void *slow_work_runqueue_next(struct seq_file *m, void *v, loff_t *_pos)
+{
+	struct list_head *p = v;
+	unsigned long selector = *_pos >> ITERATOR_SHIFT;
+
+	(*_pos)++;
+	switch (selector) {
+	case 0x0:
+		return slow_work_runqueue_index(m, _pos);
+
+	case 0x1:
+		if (*_pos >> ITERATOR_SHIFT == 0x1) {
+			p = p->next;
+			if (p != &slow_work_queue)
+				return p;
+		}
+		*_pos = 0x2UL << ITERATOR_SHIFT;
+		p = &vslow_work_queue;
+
+	case 0x2:
+		if (*_pos >> ITERATOR_SHIFT == 0x2) {
+			p = p->next;
+			if (p != &vslow_work_queue)
+				return p;
+		}
+		*_pos = 0x3UL << ITERATOR_SHIFT;
+
+	default:
+		return NULL;
+	}
+}
+
+/*
+ * clean up after reading
+ */
+static void slow_work_runqueue_stop(struct seq_file *m, void *v)
+{
+	spin_unlock_irq(&slow_work_queue_lock);
+}
+
+static const struct seq_operations slow_work_runqueue_ops = {
+	.start		= slow_work_runqueue_start,
+	.stop		= slow_work_runqueue_stop,
+	.next		= slow_work_runqueue_next,
+	.show		= slow_work_runqueue_show,
+};
+
+/*
+ * open "/sys/kernel/debug/slow_work/runqueue" to list queue contents
+ */
+static int slow_work_runqueue_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &slow_work_runqueue_ops);
+}
+
+const struct file_operations slow_work_runqueue_fops = {
+	.owner		= THIS_MODULE,
+	.open		= slow_work_runqueue_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
diff --git a/kernel/slow-work-proc.c b/kernel/slow-work-proc.c
deleted file mode 100644
index 3988032571f5..000000000000
--- a/kernel/slow-work-proc.c
+++ /dev/null
@@ -1,227 +0,0 @@
-/* Slow work debugging
- *
- * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/slow-work.h>
-#include <linux/fs.h>
-#include <linux/time.h>
-#include <linux/seq_file.h>
-#include "slow-work.h"
-
-#define ITERATOR_SHIFT		(BITS_PER_LONG - 4)
-#define ITERATOR_SELECTOR	(0xfUL << ITERATOR_SHIFT)
-#define ITERATOR_COUNTER	(~ITERATOR_SELECTOR)
-
-void slow_work_new_thread_desc(struct slow_work *work, struct seq_file *m)
-{
-	seq_puts(m, "Slow-work: New thread");
-}
-
-/*
- * Render the time mark field on a work item into a 5-char time with units plus
- * a space
- */
-static void slow_work_print_mark(struct seq_file *m, struct slow_work *work)
-{
-	struct timespec now, diff;
-
-	now = CURRENT_TIME;
-	diff = timespec_sub(now, work->mark);
-
-	if (diff.tv_sec < 0)
-		seq_puts(m, "  -ve ");
-	else if (diff.tv_sec == 0 && diff.tv_nsec < 1000)
-		seq_printf(m, "%3luns ", diff.tv_nsec);
-	else if (diff.tv_sec == 0 && diff.tv_nsec < 1000000)
-		seq_printf(m, "%3luus ", diff.tv_nsec / 1000);
-	else if (diff.tv_sec == 0 && diff.tv_nsec < 1000000000)
-		seq_printf(m, "%3lums ", diff.tv_nsec / 1000000);
-	else if (diff.tv_sec <= 1)
-		seq_puts(m, "   1s ");
-	else if (diff.tv_sec < 60)
-		seq_printf(m, "%4lus ", diff.tv_sec);
-	else if (diff.tv_sec < 60 * 60)
-		seq_printf(m, "%4lum ", diff.tv_sec / 60);
-	else if (diff.tv_sec < 60 * 60 * 24)
-		seq_printf(m, "%4luh ", diff.tv_sec / 3600);
-	else
-		seq_puts(m, "exces ");
-}
-
-/*
- * Describe a slow work item for /proc
- */
-static int slow_work_runqueue_show(struct seq_file *m, void *v)
-{
-	struct slow_work *work;
-	struct list_head *p = v;
-	unsigned long id;
-
-	switch ((unsigned long) v) {
-	case 1:
-		seq_puts(m, "THR PID   ITEM ADDR        FL MARK  DESC\n");
-		return 0;
-	case 2:
-		seq_puts(m, "=== ===== ================ == ===== ==========\n");
-		return 0;
-
-	case 3 ... 3 + SLOW_WORK_THREAD_LIMIT - 1:
-		id = (unsigned long) v - 3;
-
-		read_lock(&slow_work_execs_lock);
-		work = slow_work_execs[id];
-		if (work) {
-			smp_read_barrier_depends();
-
-			seq_printf(m, "%3lu %5d %16p %2lx ",
-				   id, slow_work_pids[id], work, work->flags);
-			slow_work_print_mark(m, work);
-
-			if (work->ops->desc)
-				work->ops->desc(work, m);
-			seq_putc(m, '\n');
-		}
-		read_unlock(&slow_work_execs_lock);
-		return 0;
-
-	default:
-		work = list_entry(p, struct slow_work, link);
-		seq_printf(m, "%3s     - %16p %2lx ",
-			   work->flags & SLOW_WORK_VERY_SLOW ? "vsq" : "sq",
-			   work, work->flags);
-		slow_work_print_mark(m, work);
-
-		if (work->ops->desc)
-			work->ops->desc(work, m);
-		seq_putc(m, '\n');
-		return 0;
-	}
-}
-
-/*
- * map the iterator to a work item
- */
-static void *slow_work_runqueue_index(struct seq_file *m, loff_t *_pos)
-{
-	struct list_head *p;
-	unsigned long count, id;
-
-	switch (*_pos >> ITERATOR_SHIFT) {
-	case 0x0:
-		if (*_pos == 0)
-			*_pos = 1;
-		if (*_pos < 3)
-			return (void *)(unsigned long) *_pos;
-		if (*_pos < 3 + SLOW_WORK_THREAD_LIMIT)
-			for (id = *_pos - 3;
-			     id < SLOW_WORK_THREAD_LIMIT;
-			     id++, (*_pos)++)
-				if (slow_work_execs[id])
-					return (void *)(unsigned long) *_pos;
-		*_pos = 0x1UL << ITERATOR_SHIFT;
-
-	case 0x1:
-		count = *_pos & ITERATOR_COUNTER;
-		list_for_each(p, &slow_work_queue) {
-			if (count == 0)
-				return p;
-			count--;
-		}
-		*_pos = 0x2UL << ITERATOR_SHIFT;
-
-	case 0x2:
-		count = *_pos & ITERATOR_COUNTER;
-		list_for_each(p, &vslow_work_queue) {
-			if (count == 0)
-				return p;
-			count--;
-		}
-		*_pos = 0x3UL << ITERATOR_SHIFT;
-
-	default:
-		return NULL;
-	}
-}
-
-/*
- * set up the iterator to start reading from the first line
- */
-static void *slow_work_runqueue_start(struct seq_file *m, loff_t *_pos)
-{
-	spin_lock_irq(&slow_work_queue_lock);
-	return slow_work_runqueue_index(m, _pos);
-}
-
-/*
- * move to the next line
- */
-static void *slow_work_runqueue_next(struct seq_file *m, void *v, loff_t *_pos)
-{
-	struct list_head *p = v;
-	unsigned long selector = *_pos >> ITERATOR_SHIFT;
-
-	(*_pos)++;
-	switch (selector) {
-	case 0x0:
-		return slow_work_runqueue_index(m, _pos);
-
-	case 0x1:
-		if (*_pos >> ITERATOR_SHIFT == 0x1) {
-			p = p->next;
-			if (p != &slow_work_queue)
-				return p;
-		}
-		*_pos = 0x2UL << ITERATOR_SHIFT;
-		p = &vslow_work_queue;
-
-	case 0x2:
-		if (*_pos >> ITERATOR_SHIFT == 0x2) {
-			p = p->next;
-			if (p != &vslow_work_queue)
-				return p;
-		}
-		*_pos = 0x3UL << ITERATOR_SHIFT;
-
-	default:
-		return NULL;
-	}
-}
-
-/*
- * clean up after reading
- */
-static void slow_work_runqueue_stop(struct seq_file *m, void *v)
-{
-	spin_unlock_irq(&slow_work_queue_lock);
-}
-
-static const struct seq_operations slow_work_runqueue_ops = {
-	.start		= slow_work_runqueue_start,
-	.stop		= slow_work_runqueue_stop,
-	.next		= slow_work_runqueue_next,
-	.show		= slow_work_runqueue_show,
-};
-
-/*
- * open "/proc/slow_work_rq" to list queue contents
- */
-static int slow_work_runqueue_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &slow_work_runqueue_ops);
-}
-
-const struct file_operations slow_work_runqueue_fops = {
-	.owner		= THIS_MODULE,
-	.open		= slow_work_runqueue_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
diff --git a/kernel/slow-work.c b/kernel/slow-work.c
index b5c17f15f9de..00889bd3c590 100644
--- a/kernel/slow-work.c
+++ b/kernel/slow-work.c
@@ -16,7 +16,7 @@
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 #include <linux/wait.h>
-#include <linux/proc_fs.h>
+#include <linux/debugfs.h>
 #include "slow-work.h"
 
 static void slow_work_cull_timeout(unsigned long);
@@ -138,7 +138,7 @@ static void slow_work_clear_thread_processing(int id) {}
 /*
  * Data for tracking currently executing items for indication through /proc
  */
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
 struct slow_work *slow_work_execs[SLOW_WORK_THREAD_LIMIT];
 pid_t slow_work_pids[SLOW_WORK_THREAD_LIMIT];
 DEFINE_RWLOCK(slow_work_execs_lock);
@@ -823,7 +823,7 @@ static void slow_work_new_thread_execute(struct slow_work *work)
 static const struct slow_work_ops slow_work_new_thread_ops = {
 	.owner		= THIS_MODULE,
 	.execute	= slow_work_new_thread_execute,
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
 	.desc		= slow_work_new_thread_desc,
 #endif
 };
@@ -1055,9 +1055,15 @@ static int __init init_slow_work(void)
 	if (slow_work_max_max_threads < nr_cpus * 2)
 		slow_work_max_max_threads = nr_cpus * 2;
 #endif
-#ifdef CONFIG_SLOW_WORK_PROC
-	proc_create("slow_work_rq", S_IFREG | 0400, NULL,
-		    &slow_work_runqueue_fops);
+#ifdef CONFIG_SLOW_WORK_DEBUG
+	{
+		struct dentry *dbdir;
+
+		dbdir = debugfs_create_dir("slow_work", NULL);
+		if (dbdir && !IS_ERR(dbdir))
+			debugfs_create_file("runqueue", S_IFREG | 0400, dbdir,
+					    NULL, &slow_work_runqueue_fops);
+	}
 #endif
 	return 0;
 }
diff --git a/kernel/slow-work.h b/kernel/slow-work.h
index 3c2f007f3ad6..321f3c59d732 100644
--- a/kernel/slow-work.h
+++ b/kernel/slow-work.h
@@ -19,7 +19,7 @@
 /*
  * slow-work.c
  */
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
 extern struct slow_work *slow_work_execs[];
 extern pid_t slow_work_pids[];
 extern rwlock_t slow_work_execs_lock;
@@ -30,9 +30,9 @@ extern struct list_head vslow_work_queue;
 extern spinlock_t slow_work_queue_lock;
 
 /*
- * slow-work-proc.c
+ * slow-work-debugfs.c
  */
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
 extern const struct file_operations slow_work_runqueue_fops;
 
 extern void slow_work_new_thread_desc(struct slow_work *, struct seq_file *);
-- 
cgit v1.2.3-70-g09d2


From bf56a4ea9f1683c5b223fd3a5dbea23f1fa91c34 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Tue, 1 Dec 2009 16:23:20 +0800
Subject: trace_syscalls: Remove unused event_syscall_enter and
 event_syscall_exit

fix event_enter_##sname->event
fix event_exit_##sname->event

remove unused event_syscall_enter and event_syscall_exit

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Acked-by: Jason Baron <jbaron@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <4B14D278.4090209@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/syscalls.h      | 4 ++--
 include/trace/syscall.h       | 2 --
 kernel/trace/trace_syscalls.c | 8 --------
 3 files changed, 2 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index b50974a93af0..2f7c539ab96d 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -178,7 +178,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused)     \
 	  event_enter_##sname = {					\
 		.name                   = "sys_enter"#sname,		\
 		.system                 = "syscalls",			\
-		.event                  = &event_syscall_enter,		\
+		.event                  = &enter_syscall_print_##sname,	\
 		.raw_init		= init_enter_##sname,		\
 		.show_format		= syscall_enter_format,		\
 		.define_fields		= syscall_enter_define_fields,	\
@@ -214,7 +214,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused)     \
 	  event_exit_##sname = {					\
 		.name                   = "sys_exit"#sname,		\
 		.system                 = "syscalls",			\
-		.event                  = &event_syscall_exit,		\
+		.event                  = &exit_syscall_print_##sname,	\
 		.raw_init		= init_exit_##sname,		\
 		.show_format		= syscall_exit_format,		\
 		.define_fields		= syscall_exit_define_fields,	\
diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index 51ee17d3632a..5f8827c92db7 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -37,8 +37,6 @@ extern unsigned long arch_syscall_addr(int nr);
 extern int syscall_name_to_nr(char *name);
 void set_syscall_enter_id(int num, int id);
 void set_syscall_exit_id(int num, int id);
-extern struct trace_event event_syscall_enter;
-extern struct trace_event event_syscall_exit;
 
 extern int syscall_enter_format(struct ftrace_event_call *call,
 				struct trace_seq *s);
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 63aa8070365d..00d6e176f5b6 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -444,14 +444,6 @@ void unreg_event_syscall_exit(struct ftrace_event_call *call)
 	mutex_unlock(&syscall_trace_lock);
 }
 
-struct trace_event event_syscall_enter = {
-	.trace			= print_syscall_enter,
-};
-
-struct trace_event event_syscall_exit = {
-	.trace			= print_syscall_exit,
-};
-
 int __init init_ftrace_syscalls(void)
 {
 	struct syscall_metadata *meta;
-- 
cgit v1.2.3-70-g09d2


From 31c16b13349970b2684248c7d8608d2a96ae135d Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Tue, 1 Dec 2009 16:23:30 +0800
Subject: trace_syscalls: Set event_enter_##sname->data to its metadata

Set event_enter_##sname->data to its metadata,
it makes codes simpler.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Acked-by: Jason Baron <jbaron@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <4B14D282.7050709@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/syscalls.h      |  6 ++++--
 include/trace/syscall.h       |  2 +-
 kernel/trace/trace_syscalls.c | 36 +++++++++++-------------------------
 3 files changed, 16 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 2f7c539ab96d..d3c9fd01a110 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -153,6 +153,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused)     \
 #define __SC_STR_TDECL6(t, a, ...)	#t, __SC_STR_TDECL5(__VA_ARGS__)
 
 #define SYSCALL_TRACE_ENTER_EVENT(sname)				\
+	static const struct syscall_metadata __syscall_meta_##sname;	\
 	static struct ftrace_event_call event_enter_##sname;		\
 	struct trace_event enter_syscall_print_##sname = {		\
 		.trace                  = print_syscall_enter,		\
@@ -184,11 +185,12 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused)     \
 		.define_fields		= syscall_enter_define_fields,	\
 		.regfunc		= reg_event_syscall_enter,	\
 		.unregfunc		= unreg_event_syscall_enter,	\
-		.data			= "sys"#sname,			\
+		.data			= (void *)&__syscall_meta_##sname,\
 		TRACE_SYS_ENTER_PROFILE_INIT(sname)			\
 	}
 
 #define SYSCALL_TRACE_EXIT_EVENT(sname)					\
+	static const struct syscall_metadata __syscall_meta_##sname;	\
 	static struct ftrace_event_call event_exit_##sname;		\
 	struct trace_event exit_syscall_print_##sname = {		\
 		.trace                  = print_syscall_exit,		\
@@ -220,7 +222,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused)     \
 		.define_fields		= syscall_exit_define_fields,	\
 		.regfunc		= reg_event_syscall_exit,	\
 		.unregfunc		= unreg_event_syscall_exit,	\
-		.data			= "sys"#sname,			\
+		.data			= (void *)&__syscall_meta_##sname,\
 		TRACE_SYS_EXIT_PROFILE_INIT(sname)			\
 	}
 
diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index 5f8827c92db7..c5265c81c4e7 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -34,7 +34,7 @@ struct syscall_metadata {
 
 #ifdef CONFIG_FTRACE_SYSCALLS
 extern unsigned long arch_syscall_addr(int nr);
-extern int syscall_name_to_nr(char *name);
+extern int syscall_name_to_nr(const char *name);
 void set_syscall_enter_id(int num, int id);
 void set_syscall_exit_id(int num, int id);
 
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 00d6e176f5b6..39649b1675dd 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -51,7 +51,7 @@ static struct syscall_metadata *syscall_nr_to_meta(int nr)
 	return syscalls_metadata[nr];
 }
 
-int syscall_name_to_nr(char *name)
+int syscall_name_to_nr(const char *name)
 {
 	int i;
 
@@ -172,18 +172,11 @@ extern char *__bad_type_size(void);
 int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
 {
 	int i;
-	int nr;
 	int ret;
-	struct syscall_metadata *entry;
+	struct syscall_metadata *entry = call->data;
 	struct syscall_trace_enter trace;
 	int offset = offsetof(struct syscall_trace_enter, args);
 
-	nr = syscall_name_to_nr(call->data);
-	entry = syscall_nr_to_meta(nr);
-
-	if (!entry)
-		return 0;
-
 	ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
 			       "\tsigned:%u;\n",
 			       SYSCALL_FIELD(int, nr));
@@ -245,18 +238,11 @@ int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
 int syscall_enter_define_fields(struct ftrace_event_call *call)
 {
 	struct syscall_trace_enter trace;
-	struct syscall_metadata *meta;
+	struct syscall_metadata *meta = call->data;
 	int ret;
-	int nr;
 	int i;
 	int offset = offsetof(typeof(trace), args);
 
-	nr = syscall_name_to_nr(call->data);
-	meta = syscall_nr_to_meta(nr);
-
-	if (!meta)
-		return 0;
-
 	ret = trace_define_common_fields(call);
 	if (ret)
 		return ret;
@@ -366,9 +352,9 @@ int reg_event_syscall_enter(struct ftrace_event_call *call)
 {
 	int ret = 0;
 	int num;
-	char *name;
+	const char *name;
 
-	name = (char *)call->data;
+	name = ((struct syscall_metadata *)call->data)->name;
 	num = syscall_name_to_nr(name);
 	if (num < 0 || num >= NR_syscalls)
 		return -ENOSYS;
@@ -389,9 +375,9 @@ int reg_event_syscall_enter(struct ftrace_event_call *call)
 void unreg_event_syscall_enter(struct ftrace_event_call *call)
 {
 	int num;
-	char *name;
+	const char *name;
 
-	name = (char *)call->data;
+	name = ((struct syscall_metadata *)call->data)->name;
 	num = syscall_name_to_nr(name);
 	if (num < 0 || num >= NR_syscalls)
 		return;
@@ -407,9 +393,9 @@ int reg_event_syscall_exit(struct ftrace_event_call *call)
 {
 	int ret = 0;
 	int num;
-	char *name;
+	const char *name;
 
-	name = call->data;
+	name = ((struct syscall_metadata *)call->data)->name;
 	num = syscall_name_to_nr(name);
 	if (num < 0 || num >= NR_syscalls)
 		return -ENOSYS;
@@ -430,9 +416,9 @@ int reg_event_syscall_exit(struct ftrace_event_call *call)
 void unreg_event_syscall_exit(struct ftrace_event_call *call)
 {
 	int num;
-	char *name;
+	const char *name;
 
-	name = call->data;
+	name = ((struct syscall_metadata *)call->data)->name;
 	num = syscall_name_to_nr(name);
 	if (num < 0 || num >= NR_syscalls)
 		return;
-- 
cgit v1.2.3-70-g09d2


From fcc19438dda38dacc8c144e2db3ebc6b9fd4f8b8 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Tue, 1 Dec 2009 16:23:36 +0800
Subject: trace_syscalls: Remove enter_id exit_id

use ->enter_event->id instead of ->enter_id
use ->exit_event->id instead of ->exit_id

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Acked-by: Jason Baron <jbaron@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <4B14D288.7030001@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/syscalls.h      |  2 --
 include/trace/syscall.h       |  6 ------
 kernel/trace/trace_syscalls.c | 30 ++++++++++--------------------
 3 files changed, 10 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index d3c9fd01a110..b9af87560adb 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -168,7 +168,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused)     \
 		if (!id)						\
 			return -ENODEV;					\
 		event_enter_##sname.id = id;				\
-		set_syscall_enter_id(num, id);				\
 		INIT_LIST_HEAD(&event_enter_##sname.fields);		\
 		return 0;						\
 	}								\
@@ -205,7 +204,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused)     \
 		if (!id)						\
 			return -ENODEV;					\
 		event_exit_##sname.id = id;				\
-		set_syscall_exit_id(num, id);				\
 		INIT_LIST_HEAD(&event_exit_##sname.fields);		\
 		return 0;						\
 	}								\
diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index c5265c81c4e7..ca09561cd578 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -15,8 +15,6 @@
  * @nb_args: number of parameters it takes
  * @types: list of types as strings
  * @args: list of args as strings (args[i] matches types[i])
- * @enter_id: associated ftrace enter event id
- * @exit_id: associated ftrace exit event id
  * @enter_event: associated syscall_enter trace event
  * @exit_event: associated syscall_exit trace event
  */
@@ -25,8 +23,6 @@ struct syscall_metadata {
 	int		nb_args;
 	const char	**types;
 	const char	**args;
-	int		enter_id;
-	int		exit_id;
 
 	struct ftrace_event_call *enter_event;
 	struct ftrace_event_call *exit_event;
@@ -35,8 +31,6 @@ struct syscall_metadata {
 #ifdef CONFIG_FTRACE_SYSCALLS
 extern unsigned long arch_syscall_addr(int nr);
 extern int syscall_name_to_nr(const char *name);
-void set_syscall_enter_id(int num, int id);
-void set_syscall_exit_id(int num, int id);
 
 extern int syscall_enter_format(struct ftrace_event_call *call,
 				struct trace_seq *s);
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 39649b1675dd..27eb18d69222 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -67,16 +67,6 @@ int syscall_name_to_nr(const char *name)
 	return -1;
 }
 
-void set_syscall_enter_id(int num, int id)
-{
-	syscalls_metadata[num]->enter_id = id;
-}
-
-void set_syscall_exit_id(int num, int id)
-{
-	syscalls_metadata[num]->exit_id = id;
-}
-
 enum print_line_t
 print_syscall_enter(struct trace_iterator *iter, int flags)
 {
@@ -93,7 +83,7 @@ print_syscall_enter(struct trace_iterator *iter, int flags)
 	if (!entry)
 		goto end;
 
-	if (entry->enter_id != ent->type) {
+	if (entry->enter_event->id != ent->type) {
 		WARN_ON_ONCE(1);
 		goto end;
 	}
@@ -148,7 +138,7 @@ print_syscall_exit(struct trace_iterator *iter, int flags)
 		return TRACE_TYPE_HANDLED;
 	}
 
-	if (entry->exit_id != ent->type) {
+	if (entry->exit_event->id != ent->type) {
 		WARN_ON_ONCE(1);
 		return TRACE_TYPE_UNHANDLED;
 	}
@@ -302,8 +292,8 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id)
 
 	size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
 
-	event = trace_current_buffer_lock_reserve(&buffer, sys_data->enter_id,
-						  size, 0, 0);
+	event = trace_current_buffer_lock_reserve(&buffer,
+			sys_data->enter_event->id, size, 0, 0);
 	if (!event)
 		return;
 
@@ -334,8 +324,8 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret)
 	if (!sys_data)
 		return;
 
-	event = trace_current_buffer_lock_reserve(&buffer, sys_data->exit_id,
-				sizeof(*entry), 0, 0);
+	event = trace_current_buffer_lock_reserve(&buffer,
+			sys_data->exit_event->id, sizeof(*entry), 0, 0);
 	if (!event)
 		return;
 
@@ -510,11 +500,11 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
 
 	rec = (struct syscall_trace_enter *) raw_data;
 	tracing_generic_entry_update(&rec->ent, 0, 0);
-	rec->ent.type = sys_data->enter_id;
+	rec->ent.type = sys_data->enter_event->id;
 	rec->nr = syscall_nr;
 	syscall_get_arguments(current, regs, 0, sys_data->nb_args,
 			       (unsigned long *)&rec->args);
-	perf_tp_event(sys_data->enter_id, 0, 1, rec, size);
+	perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size);
 
 end:
 	perf_swevent_put_recursion_context(rctx);
@@ -615,11 +605,11 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 	rec = (struct syscall_trace_exit *)raw_data;
 
 	tracing_generic_entry_update(&rec->ent, 0, 0);
-	rec->ent.type = sys_data->exit_id;
+	rec->ent.type = sys_data->exit_event->id;
 	rec->nr = syscall_nr;
 	rec->ret = syscall_get_return_value(current, regs);
 
-	perf_tp_event(sys_data->exit_id, 0, 1, rec, size);
+	perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size);
 
 end:
 	perf_swevent_put_recursion_context(rctx);
-- 
cgit v1.2.3-70-g09d2


From c252f65793874b56d50395ab604db465ce688665 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Tue, 1 Dec 2009 16:23:47 +0800
Subject: trace_syscalls: Add syscall_nr field to struct syscall_metadata

Add syscall_nr field to struct syscall_metadata,
it helps us to get syscall number easier.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Acked-by: Jason Baron <jbaron@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <4B14D293.6090800@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/syscalls.h      |  4 ++--
 include/trace/syscall.h       |  3 ++-
 kernel/trace/trace_syscalls.c | 22 +++++++++-------------
 3 files changed, 13 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index b9af87560adb..3c280d7ecb76 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -161,7 +161,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused)     \
 	static int init_enter_##sname(struct ftrace_event_call *call)	\
 	{								\
 		int num, id;						\
-		num = syscall_name_to_nr("sys"#sname);			\
+		num = __syscall_meta_##sname.syscall_nr;		\
 		if (num < 0)						\
 			return -ENOSYS;					\
 		id = register_ftrace_event(&enter_syscall_print_##sname);\
@@ -197,7 +197,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused)     \
 	static int init_exit_##sname(struct ftrace_event_call *call)	\
 	{								\
 		int num, id;						\
-		num = syscall_name_to_nr("sys"#sname);			\
+		num = __syscall_meta_##sname.syscall_nr;		\
 		if (num < 0)						\
 			return -ENOSYS;					\
 		id = register_ftrace_event(&exit_syscall_print_##sname);\
diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index ca09561cd578..1531eef3071f 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -12,6 +12,7 @@
  * A syscall entry in the ftrace syscalls array.
  *
  * @name: name of the syscall
+ * @syscall_nr: number of the syscall
  * @nb_args: number of parameters it takes
  * @types: list of types as strings
  * @args: list of args as strings (args[i] matches types[i])
@@ -20,6 +21,7 @@
  */
 struct syscall_metadata {
 	const char	*name;
+	int		syscall_nr;
 	int		nb_args;
 	const char	**types;
 	const char	**args;
@@ -30,7 +32,6 @@ struct syscall_metadata {
 
 #ifdef CONFIG_FTRACE_SYSCALLS
 extern unsigned long arch_syscall_addr(int nr);
-extern int syscall_name_to_nr(const char *name);
 
 extern int syscall_enter_format(struct ftrace_event_call *call,
 				struct trace_seq *s);
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 27eb18d69222..144cc14d8551 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -51,7 +51,7 @@ static struct syscall_metadata *syscall_nr_to_meta(int nr)
 	return syscalls_metadata[nr];
 }
 
-int syscall_name_to_nr(const char *name)
+static int syscall_name_to_nr(const char *name)
 {
 	int i;
 
@@ -342,10 +342,8 @@ int reg_event_syscall_enter(struct ftrace_event_call *call)
 {
 	int ret = 0;
 	int num;
-	const char *name;
 
-	name = ((struct syscall_metadata *)call->data)->name;
-	num = syscall_name_to_nr(name);
+	num = ((struct syscall_metadata *)call->data)->syscall_nr;
 	if (num < 0 || num >= NR_syscalls)
 		return -ENOSYS;
 	mutex_lock(&syscall_trace_lock);
@@ -365,10 +363,8 @@ int reg_event_syscall_enter(struct ftrace_event_call *call)
 void unreg_event_syscall_enter(struct ftrace_event_call *call)
 {
 	int num;
-	const char *name;
 
-	name = ((struct syscall_metadata *)call->data)->name;
-	num = syscall_name_to_nr(name);
+	num = ((struct syscall_metadata *)call->data)->syscall_nr;
 	if (num < 0 || num >= NR_syscalls)
 		return;
 	mutex_lock(&syscall_trace_lock);
@@ -383,10 +379,8 @@ int reg_event_syscall_exit(struct ftrace_event_call *call)
 {
 	int ret = 0;
 	int num;
-	const char *name;
 
-	name = ((struct syscall_metadata *)call->data)->name;
-	num = syscall_name_to_nr(name);
+	num = ((struct syscall_metadata *)call->data)->syscall_nr;
 	if (num < 0 || num >= NR_syscalls)
 		return -ENOSYS;
 	mutex_lock(&syscall_trace_lock);
@@ -406,10 +400,8 @@ int reg_event_syscall_exit(struct ftrace_event_call *call)
 void unreg_event_syscall_exit(struct ftrace_event_call *call)
 {
 	int num;
-	const char *name;
 
-	name = ((struct syscall_metadata *)call->data)->name;
-	num = syscall_name_to_nr(name);
+	num = ((struct syscall_metadata *)call->data)->syscall_nr;
 	if (num < 0 || num >= NR_syscalls)
 		return;
 	mutex_lock(&syscall_trace_lock);
@@ -436,6 +428,10 @@ int __init init_ftrace_syscalls(void)
 	for (i = 0; i < NR_syscalls; i++) {
 		addr = arch_syscall_addr(i);
 		meta = find_syscall_meta(addr);
+		if (!meta)
+			continue;
+
+		meta->syscall_nr = i;
 		syscalls_metadata[i] = meta;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From a1301da0997bf73c44dbe584e9070a13adc89672 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Tue, 1 Dec 2009 16:23:55 +0800
Subject: trace_syscalls: Remove duplicate init_enter_##sname()

use only one init_syscall_trace instead of
many init_enter_##sname()/init_exit_##sname()

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Acked-by: Jason Baron <jbaron@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <4B14D29B.6090708@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/syscalls.h      | 30 ++----------------------------
 include/trace/syscall.h       |  1 +
 kernel/trace/trace_syscalls.c | 12 ++++++++++++
 3 files changed, 15 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 3c280d7ecb76..cf0d923ea40e 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -158,19 +158,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused)     \
 	struct trace_event enter_syscall_print_##sname = {		\
 		.trace                  = print_syscall_enter,		\
 	};								\
-	static int init_enter_##sname(struct ftrace_event_call *call)	\
-	{								\
-		int num, id;						\
-		num = __syscall_meta_##sname.syscall_nr;		\
-		if (num < 0)						\
-			return -ENOSYS;					\
-		id = register_ftrace_event(&enter_syscall_print_##sname);\
-		if (!id)						\
-			return -ENODEV;					\
-		event_enter_##sname.id = id;				\
-		INIT_LIST_HEAD(&event_enter_##sname.fields);		\
-		return 0;						\
-	}								\
 	TRACE_SYS_ENTER_PROFILE(sname);					\
 	static struct ftrace_event_call __used				\
 	  __attribute__((__aligned__(4)))				\
@@ -179,7 +166,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused)     \
 		.name                   = "sys_enter"#sname,		\
 		.system                 = "syscalls",			\
 		.event                  = &enter_syscall_print_##sname,	\
-		.raw_init		= init_enter_##sname,		\
+		.raw_init		= init_syscall_trace,		\
 		.show_format		= syscall_enter_format,		\
 		.define_fields		= syscall_enter_define_fields,	\
 		.regfunc		= reg_event_syscall_enter,	\
@@ -194,19 +181,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused)     \
 	struct trace_event exit_syscall_print_##sname = {		\
 		.trace                  = print_syscall_exit,		\
 	};								\
-	static int init_exit_##sname(struct ftrace_event_call *call)	\
-	{								\
-		int num, id;						\
-		num = __syscall_meta_##sname.syscall_nr;		\
-		if (num < 0)						\
-			return -ENOSYS;					\
-		id = register_ftrace_event(&exit_syscall_print_##sname);\
-		if (!id)						\
-			return -ENODEV;					\
-		event_exit_##sname.id = id;				\
-		INIT_LIST_HEAD(&event_exit_##sname.fields);		\
-		return 0;						\
-	}								\
 	TRACE_SYS_EXIT_PROFILE(sname);					\
 	static struct ftrace_event_call __used				\
 	  __attribute__((__aligned__(4)))				\
@@ -215,7 +189,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused)     \
 		.name                   = "sys_exit"#sname,		\
 		.system                 = "syscalls",			\
 		.event                  = &exit_syscall_print_##sname,	\
-		.raw_init		= init_exit_##sname,		\
+		.raw_init		= init_syscall_trace,		\
 		.show_format		= syscall_exit_format,		\
 		.define_fields		= syscall_exit_define_fields,	\
 		.regfunc		= reg_event_syscall_exit,	\
diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index 1531eef3071f..dff9371e5274 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -32,6 +32,7 @@ struct syscall_metadata {
 
 #ifdef CONFIG_FTRACE_SYSCALLS
 extern unsigned long arch_syscall_addr(int nr);
+extern int init_syscall_trace(struct ftrace_event_call *call);
 
 extern int syscall_enter_format(struct ftrace_event_call *call,
 				struct trace_seq *s);
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 144cc14d8551..c6514093c95a 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -412,6 +412,18 @@ void unreg_event_syscall_exit(struct ftrace_event_call *call)
 	mutex_unlock(&syscall_trace_lock);
 }
 
+int init_syscall_trace(struct ftrace_event_call *call)
+{
+	int id;
+
+	id = register_ftrace_event(call->event);
+	if (!id)
+		return -ENODEV;
+	call->id = id;
+	INIT_LIST_HEAD(&call->fields);
+	return 0;
+}
+
 int __init init_ftrace_syscalls(void)
 {
 	struct syscall_metadata *meta;
-- 
cgit v1.2.3-70-g09d2


From 3bbe84e9d385205d638035ee9dcc4db1b486ea08 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Tue, 1 Dec 2009 16:24:01 +0800
Subject: trace_syscalls: Simplify syscall profile

use only one prof_sysenter_enable() instead of
prof_sysenter_enable_##sname()

use only one prof_sysenter_disable() instead of
prof_sysenter_disable_##sname()

use only one prof_sysexit_enable() instead of
prof_sysexit_enable_##sname()

use only one prof_sysexit_disable() instead of
prof_sysexit_disable_##sname()

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Acked-by: Jason Baron <jbaron@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <4B14D2A1.8060304@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/syscalls.h      | 31 ++++---------------------------
 include/trace/syscall.h       |  8 ++++----
 kernel/trace/trace_syscalls.c | 24 ++++++++----------------
 3 files changed, 16 insertions(+), 47 deletions(-)

(limited to 'include')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index cf0d923ea40e..c2df3a593236 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -99,37 +99,16 @@ struct perf_event_attr;
 #define __SC_TEST6(t6, a6, ...)	__SC_TEST(t6); __SC_TEST5(__VA_ARGS__)
 
 #ifdef CONFIG_EVENT_PROFILE
-#define TRACE_SYS_ENTER_PROFILE(sname)					       \
-static int prof_sysenter_enable_##sname(struct ftrace_event_call *unused)      \
-{									       \
-	return reg_prof_syscall_enter("sys"#sname);			       \
-}									       \
-									       \
-static void prof_sysenter_disable_##sname(struct ftrace_event_call *unused)    \
-{									       \
-	unreg_prof_syscall_enter("sys"#sname);				       \
-}
-
-#define TRACE_SYS_EXIT_PROFILE(sname)					       \
-static int prof_sysexit_enable_##sname(struct ftrace_event_call *unused)       \
-{									       \
-	return reg_prof_syscall_exit("sys"#sname);			       \
-}									       \
-									       \
-static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused)     \
-{                                                                              \
-	unreg_prof_syscall_exit("sys"#sname);				       \
-}
 
 #define TRACE_SYS_ENTER_PROFILE_INIT(sname)				       \
 	.profile_count = ATOMIC_INIT(-1),				       \
-	.profile_enable = prof_sysenter_enable_##sname,			       \
-	.profile_disable = prof_sysenter_disable_##sname,
+	.profile_enable = prof_sysenter_enable,				       \
+	.profile_disable = prof_sysenter_disable,
 
 #define TRACE_SYS_EXIT_PROFILE_INIT(sname)				       \
 	.profile_count = ATOMIC_INIT(-1),				       \
-	.profile_enable = prof_sysexit_enable_##sname,			       \
-	.profile_disable = prof_sysexit_disable_##sname,
+	.profile_enable = prof_sysexit_enable,				       \
+	.profile_disable = prof_sysexit_disable,
 #else
 #define TRACE_SYS_ENTER_PROFILE(sname)
 #define TRACE_SYS_ENTER_PROFILE_INIT(sname)
@@ -158,7 +137,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused)     \
 	struct trace_event enter_syscall_print_##sname = {		\
 		.trace                  = print_syscall_enter,		\
 	};								\
-	TRACE_SYS_ENTER_PROFILE(sname);					\
 	static struct ftrace_event_call __used				\
 	  __attribute__((__aligned__(4)))				\
 	  __attribute__((section("_ftrace_events")))			\
@@ -181,7 +159,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused)     \
 	struct trace_event exit_syscall_print_##sname = {		\
 		.trace                  = print_syscall_exit,		\
 	};								\
-	TRACE_SYS_EXIT_PROFILE(sname);					\
 	static struct ftrace_event_call __used				\
 	  __attribute__((__aligned__(4)))				\
 	  __attribute__((section("_ftrace_events")))			\
diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index dff9371e5274..961fda3556bb 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -50,10 +50,10 @@ enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags);
 enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags);
 #endif
 #ifdef CONFIG_EVENT_PROFILE
-int reg_prof_syscall_enter(char *name);
-void unreg_prof_syscall_enter(char *name);
-int reg_prof_syscall_exit(char *name);
-void unreg_prof_syscall_exit(char *name);
+int prof_sysenter_enable(struct ftrace_event_call *call);
+void prof_sysenter_disable(struct ftrace_event_call *call);
+int prof_sysexit_enable(struct ftrace_event_call *call);
+void prof_sysexit_disable(struct ftrace_event_call *call);
 
 #endif
 
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index c6514093c95a..1e85b6cc26aa 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -520,14 +520,12 @@ end_recursion:
 	local_irq_restore(flags);
 }
 
-int reg_prof_syscall_enter(char *name)
+int prof_sysenter_enable(struct ftrace_event_call *call)
 {
 	int ret = 0;
 	int num;
 
-	num = syscall_name_to_nr(name);
-	if (num < 0 || num >= NR_syscalls)
-		return -ENOSYS;
+	num = ((struct syscall_metadata *)call->data)->syscall_nr;
 
 	mutex_lock(&syscall_trace_lock);
 	if (!sys_prof_refcount_enter)
@@ -543,13 +541,11 @@ int reg_prof_syscall_enter(char *name)
 	return ret;
 }
 
-void unreg_prof_syscall_enter(char *name)
+void prof_sysenter_disable(struct ftrace_event_call *call)
 {
 	int num;
 
-	num = syscall_name_to_nr(name);
-	if (num < 0 || num >= NR_syscalls)
-		return;
+	num = ((struct syscall_metadata *)call->data)->syscall_nr;
 
 	mutex_lock(&syscall_trace_lock);
 	sys_prof_refcount_enter--;
@@ -625,14 +621,12 @@ end_recursion:
 	local_irq_restore(flags);
 }
 
-int reg_prof_syscall_exit(char *name)
+int prof_sysexit_enable(struct ftrace_event_call *call)
 {
 	int ret = 0;
 	int num;
 
-	num = syscall_name_to_nr(name);
-	if (num < 0 || num >= NR_syscalls)
-		return -ENOSYS;
+	num = ((struct syscall_metadata *)call->data)->syscall_nr;
 
 	mutex_lock(&syscall_trace_lock);
 	if (!sys_prof_refcount_exit)
@@ -648,13 +642,11 @@ int reg_prof_syscall_exit(char *name)
 	return ret;
 }
 
-void unreg_prof_syscall_exit(char *name)
+void prof_sysexit_disable(struct ftrace_event_call *call)
 {
 	int num;
 
-	num = syscall_name_to_nr(name);
-	if (num < 0 || num >= NR_syscalls)
-		return;
+	num = ((struct syscall_metadata *)call->data)->syscall_nr;
 
 	mutex_lock(&syscall_trace_lock);
 	sys_prof_refcount_exit--;
-- 
cgit v1.2.3-70-g09d2


From 8592e6486a177a02f048567cb928bc3a1f9a86c3 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 2 Dec 2009 12:56:46 +0900
Subject: sched: Revert 498657a478c60be092208422fefa9c7b248729c2

498657a478c60be092208422fefa9c7b248729c2 incorrectly assumed
that preempt wasn't disabled around context_switch() and thus
was fixing imaginary problem.  It also broke KVM because it
depended on ->sched_in() to be called with irq enabled so that
it can do smp calls from there.

Revert the incorrect commit and add comment describing different
contexts under with the two callbacks are invoked.

Avi: spotted transposed in/out in the added comment.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Avi Kivity <avi@redhat.com>
Cc: peterz@infradead.org
Cc: efault@gmx.de
Cc: rusty@rustcorp.com.au
LKML-Reference: <1259726212-30259-2-git-send-email-tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/preempt.h | 5 +++++
 kernel/sched.c          | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 72b1a10a59b6..2e681d9555bd 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -105,6 +105,11 @@ struct preempt_notifier;
  * @sched_out: we've just been preempted
  *    notifier: struct preempt_notifier for the task being preempted
  *    next: the task that's kicking us out
+ *
+ * Please note that sched_in and out are called under different
+ * contexts.  sched_out is called with rq lock held and irq disabled
+ * while sched_in is called without rq lock and irq enabled.  This
+ * difference is intentional and depended upon by its users.
  */
 struct preempt_ops {
 	void (*sched_in)(struct preempt_notifier *notifier, int cpu);
diff --git a/kernel/sched.c b/kernel/sched.c
index b3d4e2be95aa..1031cae39c4c 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2768,9 +2768,9 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	prev_state = prev->state;
 	finish_arch_switch(prev);
 	perf_event_task_sched_in(current, cpu_of(rq));
-	fire_sched_in_preempt_notifiers(current);
 	finish_lock_switch(rq, prev);
 
+	fire_sched_in_preempt_notifiers(current);
 	if (mm)
 		mmdrop(mm);
 	if (unlikely(prev_state == TASK_DEAD)) {
-- 
cgit v1.2.3-70-g09d2


From 6b62fe019e39edfd1dbe3f224ecd0a87d9365223 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 2 Dec 2009 07:23:10 +0100
Subject: tracing/syscalls: Make syscall events print callbacks static

enter_syscall_print_##sname and exit_syscall_print_##sname don't
need to have a global scope. Make them static.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
LKML-Reference: <1259734990-9034-1-git-send-regression-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/syscalls.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index c2df3a593236..e79e2f3ccc51 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -134,7 +134,7 @@ struct perf_event_attr;
 #define SYSCALL_TRACE_ENTER_EVENT(sname)				\
 	static const struct syscall_metadata __syscall_meta_##sname;	\
 	static struct ftrace_event_call event_enter_##sname;		\
-	struct trace_event enter_syscall_print_##sname = {		\
+	static struct trace_event enter_syscall_print_##sname = {	\
 		.trace                  = print_syscall_enter,		\
 	};								\
 	static struct ftrace_event_call __used				\
@@ -156,7 +156,7 @@ struct perf_event_attr;
 #define SYSCALL_TRACE_EXIT_EVENT(sname)					\
 	static const struct syscall_metadata __syscall_meta_##sname;	\
 	static struct ftrace_event_call event_exit_##sname;		\
-	struct trace_event exit_syscall_print_##sname = {		\
+	static struct trace_event exit_syscall_print_##sname = {	\
 		.trace                  = print_syscall_exit,		\
 	};								\
 	static struct ftrace_event_call __used				\
-- 
cgit v1.2.3-70-g09d2


From fa1452e808732ae10e8b1267fd75fc2d028d634b Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Mon, 30 Nov 2009 14:59:44 +0900
Subject: locking, task_struct: Reduce size on TRACE_IRQFLAGS and 64bit

Reorder task_struct field for TRACE_IRQFLAGS to remove padding
on 64-bit.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4B135F50.8070302@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 75e6e60bf583..49be8f7c05f6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1421,17 +1421,17 @@ struct task_struct {
 #endif
 #ifdef CONFIG_TRACE_IRQFLAGS
 	unsigned int irq_events;
-	int hardirqs_enabled;
 	unsigned long hardirq_enable_ip;
-	unsigned int hardirq_enable_event;
 	unsigned long hardirq_disable_ip;
+	unsigned int hardirq_enable_event;
 	unsigned int hardirq_disable_event;
-	int softirqs_enabled;
+	int hardirqs_enabled;
+	int hardirq_context;
 	unsigned long softirq_disable_ip;
-	unsigned int softirq_disable_event;
 	unsigned long softirq_enable_ip;
+	unsigned int softirq_disable_event;
 	unsigned int softirq_enable_event;
-	int hardirq_context;
+	int softirqs_enabled;
 	int softirq_context;
 #endif
 #ifdef CONFIG_LOCKDEP
-- 
cgit v1.2.3-70-g09d2


From d99ca3b977fc5a93141304f571475c2af9e6c1c5 Mon Sep 17 00:00:00 2001
From: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Date: Wed, 2 Dec 2009 17:26:47 +0900
Subject: sched, cputime: Cleanups related to task_times()

- Remove if({u,s}t)s because no one call it with NULL now.
- Use cputime_{add,sub}().
- Add ifndef-endif for prev_{u,s}time since they are used
  only when !VIRT_CPU_ACCOUNTING.

Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Spencer Candland <spencer@bluehost.com>
Cc: Americo Wang <xiyou.wangcong@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Stanislaw Gruszka <sgruszka@redhat.com>
LKML-Reference: <4B1624C7.7040302@jp.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h |  2 ++
 kernel/fork.c         |  2 ++
 kernel/sched.c        | 16 ++++++----------
 3 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0395b0f4df3a..dff85e58264e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1331,7 +1331,9 @@ struct task_struct {
 
 	cputime_t utime, stime, utimescaled, stimescaled;
 	cputime_t gtime;
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
 	cputime_t prev_utime, prev_stime;
+#endif
 	unsigned long nvcsw, nivcsw; /* context switch counts */
 	struct timespec start_time; 		/* monotonic time */
 	struct timespec real_start_time;	/* boot based time */
diff --git a/kernel/fork.c b/kernel/fork.c
index 166b8c49257c..ad7cb6d1193c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1066,8 +1066,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->gtime = cputime_zero;
 	p->utimescaled = cputime_zero;
 	p->stimescaled = cputime_zero;
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
 	p->prev_utime = cputime_zero;
 	p->prev_stime = cputime_zero;
+#endif
 
 	p->default_timer_slack_ns = current->timer_slack_ns;
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 4883fee99314..17e2c1db2bde 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5184,10 +5184,8 @@ void account_idle_ticks(unsigned long ticks)
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
-	if (ut)
-		*ut = p->utime;
-	if (st)
-		*st = p->stime;
+	*ut = p->utime;
+	*st = p->stime;
 }
 #else
 
@@ -5197,7 +5195,7 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 
 void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
-	cputime_t rtime, utime = p->utime, total = utime + p->stime;
+	cputime_t rtime, utime = p->utime, total = cputime_add(utime, p->stime);
 
 	/*
 	 * Use CFS's precise accounting:
@@ -5217,12 +5215,10 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 	 * Compare with previous values, to keep monotonicity:
 	 */
 	p->prev_utime = max(p->prev_utime, utime);
-	p->prev_stime = max(p->prev_stime, rtime - p->prev_utime);
+	p->prev_stime = max(p->prev_stime, cputime_sub(rtime, p->prev_utime));
 
-	if (ut)
-		*ut = p->prev_utime;
-	if (st)
-		*st = p->prev_stime;
+	*ut = p->prev_utime;
+	*st = p->prev_stime;
 }
 #endif
 
-- 
cgit v1.2.3-70-g09d2


From 0cf55e1ec08bb5a22e068309e2d8ba1180ab4239 Mon Sep 17 00:00:00 2001
From: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Date: Wed, 2 Dec 2009 17:28:07 +0900
Subject: sched, cputime: Introduce thread_group_times()

This is a real fix for problem of utime/stime values decreasing
described in the thread:

   http://lkml.org/lkml/2009/11/3/522

Now cputime is accounted in the following way:

 - {u,s}time in task_struct are increased every time when the thread
   is interrupted by a tick (timer interrupt).

 - When a thread exits, its {u,s}time are added to signal->{u,s}time,
   after adjusted by task_times().

 - When all threads in a thread_group exits, accumulated {u,s}time
   (and also c{u,s}time) in signal struct are added to c{u,s}time
   in signal struct of the group's parent.

So {u,s}time in task struct are "raw" tick count, while
{u,s}time and c{u,s}time in signal struct are "adjusted" values.

And accounted values are used by:

 - task_times(), to get cputime of a thread:
   This function returns adjusted values that originates from raw
   {u,s}time and scaled by sum_exec_runtime that accounted by CFS.

 - thread_group_cputime(), to get cputime of a thread group:
   This function returns sum of all {u,s}time of living threads in
   the group, plus {u,s}time in the signal struct that is sum of
   adjusted cputimes of all exited threads belonged to the group.

The problem is the return value of thread_group_cputime(),
because it is mixed sum of "raw" value and "adjusted" value:

  group's {u,s}time = foreach(thread){{u,s}time} + exited({u,s}time)

This misbehavior can break {u,s}time monotonicity.
Assume that if there is a thread that have raw values greater
than adjusted values (e.g. interrupted by 1000Hz ticks 50 times
but only runs 45ms) and if it exits, cputime will decrease (e.g.
-5ms).

To fix this, we could do:

  group's {u,s}time = foreach(t){task_times(t)} + exited({u,s}time)

But task_times() contains hard divisions, so applying it for
every thread should be avoided.

This patch fixes the above problem in the following way:

 - Modify thread's exit (= __exit_signal()) not to use task_times().
   It means {u,s}time in signal struct accumulates raw values instead
   of adjusted values.  As the result it makes thread_group_cputime()
   to return pure sum of "raw" values.

 - Introduce a new function thread_group_times(*task, *utime, *stime)
   that converts "raw" values of thread_group_cputime() to "adjusted"
   values, in same calculation procedure as task_times().

 - Modify group's exit (= wait_task_zombie()) to use this introduced
   thread_group_times().  It make c{u,s}time in signal struct to
   have adjusted values like before this patch.

 - Replace some thread_group_cputime() by thread_group_times().
   This replacements are only applied where conveys the "adjusted"
   cputime to users, and where already uses task_times() near by it.
   (i.e. sys_times(), getrusage(), and /proc/<PID>/stat.)

This patch have a positive side effect:

 - Before this patch, if a group contains many short-life threads
   (e.g. runs 0.9ms and not interrupted by ticks), the group's
   cputime could be invisible since thread's cputime was accumulated
   after adjusted: imagine adjustment function as adj(ticks, runtime),
     {adj(0, 0.9) + adj(0, 0.9) + ....} = {0 + 0 + ....} = 0.
   After this patch it will not happen because the adjustment is
   applied after accumulated.

v2:
 - remove if()s, put new variables into signal_struct.

Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Spencer Candland <spencer@bluehost.com>
Cc: Americo Wang <xiyou.wangcong@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Stanislaw Gruszka <sgruszka@redhat.com>
LKML-Reference: <4B162517.8040909@jp.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 fs/proc/array.c       |  5 +----
 include/linux/sched.h |  4 ++++
 kernel/exit.c         | 23 ++++++++++++-----------
 kernel/fork.c         |  3 +++
 kernel/sched.c        | 41 +++++++++++++++++++++++++++++++++++++++++
 kernel/sys.c          | 18 ++++++++----------
 6 files changed, 69 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/fs/proc/array.c b/fs/proc/array.c
index ca61a88aed66..2571da43c736 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -506,7 +506,6 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 
 		/* add up live thread stats at the group level */
 		if (whole) {
-			struct task_cputime cputime;
 			struct task_struct *t = task;
 			do {
 				min_flt += t->min_flt;
@@ -517,9 +516,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 
 			min_flt += sig->min_flt;
 			maj_flt += sig->maj_flt;
-			thread_group_cputime(task, &cputime);
-			utime = cputime.utime;
-			stime = cputime.stime;
+			thread_group_times(task, &utime, &stime);
 			gtime = cputime_add(gtime, sig->gtime);
 		}
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index dff85e58264e..34238bd10ebf 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -624,6 +624,9 @@ struct signal_struct {
 	cputime_t utime, stime, cutime, cstime;
 	cputime_t gtime;
 	cputime_t cgtime;
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+	cputime_t prev_utime, prev_stime;
+#endif
 	unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
 	unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
 	unsigned long inblock, oublock, cinblock, coublock;
@@ -1723,6 +1726,7 @@ static inline void put_task_struct(struct task_struct *t)
 }
 
 extern void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st);
+extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st);
 
 /*
  * Per process flags
diff --git a/kernel/exit.c b/kernel/exit.c
index 2eaf68b634e3..b221ad65fd20 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -91,8 +91,6 @@ static void __exit_signal(struct task_struct *tsk)
 	if (atomic_dec_and_test(&sig->count))
 		posix_cpu_timers_exit_group(tsk);
 	else {
-		cputime_t utime, stime;
-
 		/*
 		 * If there is any task waiting for the group exit
 		 * then notify it:
@@ -112,9 +110,8 @@ static void __exit_signal(struct task_struct *tsk)
 		 * We won't ever get here for the group leader, since it
 		 * will have been the last reference on the signal_struct.
 		 */
-		task_times(tsk, &utime, &stime);
-		sig->utime = cputime_add(sig->utime, utime);
-		sig->stime = cputime_add(sig->stime, stime);
+		sig->utime = cputime_add(sig->utime, tsk->utime);
+		sig->stime = cputime_add(sig->stime, tsk->stime);
 		sig->gtime = cputime_add(sig->gtime, tsk->gtime);
 		sig->min_flt += tsk->min_flt;
 		sig->maj_flt += tsk->maj_flt;
@@ -1208,6 +1205,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 		struct signal_struct *psig;
 		struct signal_struct *sig;
 		unsigned long maxrss;
+		cputime_t tgutime, tgstime;
 
 		/*
 		 * The resource counters for the group leader are in its
@@ -1223,20 +1221,23 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 		 * need to protect the access to parent->signal fields,
 		 * as other threads in the parent group can be right
 		 * here reaping other children at the same time.
+		 *
+		 * We use thread_group_times() to get times for the thread
+		 * group, which consolidates times for all threads in the
+		 * group including the group leader.
 		 */
+		thread_group_times(p, &tgutime, &tgstime);
 		spin_lock_irq(&p->real_parent->sighand->siglock);
 		psig = p->real_parent->signal;
 		sig = p->signal;
 		psig->cutime =
 			cputime_add(psig->cutime,
-			cputime_add(p->utime,
-			cputime_add(sig->utime,
-				    sig->cutime)));
+			cputime_add(tgutime,
+				    sig->cutime));
 		psig->cstime =
 			cputime_add(psig->cstime,
-			cputime_add(p->stime,
-			cputime_add(sig->stime,
-				    sig->cstime)));
+			cputime_add(tgstime,
+				    sig->cstime));
 		psig->cgtime =
 			cputime_add(psig->cgtime,
 			cputime_add(p->gtime,
diff --git a/kernel/fork.c b/kernel/fork.c
index ad7cb6d1193c..3d6f121bbe8a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -884,6 +884,9 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
 	sig->gtime = cputime_zero;
 	sig->cgtime = cputime_zero;
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+	sig->prev_utime = sig->prev_stime = cputime_zero;
+#endif
 	sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
 	sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
 	sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
diff --git a/kernel/sched.c b/kernel/sched.c
index 17e2c1db2bde..e6ba726941ae 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5187,6 +5187,16 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 	*ut = p->utime;
 	*st = p->stime;
 }
+
+void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
+{
+	struct task_cputime cputime;
+
+	thread_group_cputime(p, &cputime);
+
+	*ut = cputime.utime;
+	*st = cputime.stime;
+}
 #else
 
 #ifndef nsecs_to_cputime
@@ -5220,6 +5230,37 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 	*ut = p->prev_utime;
 	*st = p->prev_stime;
 }
+
+/*
+ * Must be called with siglock held.
+ */
+void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
+{
+	struct signal_struct *sig = p->signal;
+	struct task_cputime cputime;
+	cputime_t rtime, utime, total;
+
+	thread_group_cputime(p, &cputime);
+
+	total = cputime_add(cputime.utime, cputime.stime);
+	rtime = nsecs_to_cputime(cputime.sum_exec_runtime);
+
+	if (total) {
+		u64 temp;
+
+		temp = (u64)(rtime * cputime.utime);
+		do_div(temp, total);
+		utime = (cputime_t)temp;
+	} else
+		utime = rtime;
+
+	sig->prev_utime = max(sig->prev_utime, utime);
+	sig->prev_stime = max(sig->prev_stime,
+			      cputime_sub(rtime, sig->prev_utime));
+
+	*ut = sig->prev_utime;
+	*st = sig->prev_stime;
+}
 #endif
 
 /*
diff --git a/kernel/sys.c b/kernel/sys.c
index bbdfce0d4347..9968c5fb55b9 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -911,16 +911,15 @@ change_okay:
 
 void do_sys_times(struct tms *tms)
 {
-	struct task_cputime cputime;
-	cputime_t cutime, cstime;
+	cputime_t tgutime, tgstime, cutime, cstime;
 
-	thread_group_cputime(current, &cputime);
 	spin_lock_irq(&current->sighand->siglock);
+	thread_group_times(current, &tgutime, &tgstime);
 	cutime = current->signal->cutime;
 	cstime = current->signal->cstime;
 	spin_unlock_irq(&current->sighand->siglock);
-	tms->tms_utime = cputime_to_clock_t(cputime.utime);
-	tms->tms_stime = cputime_to_clock_t(cputime.stime);
+	tms->tms_utime = cputime_to_clock_t(tgutime);
+	tms->tms_stime = cputime_to_clock_t(tgstime);
 	tms->tms_cutime = cputime_to_clock_t(cutime);
 	tms->tms_cstime = cputime_to_clock_t(cstime);
 }
@@ -1338,8 +1337,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 {
 	struct task_struct *t;
 	unsigned long flags;
-	cputime_t utime, stime;
-	struct task_cputime cputime;
+	cputime_t tgutime, tgstime, utime, stime;
 	unsigned long maxrss = 0;
 
 	memset((char *) r, 0, sizeof *r);
@@ -1372,9 +1370,9 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 				break;
 
 		case RUSAGE_SELF:
-			thread_group_cputime(p, &cputime);
-			utime = cputime_add(utime, cputime.utime);
-			stime = cputime_add(stime, cputime.stime);
+			thread_group_times(p, &tgutime, &tgstime);
+			utime = cputime_add(utime, tgutime);
+			stime = cputime_add(stime, tgstime);
 			r->ru_nvcsw += p->signal->nvcsw;
 			r->ru_nivcsw += p->signal->nivcsw;
 			r->ru_minflt += p->signal->min_flt;
-- 
cgit v1.2.3-70-g09d2


From 7cff7ce94a7df2ccf5ac76b48ee0995fee2060df Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Fri, 9 Oct 2009 00:01:39 -0700
Subject: include/linux/compiler-gcc4.h: Fix build bug - gcc-4.0.2 doesn't
 understand __builtin_object_size

Maybe 4.1.0 doesn't too, but this fixed it for me.

Caused by:

 4a31276: x86: Turn the copy_from_user check into an (optional) compile time warning
 9f0cf4a: x86: Use __builtin_object_size() to validate the buffer size for copy_from_user()

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
LKML-Reference: <200910090724.n997OQl6013538@imap1.linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/compiler-gcc4.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index 77542c57e20a..e6ef279ca20c 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -38,7 +38,9 @@
 
 #endif
 
+#if __GNUC_MINOR__ > 0
 #define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
+#endif
 #if __GNUC_MINOR__ >= 4
 #define __compiletime_warning(message) __attribute__((warning(message)))
 #define __compiletime_error(message) __attribute__((error(message)))
-- 
cgit v1.2.3-70-g09d2


From 796bd9524731850967d437b7f47a86acc776ea89 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 29 Sep 2009 12:27:23 +0100
Subject: VFS: Add forget_all_cached_acls()

This is required for cluster filesystems which want to use
cached ACLs so that they can invalidate the cache when
required.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Cc: Alexander Viro <aviro@redhat.com>
Cc: Christoph Hellwig <hch@infradead.org>
---
 include/linux/posix_acl.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h
index 065a3652a3ea..67608161df6b 100644
--- a/include/linux/posix_acl.h
+++ b/include/linux/posix_acl.h
@@ -147,6 +147,20 @@ static inline void forget_cached_acl(struct inode *inode, int type)
 	if (old != ACL_NOT_CACHED)
 		posix_acl_release(old);
 }
+
+static inline void forget_all_cached_acls(struct inode *inode)
+{
+	struct posix_acl *old_access, *old_default;
+	spin_lock(&inode->i_lock);
+	old_access = inode->i_acl;
+	old_default = inode->i_default_acl;
+	inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
+	spin_unlock(&inode->i_lock);
+	if (old_access != ACL_NOT_CACHED)
+		posix_acl_release(old_access);
+	if (old_default != ACL_NOT_CACHED)
+		posix_acl_release(old_default);
+}
 #endif
 
 static inline void cache_no_acl(struct inode *inode)
-- 
cgit v1.2.3-70-g09d2


From 86e931a35e93d94e6e91b57cc76456e16d188ea9 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Mon, 28 Sep 2009 12:35:17 +0100
Subject: VFS: Export dquot_send_warning

Sending a message to userspace in a generic format to warn
of events (e.g. quota exceeded) in the quota subsystem is
a generically useful feature. This patch makes some minor
changes to the send_message function from dquot.c renaming
it quota_send_message, moving it to quota.c and exporting it
for use by filesystems which do not use the dquot code.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/quota/Kconfig      |  2 +-
 fs/quota/dquot.c      | 93 +++++----------------------------------------------
 fs/quota/quota.c      | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/quota.h | 11 ++++++
 4 files changed, 114 insertions(+), 85 deletions(-)

(limited to 'include')

diff --git a/fs/quota/Kconfig b/fs/quota/Kconfig
index 8047e01ef46b..353e78a9ebee 100644
--- a/fs/quota/Kconfig
+++ b/fs/quota/Kconfig
@@ -17,7 +17,7 @@ config QUOTA
 
 config QUOTA_NETLINK_INTERFACE
 	bool "Report quota messages through netlink interface"
-	depends on QUOTA && NET
+	depends on QUOTACTL && NET
 	help
 	  If you say Y here, quota warnings (about exceeding softlimit, reaching
 	  hardlimit, etc.) will be reported through netlink interface. If unsure,
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 39b49c42a7ed..9b6ad908dcb2 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -77,10 +77,6 @@
 #include <linux/capability.h>
 #include <linux/quotaops.h>
 #include <linux/writeback.h> /* for inode_lock, oddly enough.. */
-#ifdef CONFIG_QUOTA_NETLINK_INTERFACE
-#include <net/netlink.h>
-#include <net/genetlink.h>
-#endif
 
 #include <asm/uaccess.h>
 
@@ -1071,73 +1067,6 @@ static void print_warning(struct dquot *dquot, const int warntype)
 }
 #endif
 
-#ifdef CONFIG_QUOTA_NETLINK_INTERFACE
-
-/* Netlink family structure for quota */
-static struct genl_family quota_genl_family = {
-	.id = GENL_ID_GENERATE,
-	.hdrsize = 0,
-	.name = "VFS_DQUOT",
-	.version = 1,
-	.maxattr = QUOTA_NL_A_MAX,
-};
-
-/* Send warning to userspace about user which exceeded quota */
-static void send_warning(const struct dquot *dquot, const char warntype)
-{
-	static atomic_t seq;
-	struct sk_buff *skb;
-	void *msg_head;
-	int ret;
-	int msg_size = 4 * nla_total_size(sizeof(u32)) +
-		       2 * nla_total_size(sizeof(u64));
-
-	/* We have to allocate using GFP_NOFS as we are called from a
-	 * filesystem performing write and thus further recursion into
-	 * the fs to free some data could cause deadlocks. */
-	skb = genlmsg_new(msg_size, GFP_NOFS);
-	if (!skb) {
-		printk(KERN_ERR
-		  "VFS: Not enough memory to send quota warning.\n");
-		return;
-	}
-	msg_head = genlmsg_put(skb, 0, atomic_add_return(1, &seq),
-			&quota_genl_family, 0, QUOTA_NL_C_WARNING);
-	if (!msg_head) {
-		printk(KERN_ERR
-		  "VFS: Cannot store netlink header in quota warning.\n");
-		goto err_out;
-	}
-	ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, dquot->dq_type);
-	if (ret)
-		goto attr_err_out;
-	ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID, dquot->dq_id);
-	if (ret)
-		goto attr_err_out;
-	ret = nla_put_u32(skb, QUOTA_NL_A_WARNING, warntype);
-	if (ret)
-		goto attr_err_out;
-	ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MAJOR,
-		MAJOR(dquot->dq_sb->s_dev));
-	if (ret)
-		goto attr_err_out;
-	ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MINOR,
-		MINOR(dquot->dq_sb->s_dev));
-	if (ret)
-		goto attr_err_out;
-	ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current_uid());
-	if (ret)
-		goto attr_err_out;
-	genlmsg_end(skb, msg_head);
-
-	genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS);
-	return;
-attr_err_out:
-	printk(KERN_ERR "VFS: Not enough space to compose quota message!\n");
-err_out:
-	kfree_skb(skb);
-}
-#endif
 /*
  * Write warnings to the console and send warning messages over netlink.
  *
@@ -1145,18 +1074,20 @@ err_out:
  */
 static void flush_warnings(struct dquot *const *dquots, char *warntype)
 {
+	struct dquot *dq;
 	int i;
 
-	for (i = 0; i < MAXQUOTAS; i++)
-		if (dquots[i] && warntype[i] != QUOTA_NL_NOWARN &&
-		    !warning_issued(dquots[i], warntype[i])) {
+	for (i = 0; i < MAXQUOTAS; i++) {
+		dq = dquots[i];
+		if (dq && warntype[i] != QUOTA_NL_NOWARN &&
+		    !warning_issued(dq, warntype[i])) {
 #ifdef CONFIG_PRINT_QUOTA_WARNING
-			print_warning(dquots[i], warntype[i]);
-#endif
-#ifdef CONFIG_QUOTA_NETLINK_INTERFACE
-			send_warning(dquots[i], warntype[i]);
+			print_warning(dq, warntype[i]);
 #endif
+			quota_send_warning(dq->dq_type, dq->dq_id,
+					   dq->dq_sb->s_dev, warntype[i]);
 		}
+	}
 }
 
 static int ignore_hardlimit(struct dquot *dquot)
@@ -2607,12 +2538,6 @@ static int __init dquot_init(void)
 
 	register_shrinker(&dqcache_shrinker);
 
-#ifdef CONFIG_QUOTA_NETLINK_INTERFACE
-	if (genl_register_family(&quota_genl_family) != 0)
-		printk(KERN_ERR
-		       "VFS: Failed to create quota netlink interface.\n");
-#endif
-
 	return 0;
 }
 module_init(dquot_init);
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 95c5b42384b2..ee91e2756950 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -18,6 +18,8 @@
 #include <linux/capability.h>
 #include <linux/quotaops.h>
 #include <linux/types.h>
+#include <net/netlink.h>
+#include <net/genetlink.h>
 
 /* Check validity of generic quotactl commands */
 static int generic_quotactl_valid(struct super_block *sb, int type, int cmd,
@@ -525,3 +527,94 @@ asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special,
 	return ret;
 }
 #endif
+
+
+#ifdef CONFIG_QUOTA_NETLINK_INTERFACE
+
+/* Netlink family structure for quota */
+static struct genl_family quota_genl_family = {
+	.id = GENL_ID_GENERATE,
+	.hdrsize = 0,
+	.name = "VFS_DQUOT",
+	.version = 1,
+	.maxattr = QUOTA_NL_A_MAX,
+};
+
+/**
+ * quota_send_warning - Send warning to userspace about exceeded quota
+ * @type: The quota type: USRQQUOTA, GRPQUOTA,...
+ * @id: The user or group id of the quota that was exceeded
+ * @dev: The device on which the fs is mounted (sb->s_dev)
+ * @warntype: The type of the warning: QUOTA_NL_...
+ *
+ * This can be used by filesystems (including those which don't use
+ * dquot) to send a message to userspace relating to quota limits.
+ *
+ */
+
+void quota_send_warning(short type, unsigned int id, dev_t dev,
+			const char warntype)
+{
+	static atomic_t seq;
+	struct sk_buff *skb;
+	void *msg_head;
+	int ret;
+	int msg_size = 4 * nla_total_size(sizeof(u32)) +
+		       2 * nla_total_size(sizeof(u64));
+
+	/* We have to allocate using GFP_NOFS as we are called from a
+	 * filesystem performing write and thus further recursion into
+	 * the fs to free some data could cause deadlocks. */
+	skb = genlmsg_new(msg_size, GFP_NOFS);
+	if (!skb) {
+		printk(KERN_ERR
+		  "VFS: Not enough memory to send quota warning.\n");
+		return;
+	}
+	msg_head = genlmsg_put(skb, 0, atomic_add_return(1, &seq),
+			&quota_genl_family, 0, QUOTA_NL_C_WARNING);
+	if (!msg_head) {
+		printk(KERN_ERR
+		  "VFS: Cannot store netlink header in quota warning.\n");
+		goto err_out;
+	}
+	ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, type);
+	if (ret)
+		goto attr_err_out;
+	ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID, id);
+	if (ret)
+		goto attr_err_out;
+	ret = nla_put_u32(skb, QUOTA_NL_A_WARNING, warntype);
+	if (ret)
+		goto attr_err_out;
+	ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MAJOR, MAJOR(dev));
+	if (ret)
+		goto attr_err_out;
+	ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MINOR, MINOR(dev));
+	if (ret)
+		goto attr_err_out;
+	ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current_uid());
+	if (ret)
+		goto attr_err_out;
+	genlmsg_end(skb, msg_head);
+
+	genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS);
+	return;
+attr_err_out:
+	printk(KERN_ERR "VFS: Not enough space to compose quota message!\n");
+err_out:
+	kfree_skb(skb);
+}
+EXPORT_SYMBOL(quota_send_warning);
+
+static int __init quota_init(void)
+{
+	if (genl_register_family(&quota_genl_family) != 0)
+		printk(KERN_ERR
+		       "VFS: Failed to create quota netlink interface.\n");
+	return 0;
+};
+
+module_init(quota_init);
+#endif
+
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 78c48895b12a..ce9a9b2e5cd4 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -376,6 +376,17 @@ static inline unsigned int dquot_generic_flag(unsigned int flags, int type)
 	return flags >> _DQUOT_STATE_FLAGS;
 }
 
+#ifdef CONFIG_QUOTA_NETLINK_INTERFACE
+extern void quota_send_warning(short type, unsigned int id, dev_t dev,
+			       const char warntype);
+#else
+static inline void quota_send_warning(short type, unsigned int id, dev_t dev,
+				      const char warntype)
+{
+	return;
+}
+#endif /* CONFIG_QUOTA_NETLINK_INTERFACE */
+
 struct quota_info {
 	unsigned int flags;			/* Flags for diskquotas on this device */
 	struct mutex dqio_mutex;		/* lock device while I/O in progress */
-- 
cgit v1.2.3-70-g09d2


From 0ab7d13fcbd7ce1658c563e345990ba453719deb Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Fri, 6 Nov 2009 16:20:51 +0000
Subject: GFS2: Tag all metadata with jid

There are two spare field in the header common to all GFS2
metadata. One is just the right size to fit a journal id
in it, and this patch updates the journal code so that each
time a metadata block is modified, we tag it with the journal
id of the node which is performing the modification.

The reason for this is that it should make it much easier to
debug issues which arise if we can tell which node was the
last to modify a particular metadata block.

Since the field is updated before the block is written into
the journal, each journal should only contain metadata which
is tagged with its own journal id. The one exception to this
is the journal header block, which might have a different node's
id in it, if that journal was recovered by another node in the
cluster.

Thus each journal will contain a record of which nodes recovered
it, via the journal header.

The other field in the metadata header could potentially be
used to hold information about what kind of operation was
performed, but for the time being we just zero it on each
transaction so that if we use it for that in future, we'll
know that the information (where it exists) is reliable.

I did consider using the other field to hold the journal
sequence number, however since in GFS2's journaling we write
the modified data into the journal and not the original
data, this gives no information as to what action caused the
modification, so I think we can probably come up with a better
use for those 64 bits in the future.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/inode.c             | 2 --
 fs/gfs2/log.c               | 2 ++
 fs/gfs2/lops.c              | 4 ++++
 fs/gfs2/recovery.c          | 2 ++
 include/linux/gfs2_ondisk.h | 6 +++++-
 5 files changed, 13 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 6380cd9314b0..26ba2a4c4a2d 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -947,9 +947,7 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 
 	str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
 	str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
-	str->di_header.__pad0 = 0;
 	str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
-	str->di_header.__pad1 = 0;
 	str->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
 	str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
 	str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 13c6237c5f67..4511b08fc451 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -596,7 +596,9 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
 	memset(lh, 0, sizeof(struct gfs2_log_header));
 	lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
 	lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
+	lh->lh_header.__pad0 = cpu_to_be64(0);
 	lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
+	lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
 	lh->lh_sequence = cpu_to_be64(sdp->sd_log_sequence++);
 	lh->lh_flags = cpu_to_be32(flags);
 	lh->lh_tail = cpu_to_be32(tail);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 9969ff062c5b..de97632ba32f 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -132,6 +132,7 @@ static struct buffer_head *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type)
 static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
 {
 	struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
+	struct gfs2_meta_header *mh;
 	struct gfs2_trans *tr;
 
 	lock_buffer(bd->bd_bh);
@@ -148,6 +149,9 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
 	set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
 	gfs2_meta_check(sdp, bd->bd_bh);
 	gfs2_pin(sdp, bd->bd_bh);
+	mh = (struct gfs2_meta_header *)bd->bd_bh->b_data;
+	mh->__pad0 = cpu_to_be64(0);
+	mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
 	sdp->sd_log_num_buf++;
 	list_add(&le->le_list, &sdp->sd_log_le_buf);
 	tr->tr_num_buf_new++;
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 09fa31965576..4b9bece3d437 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -410,7 +410,9 @@ static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *hea
 	memset(lh, 0, sizeof(struct gfs2_log_header));
 	lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
 	lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
+	lh->lh_header.__pad0 = cpu_to_be64(0);
 	lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
+	lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
 	lh->lh_sequence = cpu_to_be64(head->lh_sequence + 1);
 	lh->lh_flags = cpu_to_be32(GFS2_LOG_HEAD_UNMOUNT);
 	lh->lh_blkno = cpu_to_be32(lblock);
diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h
index b80c88dedbbb..81f90a59cda6 100644
--- a/include/linux/gfs2_ondisk.h
+++ b/include/linux/gfs2_ondisk.h
@@ -81,7 +81,11 @@ struct gfs2_meta_header {
 	__be32 mh_type;
 	__be64 __pad0;		/* Was generation number in gfs1 */
 	__be32 mh_format;
-	__be32 __pad1;		/* Was incarnation number in gfs1 */
+	/* This union is to keep userspace happy */
+	union {
+		__be32 mh_jid;		/* Was incarnation number in gfs1 */
+		__be32 __pad1;
+	};
 };
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 38938c879eb0c39edf85d5164aa0cffe2874304c Mon Sep 17 00:00:00 2001
From: David Daney <ddaney@caviumnetworks.com>
Date: Fri, 4 Dec 2009 17:44:50 -0800
Subject: Add support for GCC-4.5's __builtin_unreachable() to compiler.h (v2)

Starting with version 4.5, GCC has a new built-in function
__builtin_unreachable() that can be used in places like the kernel's
BUG() where inline assembly is used to transfer control flow.  This
eliminated the need for an endless loop in these places.

The patch adds a new macro 'unreachable()' that will expand to either
__builtin_unreachable() or an endless loop depending on the compiler
version.

Change from v1: Simplify unreachable() for non-GCC 4.5 case.

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
Acked-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler-gcc4.h | 14 ++++++++++++++
 include/linux/compiler.h      |  5 +++++
 2 files changed, 19 insertions(+)

(limited to 'include')

diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index 450fa597c94d..ab3af40a53c6 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -36,4 +36,18 @@
    the kernel context */
 #define __cold			__attribute__((__cold__))
 
+
+#if __GNUC_MINOR__ >= 5
+/*
+ * Mark a position in code as unreachable.  This can be used to
+ * suppress control flow warnings after asm blocks that transfer
+ * control elsewhere.
+ *
+ * Early snapshots of gcc 4.5 don't support this and we can't detect
+ * this in the preprocessor, but we can live with this because they're
+ * unreleased.  Really, we need to have autoconf for the kernel.
+ */
+#define unreachable() __builtin_unreachable()
+#endif
+
 #endif
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 04fb5135b4e1..59f208926d13 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -144,6 +144,11 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 # define barrier() __memory_barrier()
 #endif
 
+/* Unreachable code */
+#ifndef unreachable
+# define unreachable() do { } while (1)
+#endif
+
 #ifndef RELOC_HIDE
 # define RELOC_HIDE(ptr, off)					\
   ({ unsigned long __ptr;					\
-- 
cgit v1.2.3-70-g09d2