From e545a6140b698b2494daf0b32107bdcc5e901390 Mon Sep 17 00:00:00 2001
From: Manfred Spraul <manfred@colorfullife.com>
Date: Sun, 7 Sep 2008 16:57:22 +0200
Subject: kernel/cpu.c: create a CPU_STARTING cpu_chain notifier

Right now, there is no notifier that is called on a new cpu, before the new
cpu begins processing interrupts/softirqs.
Various kernel functions would need that notification, e.g. kvm works around
this by calling smp_call_function_single(), rcu polls cpu_online_map.

The patch adds a CPU_STARTING notification. It also adds a helper function
that sends the message to all cpu_chain handlers.

Tested on x86-64.
All other archs are untested. Especially on sparc, I'm not sure if I got
it right.

Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/s390/kernel/smp.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/s390')

diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 00b9b4dec5eb..9e8b1f9b8f4d 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -585,6 +585,8 @@ int __cpuinit start_secondary(void *cpuvoid)
 	/* Enable pfault pseudo page faults on this cpu. */
 	pfault_init();
 
+	/* call cpu notifiers */
+	notify_cpu_starting(smp_processor_id());
 	/* Mark this cpu as online */
 	spin_lock(&call_lock);
 	cpu_set(smp_processor_id(), cpu_online_map);
-- 
cgit v1.2.3-70-g09d2


From 3d6e48f43340343d97839eadb1ab7b6a3ea98797 Mon Sep 17 00:00:00 2001
From: Jarod Wilson <jwilson@redhat.com>
Date: Tue, 9 Sep 2008 12:38:56 +0200
Subject: [S390] CVE-2008-1514: prevent ptrace padding area read/write in
 31-bit mode

When running a 31-bit ptrace, on either an s390 or s390x kernel,
reads and writes into a padding area in struct user_regs_struct32
will result in a kernel panic.

This is also known as CVE-2008-1514.

Test case available here:
http://sources.redhat.com/cgi-bin/cvsweb.cgi/~checkout~/tests/ptrace-tests/tests/user-area-padding.c?cvsroot=systemtap

Steps to reproduce:
1) wget the above
2) gcc -o user-area-padding-31bit user-area-padding.c -Wall -ggdb2 -D_GNU_SOURCE -m31
3) ./user-area-padding-31bit
<panic>

Test status
-----------
Without patch, both s390 and s390x kernels panic. With patch, the test case,
as well as the gdb testsuite, pass without incident, padding area reads
returning zero, writes ignored.

Nb: original version returned -EINVAL on write attempts, which broke the
gdb test and made the test case slightly unhappy. Jan Kratochvil suggested
the change to return 0 on write attempts.

Signed-off-by: Jarod Wilson <jarod@redhat.com>
Tested-by: Jan Kratochvil <jan.kratochvil@redhat.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/compat_ptrace.h |  1 +
 arch/s390/kernel/ptrace.c        | 28 ++++++++++++++++++++++++++++
 2 files changed, 29 insertions(+)

(limited to 'arch/s390')

diff --git a/arch/s390/kernel/compat_ptrace.h b/arch/s390/kernel/compat_ptrace.h
index cde81fa64f89..a2be3a978d5c 100644
--- a/arch/s390/kernel/compat_ptrace.h
+++ b/arch/s390/kernel/compat_ptrace.h
@@ -42,6 +42,7 @@ struct user_regs_struct32
 	u32 gprs[NUM_GPRS];
 	u32 acrs[NUM_ACRS];
 	u32 orig_gpr2;
+	/* nb: there's a 4-byte hole here */
 	s390_fp_regs fp_regs;
 	/*
 	 * These per registers are in here so that gdb can modify them
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 2815bfe348a6..c8b08289eb87 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -170,6 +170,13 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr)
 		 */
 		tmp = (addr_t) task_pt_regs(child)->orig_gpr2;
 
+	} else if (addr < (addr_t) &dummy->regs.fp_regs) {
+		/*
+		 * prevent reads of padding hole between
+		 * orig_gpr2 and fp_regs on s390.
+		 */
+		tmp = 0;
+
 	} else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) {
 		/* 
 		 * floating point regs. are stored in the thread structure
@@ -270,6 +277,13 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
 		 */
 		task_pt_regs(child)->orig_gpr2 = data;
 
+	} else if (addr < (addr_t) &dummy->regs.fp_regs) {
+		/*
+		 * prevent writes of padding hole between
+		 * orig_gpr2 and fp_regs on s390.
+		 */
+		return 0;
+
 	} else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) {
 		/*
 		 * floating point regs. are stored in the thread structure
@@ -428,6 +442,13 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr)
 		 */
 		tmp = *(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4);
 
+	} else if (addr < (addr_t) &dummy32->regs.fp_regs) {
+		/*
+		 * prevent reads of padding hole between
+		 * orig_gpr2 and fp_regs on s390.
+		 */
+		tmp = 0;
+
 	} else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) {
 		/*
 		 * floating point regs. are stored in the thread structure 
@@ -514,6 +535,13 @@ static int __poke_user_compat(struct task_struct *child,
 		 */
 		*(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4) = tmp;
 
+	} else if (addr < (addr_t) &dummy32->regs.fp_regs) {
+		/*
+		 * prevent writes of padding hole between
+		 * orig_gpr2 and fp_regs on s390.
+		 */
+		return 0;
+
 	} else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) {
 		/*
 		 * floating point regs. are stored in the thread structure 
-- 
cgit v1.2.3-70-g09d2


From d3d238c7744d08c36a114a59cb537d4c0c6c9a86 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 3 Oct 2008 21:54:59 +0200
Subject: [S390] nohz: Fix __udelay.

This fixes a regression that came with 934b2857cc576ae53c92a66e63fce7ddcfa74691
("[S390] nohz/sclp: disable timer on synchronous waits.").
If udelay() gets called from a disabled context it sets the clock comparator
to a value where it expects the next interrupt. When the interrupt happens
the clock comparator does not get reset and therefore the interrupt condition
doesn't get cleared. The result is an endless timer interrupt loop.

In addition, this patch also fixes the following:

rcutorture reveals that our __udelay implementation is still buggy,
since it might schedule tasklets, but prevents their execution:

NOHZ: local_softirq_pending 42
NOHZ: local_softirq_pending 02
NOHZ: local_softirq_pending 142
NOHZ: local_softirq_pending 02

To fix this we make sure that only the clock comparator interrupt
is enabled when the enabled wait psw is loaded.
Also no code gets called anymore which might schedule tasklets.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/time.c |  2 ++
 arch/s390/lib/delay.c   | 88 ++++++++++++++++++++++++++++++-------------------
 2 files changed, 56 insertions(+), 34 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index ca114fe46ffb..06acb1a18bbc 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -169,6 +169,8 @@ void init_cpu_timer(void)
 
 static void clock_comparator_interrupt(__u16 code)
 {
+	if (S390_lowcore.clock_comparator == -1ULL)
+		set_clock_comparator(S390_lowcore.clock_comparator);
 }
 
 static void etr_timing_alert(struct etr_irq_parm *);
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index fc6ab6094df8..0953cee05efc 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -1,14 +1,9 @@
 /*
- *  arch/s390/lib/delay.c
  *    Precise Delay Loops for S390
  *
- *  S390 version
- *    Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
- *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
- *
- *  Derived from "arch/i386/lib/delay.c"
- *    Copyright (C) 1993 Linus Torvalds
- *    Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ *    Copyright IBM Corp. 1999,2008
+ *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ *		 Heiko Carstens <heiko.carstens@de.ibm.com>,
  */
 
 #include <linux/sched.h>
@@ -29,30 +24,31 @@ void __delay(unsigned long loops)
 	asm volatile("0: brct %0,0b" : : "d" ((loops/2) + 1));
 }
 
-/*
- * Waits for 'usecs' microseconds using the TOD clock comparator.
- */
-void __udelay(unsigned long usecs)
+static void __udelay_disabled(unsigned long usecs)
 {
-	u64 end, time, old_cc = 0;
-	unsigned long flags, cr0, mask, dummy;
-	int irq_context;
+	unsigned long mask, cr0, cr0_saved;
+	u64 clock_saved;
 
-	irq_context = in_interrupt();
-	if (!irq_context)
-		local_bh_disable();
-	local_irq_save(flags);
-	if (raw_irqs_disabled_flags(flags)) {
-		old_cc = local_tick_disable();
-		S390_lowcore.clock_comparator = -1ULL;
-		__ctl_store(cr0, 0, 0);
-		dummy = (cr0 & 0xffff00e0) | 0x00000800;
-		__ctl_load(dummy , 0, 0);
-		mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT;
-	} else
-		mask = psw_kernel_bits | PSW_MASK_WAIT |
-			PSW_MASK_EXT | PSW_MASK_IO;
+	clock_saved = local_tick_disable();
+	set_clock_comparator(get_clock() + ((u64) usecs << 12));
+	__ctl_store(cr0_saved, 0, 0);
+	cr0 = (cr0_saved & 0xffff00e0) | 0x00000800;
+	__ctl_load(cr0 , 0, 0);
+	mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT;
+	trace_hardirqs_on();
+	__load_psw_mask(mask);
+	local_irq_disable();
+	__ctl_load(cr0_saved, 0, 0);
+	local_tick_enable(clock_saved);
+	set_clock_comparator(S390_lowcore.clock_comparator);
+}
 
+static void __udelay_enabled(unsigned long usecs)
+{
+	unsigned long mask;
+	u64 end, time;
+
+	mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT | PSW_MASK_IO;
 	end = get_clock() + ((u64) usecs << 12);
 	do {
 		time = end < S390_lowcore.clock_comparator ?
@@ -62,13 +58,37 @@ void __udelay(unsigned long usecs)
 		__load_psw_mask(mask);
 		local_irq_disable();
 	} while (get_clock() < end);
+	set_clock_comparator(S390_lowcore.clock_comparator);
+}
 
-	if (raw_irqs_disabled_flags(flags)) {
-		__ctl_load(cr0, 0, 0);
-		local_tick_enable(old_cc);
+/*
+ * Waits for 'usecs' microseconds using the TOD clock comparator.
+ */
+void __udelay(unsigned long usecs)
+{
+	unsigned long flags;
+
+	preempt_disable();
+	local_irq_save(flags);
+	if (in_irq()) {
+		__udelay_disabled(usecs);
+		goto out;
+	}
+	if (in_softirq()) {
+		if (raw_irqs_disabled_flags(flags))
+			__udelay_disabled(usecs);
+		else
+			__udelay_enabled(usecs);
+		goto out;
 	}
-	if (!irq_context)
+	if (raw_irqs_disabled_flags(flags)) {
+		local_bh_disable();
+		__udelay_disabled(usecs);
 		_local_bh_enable();
-	set_clock_comparator(S390_lowcore.clock_comparator);
+		goto out;
+	}
+	__udelay_enabled(usecs);
+out:
 	local_irq_restore(flags);
+	preempt_enable();
 }
-- 
cgit v1.2.3-70-g09d2


From 7a0f475513fa573bc8e072021960313da32f0ee3 Mon Sep 17 00:00:00 2001
From: Klaus-Dieter Wacker <kdwacker@de.ibm.com>
Date: Fri, 10 Oct 2008 21:33:18 +0200
Subject: [S390] qdio enhanced SIGA (iqdio) support.

Add support for z10 HiperSockets multiwrite SBALs on output
queues. This is used on LPAR with EDDP enabled devices.

Signed-off-by: Klaus-Dieter Wacker <kdwacker@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/qdio.h |  8 +++++++-
 drivers/s390/cio/qdio.h      |  3 +++
 drivers/s390/cio/qdio_main.c | 24 +++++++++++++++++++-----
 3 files changed, 29 insertions(+), 6 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index 6813772171f2..4734c3f05354 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -299,7 +299,13 @@ struct qdio_ssqd_desc {
 	u8 mbccnt;
 	u16 qdioac2;
 	u64 sch_token;
-	u64:64;
+	u8 mro;
+	u8 mri;
+	u8:8;
+	u8 sbalic;
+	u16:16;
+	u8:8;
+	u8 mmwc;
 } __attribute__ ((packed));
 
 /* params are: ccw_device, qdio_error, queue_number,
diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h
index af867731a5f4..e3ea1d5f2810 100644
--- a/drivers/s390/cio/qdio.h
+++ b/drivers/s390/cio/qdio.h
@@ -203,6 +203,9 @@ struct qdio_output_q {
 	/* PCIs are enabled for the queue */
 	int pci_out_enabled;
 
+	/* IQDIO: output multiple buffers (enhanced SIGA) */
+	int use_enh_siga;
+
 	/* timer to check for more outbound work */
 	struct timer_list timer;
 };
diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c
index 719066ec0c01..a50682d2a0fa 100644
--- a/drivers/s390/cio/qdio_main.c
+++ b/drivers/s390/cio/qdio_main.c
@@ -316,6 +316,9 @@ static inline int qdio_do_siga_output(struct qdio_q *q, unsigned int *busy_bit)
 	unsigned int fc = 0;
 	unsigned long schid;
 
+	if (q->u.out.use_enh_siga) {
+		fc = 3;
+	}
 	if (!is_qebsm(q))
 		schid = *((u32 *)&q->irq_ptr->schid);
 	else {
@@ -1449,6 +1452,8 @@ int qdio_establish(struct qdio_initialize *init_data)
 	}
 
 	qdio_setup_ssqd_info(irq_ptr);
+	sprintf(dbf_text, "qDmmwc%2x", irq_ptr->ssqd_desc.mmwc);
+	QDIO_DBF_TEXT2(0, setup, dbf_text);
 	sprintf(dbf_text, "qib ac%2x", irq_ptr->qib.ac);
 	QDIO_DBF_TEXT2(0, setup, dbf_text);
 
@@ -1621,12 +1626,21 @@ static void handle_outbound(struct qdio_q *q, unsigned int callflags,
 		if (multicast_outbound(q))
 			qdio_kick_outbound_q(q);
 		else
-			/*
-			 * One siga-w per buffer required for unicast
-			 * HiperSockets.
-			 */
-			while (count--)
+			if ((q->irq_ptr->ssqd_desc.mmwc > 1) &&
+			    (count > 1) &&
+			    (count <= q->irq_ptr->ssqd_desc.mmwc)) {
+				/* exploit enhanced SIGA */
+				q->u.out.use_enh_siga = 1;
 				qdio_kick_outbound_q(q);
+			} else {
+				/*
+				* One siga-w per buffer required for unicast
+				* HiperSockets.
+				*/
+				q->u.out.use_enh_siga = 0;
+				while (count--)
+					qdio_kick_outbound_q(q);
+			}
 		goto out;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From d86730bb9597b02bff59a3a5a01c0094d71a265f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 10 Oct 2008 21:33:19 +0200
Subject: [S390] s390: use sys_pause for 31bit pause entry point

sys32_pause is a useless copy of the generic sys_pause.
(and it's certainly not there for old sparc32 binaries..)

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/compat_linux.c   | 8 --------
 arch/s390/kernel/compat_linux.h   | 1 -
 arch/s390/kernel/compat_wrapper.S | 2 --
 arch/s390/kernel/syscalls.S       | 2 +-
 4 files changed, 1 insertion(+), 12 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index d7f22226fc4e..98e246dc0233 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -608,14 +608,6 @@ asmlinkage long sys32_settimeofday(struct compat_timeval __user *tv, struct time
 	return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL);
 }
 
-/* These are here just in case some old sparc32 binary calls it. */
-asmlinkage long sys32_pause(void)
-{
-	current->state = TASK_INTERRUPTIBLE;
-	schedule();
-	return -ERESTARTNOHAND;
-}
-
 asmlinkage long sys32_pread64(unsigned int fd, char __user *ubuf,
 				size_t count, u32 poshi, u32 poslo)
 {
diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h
index 20723a062017..05f8516366ab 100644
--- a/arch/s390/kernel/compat_linux.h
+++ b/arch/s390/kernel/compat_linux.h
@@ -206,7 +206,6 @@ long sys32_gettimeofday(struct compat_timeval __user *tv,
 			struct timezone __user *tz);
 long sys32_settimeofday(struct compat_timeval __user *tv,
 			struct timezone __user *tz);
-long sys32_pause(void);
 long sys32_pread64(unsigned int fd, char __user *ubuf, size_t count,
 		   u32 poshi, u32 poslo);
 long sys32_pwrite64(unsigned int fd, const char __user *ubuf,
diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index 328a20e880b5..ee51ca9e23b5 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -128,8 +128,6 @@ sys32_alarm_wrapper:
 	llgfr	%r2,%r2			# unsigned int
 	jg	sys_alarm		# branch to system call
 
-#sys32_pause_wrapper			# void
-
 	.globl	compat_sys_utime_wrapper
 compat_sys_utime_wrapper:
 	llgtr	%r2,%r2			# char *
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index c66d35e55142..3ae303914b42 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -37,7 +37,7 @@ SYSCALL(sys_stime,sys_ni_syscall,sys32_stime_wrapper)		/* 25 old stime syscall *
 SYSCALL(sys_ptrace,sys_ptrace,sys32_ptrace_wrapper)
 SYSCALL(sys_alarm,sys_alarm,sys32_alarm_wrapper)
 NI_SYSCALL							/* old fstat syscall */
-SYSCALL(sys_pause,sys_pause,sys32_pause)
+SYSCALL(sys_pause,sys_pause,sys_pause)
 SYSCALL(sys_utime,sys_utime,compat_sys_utime_wrapper)		/* 30 */
 NI_SYSCALL							/* old stty syscall */
 NI_SYSCALL							/* old gtty syscall */
-- 
cgit v1.2.3-70-g09d2


From 753c4dd6a2fa2af81f5d809d610d29f2d9dd9bc1 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Fri, 10 Oct 2008 21:33:20 +0200
Subject: [S390] ptrace changes

* System call parameter and result access functions
* Add tracehook calls
* Split syscall_trace into two functions do_syscall_trace_enter and
  do_syscall_trace_exit

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/Kconfig                   |  1 +
 arch/s390/include/asm/ptrace.h      |  1 +
 arch/s390/include/asm/syscall.h     | 80 +++++++++++++++++++++++++++++++++++++
 arch/s390/include/asm/thread_info.h |  2 +
 arch/s390/kernel/entry.S            | 50 ++++++++++++++++++-----
 arch/s390/kernel/entry64.S          | 42 ++++++++++++++-----
 arch/s390/kernel/ptrace.c           | 61 +++++++++++++++-------------
 arch/s390/kernel/signal.c           | 13 ++++++
 8 files changed, 202 insertions(+), 48 deletions(-)
 create mode 100644 arch/s390/include/asm/syscall.h

(limited to 'arch/s390')

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 8d41908e2513..4c03049e7db9 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -74,6 +74,7 @@ config S390
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
 	select HAVE_KVM if 64BIT
+	select HAVE_ARCH_TRACEHOOK
 
 source "init/Kconfig"
 
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index af2c9ac28a07..a7226f8143fb 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -490,6 +490,7 @@ extern void user_disable_single_step(struct task_struct *);
 
 #define user_mode(regs) (((regs)->psw.mask & PSW_MASK_PSTATE) != 0)
 #define instruction_pointer(regs) ((regs)->psw.addr & PSW_ADDR_INSN)
+#define user_stack_pointer(regs)((regs)->gprs[15])
 #define regs_return_value(regs)((regs)->gprs[2])
 #define profile_pc(regs) instruction_pointer(regs)
 extern void show_regs(struct pt_regs * regs);
diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h
new file mode 100644
index 000000000000..6e623971fbb9
--- /dev/null
+++ b/arch/s390/include/asm/syscall.h
@@ -0,0 +1,80 @@
+/*
+ * Access to user system call parameters and results
+ *
+ *  Copyright IBM Corp. 2008
+ *  Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+
+#ifndef _ASM_SYSCALL_H
+#define _ASM_SYSCALL_H	1
+
+#include <asm/ptrace.h>
+
+static inline long syscall_get_nr(struct task_struct *task,
+				  struct pt_regs *regs)
+{
+	if (regs->trap != __LC_SVC_OLD_PSW)
+		return -1;
+	return regs->gprs[2];
+}
+
+static inline void syscall_rollback(struct task_struct *task,
+				    struct pt_regs *regs)
+{
+	regs->gprs[2] = regs->orig_gpr2;
+}
+
+static inline long syscall_get_error(struct task_struct *task,
+				     struct pt_regs *regs)
+{
+	return (regs->gprs[2] >= -4096UL) ? -regs->gprs[2] : 0;
+}
+
+static inline long syscall_get_return_value(struct task_struct *task,
+					    struct pt_regs *regs)
+{
+	return regs->gprs[2];
+}
+
+static inline void syscall_set_return_value(struct task_struct *task,
+					    struct pt_regs *regs,
+					    int error, long val)
+{
+	regs->gprs[2] = error ? -error : val;
+}
+
+static inline void syscall_get_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 unsigned long *args)
+{
+	BUG_ON(i + n > 6);
+#ifdef CONFIG_COMPAT
+	if (test_tsk_thread_flag(task, TIF_31BIT)) {
+		if (i + n == 6)
+			args[--n] = (u32) regs->args[0];
+		while (n-- > 0)
+			args[n] = (u32) regs->gprs[2 + i + n];
+	}
+#endif
+	if (i + n == 6)
+		args[--n] = regs->args[0];
+	memcpy(args, &regs->gprs[2 + i], n * sizeof(args[0]));
+}
+
+static inline void syscall_set_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 const unsigned long *args)
+{
+	BUG_ON(i + n > 6);
+	if (i + n == 6)
+		regs->args[0] = args[--n];
+	memcpy(&regs->gprs[2 + i], args, n * sizeof(args[0]));
+}
+
+#endif	/* _ASM_SYSCALL_H */
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index 91a8f93ad355..ea40a9d690fc 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -86,6 +86,7 @@ static inline struct thread_info *current_thread_info(void)
  * thread information flags bit numbers
  */
 #define TIF_SYSCALL_TRACE	0	/* syscall trace active */
+#define TIF_NOTIFY_RESUME	1	/* callback before returning to user */
 #define TIF_SIGPENDING		2	/* signal pending */
 #define TIF_NEED_RESCHED	3	/* rescheduling necessary */
 #define TIF_RESTART_SVC		4	/* restart svc with new svc number */
@@ -100,6 +101,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_RESTORE_SIGMASK	20	/* restore signal mask in do_signal() */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
+#define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
 #define _TIF_RESTORE_SIGMASK	(1<<TIF_RESTORE_SIGMASK)
 #define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 708cf9cf9a35..ed500ef799b7 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -49,9 +49,9 @@ SP_ILC	     =	STACK_FRAME_OVERHEAD + __PT_ILC
 SP_TRAP      =	STACK_FRAME_OVERHEAD + __PT_TRAP
 SP_SIZE      =	STACK_FRAME_OVERHEAD + __PT_SIZE
 
-_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
+_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
 		 _TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_SINGLE_STEP )
-_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
+_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
 		 _TIF_MCCK_PENDING)
 
 STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER
@@ -318,6 +318,8 @@ sysc_work:
 	bo	BASED(sysc_reschedule)
 	tm	__TI_flags+3(%r9),_TIF_SIGPENDING
 	bnz	BASED(sysc_sigpending)
+	tm	__TI_flags+3(%r9),_TIF_NOTIFY_RESUME
+	bnz	BASED(sysc_notify_resume)
 	tm	__TI_flags+3(%r9),_TIF_RESTART_SVC
 	bo	BASED(sysc_restart)
 	tm	__TI_flags+3(%r9),_TIF_SINGLE_STEP
@@ -355,6 +357,16 @@ sysc_sigpending:
 	bo	BASED(sysc_singlestep)
 	b	BASED(sysc_work_loop)
 
+#
+# _TIF_NOTIFY_RESUME is set, call do_notify_resume
+#
+sysc_notify_resume:
+	la	%r2,SP_PTREGS(%r15)	# load pt_regs
+	l	%r1,BASED(.Ldo_notify_resume)
+	la	%r14,BASED(sysc_work_loop)
+	br	%r1			# call do_notify_resume
+
+
 #
 # _TIF_RESTART_SVC is set, set up registers and restart svc
 #
@@ -378,20 +390,21 @@ sysc_singlestep:
 	br	%r1			# branch to do_single_step
 
 #
-# call trace before and after sys_call
+# call tracehook_report_syscall_entry/tracehook_report_syscall_exit before
+# and after the system call
 #
 sysc_tracesys:
-	l	%r1,BASED(.Ltrace)
+	l	%r1,BASED(.Ltrace_entry)
 	la	%r2,SP_PTREGS(%r15)	# load pt_regs
 	la	%r3,0
 	srl	%r7,2
 	st	%r7,SP_R2(%r15)
 	basr	%r14,%r1
-	clc	SP_R2(4,%r15),BASED(.Lnr_syscalls)
+	cl	%r2,BASED(.Lnr_syscalls)
 	bnl	BASED(sysc_tracenogo)
 	l	%r8,BASED(.Lsysc_table)
-	l	%r7,SP_R2(%r15) 	# strace might have changed the
-	sll	%r7,2			#  system call
+	lr	%r7,%r2
+	sll	%r7,2			# *4
 	l	%r8,0(%r7,%r8)
 sysc_tracego:
 	lm	%r3,%r6,SP_R3(%r15)
@@ -401,9 +414,8 @@ sysc_tracego:
 sysc_tracenogo:
 	tm	__TI_flags+3(%r9),(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT)
 	bz	BASED(sysc_return)
-	l	%r1,BASED(.Ltrace)
+	l	%r1,BASED(.Ltrace_exit)
 	la	%r2,SP_PTREGS(%r15)	# load pt_regs
-	la	%r3,1
 	la	%r14,BASED(sysc_return)
 	br	%r1
 
@@ -666,6 +678,8 @@ io_work_loop:
 	bo	BASED(io_reschedule)
 	tm	__TI_flags+3(%r9),_TIF_SIGPENDING
 	bnz	BASED(io_sigpending)
+	tm	__TI_flags+3(%r9),_TIF_NOTIFY_RESUME
+	bnz	BASED(io_notify_resume)
 	b	BASED(io_restore)
 io_work_done:
 
@@ -704,6 +718,19 @@ io_sigpending:
 	TRACE_IRQS_OFF
 	b	BASED(io_work_loop)
 
+#
+# _TIF_NOTIFY_RESUME is set, call do_notify_resume
+#
+io_notify_resume:
+	TRACE_IRQS_ON
+	stosm	__SF_EMPTY(%r15),0x03	# reenable interrupts
+	la	%r2,SP_PTREGS(%r15)	# load pt_regs
+	l	%r1,BASED(.Ldo_notify_resume)
+	basr	%r14,%r1		# call do_notify_resume
+	stnsm	__SF_EMPTY(%r15),0xfc	# disable I/O and ext. interrupts
+	TRACE_IRQS_OFF
+	b	BASED(io_work_loop)
+
 /*
  * External interrupt handler routine
  */
@@ -1070,6 +1097,8 @@ cleanup_io_leave_insn:
 .Ldo_IRQ:	.long	do_IRQ
 .Ldo_extint:	.long	do_extint
 .Ldo_signal:	.long	do_signal
+.Ldo_notify_resume:
+		.long	do_notify_resume
 .Lhandle_per:	.long	do_single_step
 .Ldo_execve:	.long	do_execve
 .Lexecve_tail:	.long	execve_tail
@@ -1079,7 +1108,8 @@ cleanup_io_leave_insn:
 .Lpreempt_schedule_irq:
 		.long	preempt_schedule_irq
 #endif
-.Ltrace:	.long	syscall_trace
+.Ltrace_entry:	.long	do_syscall_trace_enter
+.Ltrace_exit:	.long	do_syscall_trace_exit
 .Lschedtail:	.long	schedule_tail
 .Lsysc_table:	.long	sys_call_table
 #ifdef CONFIG_TRACE_IRQFLAGS
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index fee10177dbfc..d7ce150453f2 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -52,9 +52,9 @@ SP_SIZE      =	STACK_FRAME_OVERHEAD + __PT_SIZE
 STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER
 STACK_SIZE  = 1 << STACK_SHIFT
 
-_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
+_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
 		 _TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_SINGLE_STEP )
-_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
+_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
 		 _TIF_MCCK_PENDING)
 
 #define BASED(name) name-system_call(%r13)
@@ -310,6 +310,8 @@ sysc_work:
 	jo	sysc_reschedule
 	tm	__TI_flags+7(%r9),_TIF_SIGPENDING
 	jnz	sysc_sigpending
+	tm	__TI_flags+7(%r9),_TIF_NOTIFY_RESUME
+	jnz	sysc_notify_resume
 	tm	__TI_flags+7(%r9),_TIF_RESTART_SVC
 	jo	sysc_restart
 	tm	__TI_flags+7(%r9),_TIF_SINGLE_STEP
@@ -344,6 +346,14 @@ sysc_sigpending:
 	jo	sysc_singlestep
 	j	sysc_work_loop
 
+#
+# _TIF_NOTIFY_RESUME is set, call do_notify_resume
+#
+sysc_notify_resume:
+	la	%r2,SP_PTREGS(%r15)	# load pt_regs
+	larl	%r14,sysc_work_loop
+	jg	do_notify_resume	# call do_notify_resume
+
 #
 # _TIF_RESTART_SVC is set, set up registers and restart svc
 #
@@ -367,20 +377,19 @@ sysc_singlestep:
 	jg	do_single_step		# branch to do_sigtrap
 
 #
-# call syscall_trace before and after system call
-# special linkage: %r12 contains the return address for trace_svc
+# call tracehook_report_syscall_entry/tracehook_report_syscall_exit before
+# and after the system call
 #
 sysc_tracesys:
 	la	%r2,SP_PTREGS(%r15)	# load pt_regs
 	la	%r3,0
 	srl	%r7,2
 	stg	%r7,SP_R2(%r15)
-	brasl	%r14,syscall_trace
+	brasl	%r14,do_syscall_trace_enter
 	lghi	%r0,NR_syscalls
-	clg	%r0,SP_R2(%r15)
+	clgr	%r0,%r2
 	jnh	sysc_tracenogo
-	lg	%r7,SP_R2(%r15)		# strace might have changed the
-	sll	%r7,2			# system call
+	slag	%r7,%r2,2		# *4
 	lgf	%r8,0(%r7,%r10)
 sysc_tracego:
 	lmg	%r3,%r6,SP_R3(%r15)
@@ -391,9 +400,8 @@ sysc_tracenogo:
 	tm	__TI_flags+7(%r9),(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT)
 	jz	sysc_return
 	la	%r2,SP_PTREGS(%r15)	# load pt_regs
-	la	%r3,1
 	larl	%r14,sysc_return	# return point is sysc_return
-	jg	syscall_trace
+	jg	do_syscall_trace_exit
 
 #
 # a new process exits the kernel with ret_from_fork
@@ -672,6 +680,8 @@ io_work_loop:
 	jo	io_reschedule
 	tm	__TI_flags+7(%r9),_TIF_SIGPENDING
 	jnz	io_sigpending
+	tm	__TI_flags+7(%r9),_TIF_NOTIFY_RESUME
+	jnz	io_notify_resume
 	j	io_restore
 io_work_done:
 
@@ -712,6 +722,18 @@ io_sigpending:
 	TRACE_IRQS_OFF
 	j	io_work_loop
 
+#
+# _TIF_NOTIFY_RESUME is set, call do_notify_resume
+#
+io_notify_resume:
+	TRACE_IRQS_ON
+	stosm	__SF_EMPTY(%r15),0x03	# reenable interrupts
+	la	%r2,SP_PTREGS(%r15)	# load pt_regs
+	brasl	%r14,do_notify_resume	# call do_notify_resume
+	stnsm	__SF_EMPTY(%r15),0xfc	# disable I/O and ext. interrupts
+	TRACE_IRQS_OFF
+	j	io_work_loop
+
 /*
  * External interrupt handler routine
  */
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index c8b08289eb87..1f31be1ecc4b 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -35,6 +35,7 @@
 #include <linux/signal.h>
 #include <linux/elf.h>
 #include <linux/regset.h>
+#include <linux/tracehook.h>
 
 #include <asm/segment.h>
 #include <asm/page.h>
@@ -639,40 +640,44 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 }
 #endif
 
-asmlinkage void
-syscall_trace(struct pt_regs *regs, int entryexit)
+asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
 {
-	if (unlikely(current->audit_context) && entryexit)
-		audit_syscall_exit(AUDITSC_RESULT(regs->gprs[2]), regs->gprs[2]);
-
-	if (!test_thread_flag(TIF_SYSCALL_TRACE))
-		goto out;
-	if (!(current->ptrace & PT_PTRACED))
-		goto out;
-	ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
-				 ? 0x80 : 0));
+	long ret;
 
 	/*
-	 * If the debuffer has set an invalid system call number,
-	 * we prepare to skip the system call restart handling.
+	 * The sysc_tracesys code in entry.S stored the system
+	 * call number to gprs[2].
 	 */
-	if (!entryexit && regs->gprs[2] >= NR_syscalls)
+	ret = regs->gprs[2];
+	if (test_thread_flag(TIF_SYSCALL_TRACE) &&
+	    (tracehook_report_syscall_entry(regs) ||
+	     regs->gprs[2] >= NR_syscalls)) {
+		/*
+		 * Tracing decided this syscall should not happen or the
+		 * debugger stored an invalid system call number. Skip
+		 * the system call and the system call restart handling.
+		 */
 		regs->trap = -1;
-
-	/*
-	 * this isn't the same as continuing with a signal, but it will do
-	 * for normal use.  strace only continues with a signal if the
-	 * stopping signal is not SIGTRAP.  -brl
-	 */
-	if (current->exit_code) {
-		send_sig(current->exit_code, current, 1);
-		current->exit_code = 0;
+		ret = -1;
 	}
- out:
-	if (unlikely(current->audit_context) && !entryexit)
-		audit_syscall_entry(test_thread_flag(TIF_31BIT)?AUDIT_ARCH_S390:AUDIT_ARCH_S390X,
-				    regs->gprs[2], regs->orig_gpr2, regs->gprs[3],
-				    regs->gprs[4], regs->gprs[5]);
+
+	if (unlikely(current->audit_context))
+		audit_syscall_entry(test_thread_flag(TIF_31BIT) ?
+					AUDIT_ARCH_S390 : AUDIT_ARCH_S390X,
+				    regs->gprs[2], regs->orig_gpr2,
+				    regs->gprs[3], regs->gprs[4],
+				    regs->gprs[5]);
+	return ret;
+}
+
+asmlinkage void do_syscall_trace_exit(struct pt_regs *regs)
+{
+	if (unlikely(current->audit_context))
+		audit_syscall_exit(AUDITSC_RESULT(regs->gprs[2]),
+				   regs->gprs[2]);
+
+	if (test_thread_flag(TIF_SYSCALL_TRACE))
+		tracehook_report_syscall_exit(regs, 0);
 }
 
 /*
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index b97682040215..4f7fc3059a8e 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -24,6 +24,7 @@
 #include <linux/tty.h>
 #include <linux/personality.h>
 #include <linux/binfmts.h>
+#include <linux/tracehook.h>
 #include <asm/ucontext.h>
 #include <asm/uaccess.h>
 #include <asm/lowcore.h>
@@ -507,6 +508,12 @@ void do_signal(struct pt_regs *regs)
 			 */
 			if (current->thread.per_info.single_step)
 				set_thread_flag(TIF_SINGLE_STEP);
+
+			/*
+			 * Let tracing know that we've done the handler setup.
+			 */
+			tracehook_signal_handler(signr, &info, &ka, regs,
+					 test_thread_flag(TIF_SINGLE_STEP));
 		}
 		return;
 	}
@@ -526,3 +533,9 @@ void do_signal(struct pt_regs *regs)
 		set_thread_flag(TIF_RESTART_SVC);
 	}
 }
+
+void do_notify_resume(struct pt_regs *regs)
+{
+	clear_thread_flag(TIF_NOTIFY_RESUME);	/* drop the request flag ... */
+	tracehook_notify_resume(regs);		/* ... then run the tracehook */
+}
-- 
cgit v1.2.3-70-g09d2


From b2300b9efe1b8174833e17f37e975c9da00c388a Mon Sep 17 00:00:00 2001
From: Hongjie Yang <hongjie@us.ibm.com>
Date: Fri, 10 Oct 2008 21:33:21 +0200
Subject: [S390] dcssblk: add >2G DCSSs support and stacked contiguous DCSSs
 support.

The DCSS block device driver is modified to add >2G DCSSs support and
allow a DCSS block device to map to a set of contiguous DCSSs.  The
extmem code is also modified to use new Diagnose x'64' subcodes for
>2G DCSSs.

Signed-off-by: Hongjie Yang <hongjie@us.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/mm/extmem.c        | 251 +++++++++++++++++----
 drivers/s390/block/dcssblk.c | 515 ++++++++++++++++++++++++++++++++-----------
 2 files changed, 596 insertions(+), 170 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index f231f5ec74b6..580fc64cc735 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -43,20 +43,40 @@
 #define DCSS_FINDSEG    0x0c
 #define DCSS_LOADNOLY   0x10
 #define DCSS_SEGEXT     0x18
+#define DCSS_LOADSHRX	0x20
+#define DCSS_LOADNSRX	0x24
+#define DCSS_FINDSEGX	0x2c
+#define DCSS_SEGEXTX	0x38
 #define DCSS_FINDSEGA   0x0c
 
 struct qrange {
-	unsigned int  start; // 3byte start address, 1 byte type
-	unsigned int  end;   // 3byte end address, 1 byte reserved
+	unsigned long  start; /* last byte type */
+	unsigned long  end;   /* last byte reserved */
 };
 
 struct qout64 {
+	unsigned long segstart;
+	unsigned long segend;
+	int segcnt;
+	int segrcnt;
+	struct qrange range[6];
+};
+
+#ifdef CONFIG_64BIT
+struct qrange_old {
+	unsigned int start; /* last byte type */
+	unsigned int end;   /* last byte reserved */
+};
+
+/* output area format for the Diag x'64' old subcode x'18' */
+struct qout64_old {
 	int segstart;
 	int segend;
 	int segcnt;
 	int segrcnt;
-	struct qrange range[6];
+	struct qrange_old range[6];
 };
+#endif
 
 struct qin64 {
 	char qopcode;
@@ -86,6 +106,55 @@ static DEFINE_MUTEX(dcss_lock);
 static LIST_HEAD(dcss_list);
 static char *segtype_string[] = { "SW", "EW", "SR", "ER", "SN", "EN", "SC",
 					"EW/EN-MIXED" };
+static int loadshr_scode, loadnsr_scode, findseg_scode;
+static int segext_scode, purgeseg_scode;
+static int scode_set;
+
+/* Probe Diag x'64' and select the new (...X) or the old subcode set. */
+static int
+dcss_set_subcodes(void)
+{
+#ifdef CONFIG_64BIT
+	char *name = kmalloc(8 * sizeof(char), GFP_KERNEL | GFP_DMA);
+	unsigned long rx, ry;
+	int rc;
+
+	if (name == NULL)
+		return -ENOMEM;	/* the only failure this function reports */
+
+	rx = (unsigned long) name;
+	ry = DCSS_FINDSEGX;	/* probe with a new-style subcode */
+
+	strcpy(name, "dummy");
+	asm volatile(
+		"	diag	%0,%1,0x64\n"
+		"0:	ipm	%2\n"
+		"	srl	%2,28\n"
+		"	j	2f\n"
+		"1:	la	%2,3\n"	/* EX_TABLE fixup target: force rc = 3 */
+		"2:\n"
+		EX_TABLE(0b, 1b)
+		: "+d" (rx), "+d" (ry), "=d" (rc) : : "cc");
+
+	kfree(name);
+	/* Diag x'64' new subcodes are supported, set to new subcodes */
+	if (rc != 3) {
+		loadshr_scode = DCSS_LOADSHRX;
+		loadnsr_scode = DCSS_LOADNSRX;
+		purgeseg_scode = DCSS_PURGESEG;
+		findseg_scode = DCSS_FINDSEGX;
+		segext_scode = DCSS_SEGEXTX;
+		return 0;
+	}
+#endif
+	/* Diag x'64' new subcodes are not supported, set to old subcodes */
+	loadshr_scode = DCSS_LOADNOLY;
+	loadnsr_scode = DCSS_LOADNSR;
+	purgeseg_scode = DCSS_PURGESEG;
+	findseg_scode = DCSS_FINDSEG;
+	segext_scode = DCSS_SEGEXT;
+	return 0;
+}
 
 /*
  * Create the 8 bytes, ebcdic VM segment name from
@@ -135,25 +204,45 @@ segment_by_name (char *name)
  * Perform a function on a dcss segment.
  */
 static inline int
-dcss_diag (__u8 func, void *parameter,
+dcss_diag(int *func, void *parameter,
            unsigned long *ret1, unsigned long *ret2)
 {
 	unsigned long rx, ry;
 	int rc;
 
+	if (scode_set == 0) {
+		rc = dcss_set_subcodes();
+		if (rc < 0)
+			return rc;
+		scode_set = 1;
+	}
 	rx = (unsigned long) parameter;
-	ry = (unsigned long) func;
-	asm volatile(
+	ry = (unsigned long) *func;
+
 #ifdef CONFIG_64BIT
-		"	sam31\n"
-		"	diag	%0,%1,0x64\n"
-		"	sam64\n"
+	/* 64-bit Diag x'64' new subcode, keep in 64-bit addressing mode */
+	if (*func > DCSS_SEGEXT)
+		asm volatile(
+			"	diag	%0,%1,0x64\n"
+			"	ipm	%2\n"
+			"	srl	%2,28\n"
+			: "+d" (rx), "+d" (ry), "=d" (rc) : : "cc");
+	/* 31-bit Diag x'64' old subcode, switch to 31-bit addressing mode */
+	else
+		asm volatile(
+			"	sam31\n"
+			"	diag	%0,%1,0x64\n"
+			"	sam64\n"
+			"	ipm	%2\n"
+			"	srl	%2,28\n"
+			: "+d" (rx), "+d" (ry), "=d" (rc) : : "cc");
 #else
+	asm volatile(
 		"	diag	%0,%1,0x64\n"
-#endif
 		"	ipm	%2\n"
 		"	srl	%2,28\n"
 		: "+d" (rx), "+d" (ry), "=d" (rc) : : "cc");
+#endif
 	*ret1 = rx;
 	*ret2 = ry;
 	return rc;
@@ -190,14 +279,45 @@ query_segment_type (struct dcss_segment *seg)
 	qin->qoutlen = sizeof(struct qout64);
 	memcpy (qin->qname, seg->dcss_name, 8);
 
-	diag_cc = dcss_diag (DCSS_SEGEXT, qin, &dummy, &vmrc);
+	diag_cc = dcss_diag(&segext_scode, qin, &dummy, &vmrc);
 
+	if (diag_cc < 0) {
+		rc = diag_cc;
+		goto out_free;
+	}
 	if (diag_cc > 1) {
 		PRINT_WARN ("segment_type: diag returned error %ld\n", vmrc);
 		rc = dcss_diag_translate_rc (vmrc);
 		goto out_free;
 	}
 
+#ifdef CONFIG_64BIT
+	/* Only old format of output area of Diagnose x'64' is supported,
+	   copy data for the new format. */
+	if (segext_scode == DCSS_SEGEXT) {
+		struct qout64_old *qout_old;
+		qout_old = kzalloc(sizeof(struct qout64_old), GFP_DMA);
+		if (qout_old == NULL) {
+			rc = -ENOMEM;
+			goto out_free;
+		}
+		memcpy(qout_old, qout, sizeof(struct qout64_old));
+		qout->segstart = (unsigned long) qout_old->segstart;
+		qout->segend = (unsigned long) qout_old->segend;
+		qout->segcnt = qout_old->segcnt;
+		qout->segrcnt = qout_old->segrcnt;
+
+		if (qout->segcnt > 6)
+			qout->segrcnt = 6;
+		for (i = 0; i < qout->segrcnt; i++) {
+			qout->range[i].start =
+				(unsigned long) qout_old->range[i].start;
+			qout->range[i].end =
+				(unsigned long) qout_old->range[i].end;
+		}
+		kfree(qout_old);
+	}
+#endif
 	if (qout->segcnt > 6) {
 		rc = -ENOTSUPP;
 		goto out_free;
@@ -268,6 +388,30 @@ segment_type (char* name)
 	return seg.vm_segtype;
 }
 
+/*
+ * Report whether seg collides with any other segment on dcss_list; the
+ * addresses are compared after >> 20. dcss_lock must be held by the caller.
+ * Returns 1 if a collision was found, 0 otherwise.
+ */
+static int
+segment_overlaps_others (struct dcss_segment *seg)
+{
+	struct dcss_segment *other;
+
+	BUG_ON(!mutex_is_locked(&dcss_lock));
+	list_for_each_entry(other, &dcss_list, list) {
+		/* seg itself is not a collision partner */
+		if (other == seg)
+			continue;
+		/* disjoint if other lies entirely above or below seg */
+		if ((other->start_addr >> 20) > (seg->end >> 20) ||
+		    (other->end >> 20) < (seg->start_addr >> 20))
+			continue;
+		return 1;
+	}
+	return 0;
+}
+
 /*
  * real segment loading function, called from segment_load
  */
@@ -276,7 +420,8 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
 {
 	struct dcss_segment *seg = kmalloc(sizeof(struct dcss_segment),
 			GFP_DMA);
-	int dcss_command, rc, diag_cc;
+	int rc, diag_cc;
+	unsigned long start_addr, end_addr, dummy;
 
 	if (seg == NULL) {
 		rc = -ENOMEM;
@@ -287,6 +432,13 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
 	if (rc < 0)
 		goto out_free;
 
+	if (loadshr_scode == DCSS_LOADSHRX) {
+		if (segment_overlaps_others(seg)) {
+			rc = -EBUSY;
+			goto out_free;
+		}
+	}
+
 	rc = vmem_add_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
 
 	if (rc)
@@ -316,20 +468,28 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
 	}
 
 	if (do_nonshared)
-		dcss_command = DCSS_LOADNSR;
+		diag_cc = dcss_diag(&loadnsr_scode, seg->dcss_name,
+				&start_addr, &end_addr);
 	else
-		dcss_command = DCSS_LOADNOLY;
-
-	diag_cc = dcss_diag(dcss_command, seg->dcss_name,
-			&seg->start_addr, &seg->end);
+		diag_cc = dcss_diag(&loadshr_scode, seg->dcss_name,
+				&start_addr, &end_addr);
+	if (diag_cc < 0) {
+		dcss_diag(&purgeseg_scode, seg->dcss_name,
+				&dummy, &dummy);
+		rc = diag_cc;
+		goto out_resource;
+	}
 	if (diag_cc > 1) {
 		PRINT_WARN ("segment_load: could not load segment %s - "
-				"diag returned error (%ld)\n",name,seg->end);
-		rc = dcss_diag_translate_rc (seg->end);
-		dcss_diag(DCSS_PURGESEG, seg->dcss_name,
-				&seg->start_addr, &seg->end);
+				"diag returned error (%ld)\n",
+				name, end_addr);
+		rc = dcss_diag_translate_rc(end_addr);
+		dcss_diag(&purgeseg_scode, seg->dcss_name,
+				&dummy, &dummy);
 		goto out_resource;
 	}
+	seg->start_addr = start_addr;
+	seg->end = end_addr;
 	seg->do_nonshared = do_nonshared;
 	atomic_set(&seg->ref_count, 1);
 	list_add(&seg->list, &dcss_list);
@@ -423,8 +583,8 @@ int
 segment_modify_shared (char *name, int do_nonshared)
 {
 	struct dcss_segment *seg;
-	unsigned long dummy;
-	int dcss_command, rc, diag_cc;
+	unsigned long start_addr, end_addr, dummy;
+	int rc, diag_cc;
 
 	mutex_lock(&dcss_lock);
 	seg = segment_by_name (name);
@@ -445,38 +605,51 @@ segment_modify_shared (char *name, int do_nonshared)
 		goto out_unlock;
 	}
 	release_resource(seg->res);
-	if (do_nonshared) {
-		dcss_command = DCSS_LOADNSR;
+	if (do_nonshared)
 		seg->res->flags &= ~IORESOURCE_READONLY;
-	} else {
-		dcss_command = DCSS_LOADNOLY;
+	else
 		if (seg->vm_segtype == SEG_TYPE_SR ||
 		    seg->vm_segtype == SEG_TYPE_ER)
 			seg->res->flags |= IORESOURCE_READONLY;
-	}
+
 	if (request_resource(&iomem_resource, seg->res)) {
 		PRINT_WARN("segment_modify_shared: could not reload segment %s"
 			   " - overlapping resources\n", name);
 		rc = -EBUSY;
 		kfree(seg->res);
-		goto out_del;
+		goto out_del_mem;
+	}
+
+	dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy);
+	if (do_nonshared)
+		diag_cc = dcss_diag(&loadnsr_scode, seg->dcss_name,
+				&start_addr, &end_addr);
+	else
+		diag_cc = dcss_diag(&loadshr_scode, seg->dcss_name,
+				&start_addr, &end_addr);
+	if (diag_cc < 0) {
+		rc = diag_cc;
+		goto out_del_res;
 	}
-	dcss_diag(DCSS_PURGESEG, seg->dcss_name, &dummy, &dummy);
-	diag_cc = dcss_diag(dcss_command, seg->dcss_name,
-			&seg->start_addr, &seg->end);
 	if (diag_cc > 1) {
 		PRINT_WARN ("segment_modify_shared: could not reload segment %s"
-				" - diag returned error (%ld)\n",name,seg->end);
-		rc = dcss_diag_translate_rc (seg->end);
-		goto out_del;
+				" - diag returned error (%ld)\n",
+				name, end_addr);
+		rc = dcss_diag_translate_rc(end_addr);
+		goto out_del_res;
 	}
+	seg->start_addr = start_addr;
+	seg->end = end_addr;
 	seg->do_nonshared = do_nonshared;
 	rc = 0;
 	goto out_unlock;
- out_del:
+ out_del_res:
+	release_resource(seg->res);
+	kfree(seg->res);
+ out_del_mem:
 	vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
 	list_del(&seg->list);
-	dcss_diag(DCSS_PURGESEG, seg->dcss_name, &dummy, &dummy);
+	dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy);
 	kfree(seg);
  out_unlock:
 	mutex_unlock(&dcss_lock);
@@ -510,7 +683,7 @@ segment_unload(char *name)
 	kfree(seg->res);
 	vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
 	list_del(&seg->list);
-	dcss_diag(DCSS_PURGESEG, seg->dcss_name, &dummy, &dummy);
+	dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy);
 	kfree(seg);
 out_unlock:
 	mutex_unlock(&dcss_lock);
@@ -545,7 +718,7 @@ segment_save(char *name)
 	endpfn = (seg->end) >> PAGE_SHIFT;
 	sprintf(cmd1, "DEFSEG %s", name);
 	for (i=0; i<seg->segcnt; i++) {
-		sprintf(cmd1+strlen(cmd1), " %X-%X %s",
+		sprintf(cmd1+strlen(cmd1), " %lX-%lX %s",
 			seg->range[i].start >> PAGE_SHIFT,
 			seg->range[i].end >> PAGE_SHIFT,
 			segtype_string[seg->range[i].start & 0xff]);
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index ea4272c8c677..a7ff167d5b81 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -31,7 +31,6 @@
 #define PRINT_WARN(x...)  printk(KERN_WARNING DCSSBLK_NAME " warning: " x)
 #define PRINT_ERR(x...)	  printk(KERN_ERR DCSSBLK_NAME " error: " x)
 
-
 static int dcssblk_open(struct inode *inode, struct file *filp);
 static int dcssblk_release(struct inode *inode, struct file *filp);
 static int dcssblk_make_request(struct request_queue *q, struct bio *bio);
@@ -48,6 +47,30 @@ static struct block_device_operations dcssblk_devops = {
 	.direct_access 	= dcssblk_direct_access,
 };
 
+struct dcssblk_dev_info {	/* one dcssblk block device */
+	struct list_head lh;		/* link on dcssblk_devices */
+	struct device dev;		/* embedded device (container_of) */
+	char segment_name[BUS_ID_SIZE];	/* first segment; also device name */
+	atomic_t use_count;		/* non-zero: treated as busy */
+	struct gendisk *gd;		/* disk from alloc_disk() */
+	unsigned long start;		/* lowest start over seg_list */
+	unsigned long end;		/* highest end over seg_list */
+	int segment_type;		/* type of the first segment */
+	unsigned char save_pending;	/* save once device becomes idle */
+	unsigned char is_shared;	/* 1: shared, 0: exclusive */
+	struct request_queue *dcssblk_queue;
+	int num_of_segments;		/* number of seg_list entries */
+	struct list_head seg_list;	/* list of struct segment_info */
+};
+
+struct segment_info {	/* one DCSS belonging to a dcssblk device */
+	struct list_head lh;		/* link on dcssblk_dev_info.seg_list */
+	char segment_name[BUS_ID_SIZE];	/* name handed to segment_load() */
+	unsigned long start;		/* filled in by segment_load() */
+	unsigned long end;		/* filled in by segment_load() */
+	int segment_type;		/* non-negative segment_load() result */
+};
+
 static ssize_t dcssblk_add_store(struct device * dev, struct device_attribute *attr, const char * buf,
 				  size_t count);
 static ssize_t dcssblk_remove_store(struct device * dev, struct device_attribute *attr, const char * buf,
@@ -58,30 +81,20 @@ static ssize_t dcssblk_save_show(struct device *dev, struct device_attribute *at
 static ssize_t dcssblk_shared_store(struct device * dev, struct device_attribute *attr, const char * buf,
 				  size_t count);
 static ssize_t dcssblk_shared_show(struct device *dev, struct device_attribute *attr, char *buf);
+static ssize_t dcssblk_seglist_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf);
 
 static DEVICE_ATTR(add, S_IWUSR, NULL, dcssblk_add_store);
 static DEVICE_ATTR(remove, S_IWUSR, NULL, dcssblk_remove_store);
-static DEVICE_ATTR(save, S_IWUSR | S_IRUGO, dcssblk_save_show,
+static DEVICE_ATTR(save, S_IWUSR | S_IRUSR, dcssblk_save_show,
 		   dcssblk_save_store);
-static DEVICE_ATTR(shared, S_IWUSR | S_IRUGO, dcssblk_shared_show,
+static DEVICE_ATTR(shared, S_IWUSR | S_IRUSR, dcssblk_shared_show,
 		   dcssblk_shared_store);
+static DEVICE_ATTR(seglist, S_IRUSR, dcssblk_seglist_show, NULL);
 
 static struct device *dcssblk_root_dev;
 
-struct dcssblk_dev_info {
-	struct list_head lh;
-	struct device dev;
-	char segment_name[BUS_ID_SIZE];
-	atomic_t use_count;
-	struct gendisk *gd;
-	unsigned long start;
-	unsigned long end;
-	int segment_type;
-	unsigned char save_pending;
-	unsigned char is_shared;
-	struct request_queue *dcssblk_queue;
-};
-
 static LIST_HEAD(dcssblk_devices);
 static struct rw_semaphore dcssblk_devices_sem;
 
@@ -91,8 +104,15 @@ static struct rw_semaphore dcssblk_devices_sem;
 static void
 dcssblk_release_segment(struct device *dev)
 {
-	PRINT_DEBUG("segment release fn called for %s\n", dev_name(dev));
-	kfree(container_of(dev, struct dcssblk_dev_info, dev));
+	struct dcssblk_dev_info *dev_info;
+	struct segment_info *entry, *temp;
+
+	dev_info = container_of(dev, struct dcssblk_dev_info, dev);
+	list_for_each_entry_safe(entry, temp, &dev_info->seg_list, lh) {
+		list_del(&entry->lh);
+		kfree(entry);
+	}
+	kfree(dev_info);
 	module_put(THIS_MODULE);
 }
 
@@ -142,6 +162,169 @@ dcssblk_get_device_by_name(char *name)
 	return NULL;
 }
 
+/*
+ * Search the seg_list of every device on dcssblk_devices for a segment
+ * with the given name; returns its struct segment_info or NULL.
+ * down_read(&dcssblk_devices_sem) must be held.
+ */
+static struct segment_info *
+dcssblk_get_segment_by_name(char *name)
+{
+	struct dcssblk_dev_info *device;
+	struct segment_info *seg;
+
+	list_for_each_entry(device, &dcssblk_devices, lh) {
+		list_for_each_entry(seg, &device->seg_list, lh) {
+			if (strcmp(seg->segment_name, name) == 0)
+				return seg;
+		}
+	}
+	return NULL;
+}
+
+/*
+ * Return the largest end address among the segments on
+ * dev_info->seg_list. An empty list yields 0.
+ */
+static unsigned long
+dcssblk_find_highest_addr(struct dcssblk_dev_info *dev_info)
+{
+	struct segment_info *seg;
+	unsigned long top = 0;
+
+	list_for_each_entry(seg, &dev_info->seg_list, lh) {
+		if (seg->end > top)
+			top = seg->end;
+	}
+	return top;
+}
+
+/*
+ * Return the smallest start address among the segments on
+ * dev_info->seg_list.
+ * An empty list yields 0.
+ */
+static unsigned long
+dcssblk_find_lowest_addr(struct dcssblk_dev_info *dev_info)
+{
+	struct segment_info *seg;
+	unsigned long lowest;
+	int seeded;
+
+	lowest = 0;
+	seeded = 0;
+	list_for_each_entry(seg, &dev_info->seg_list, lh) {
+		/* the first entry seeds the minimum unconditionally */
+		if (!seeded || seg->start < lowest)
+			lowest = seg->start;
+		seeded = 1;
+	}
+
+	return lowest;
+}
+
+/*
+ * Verify the device's segments are contiguous and type-compatible (0/-errno).
+ */
+static int
+dcssblk_is_continuous(struct dcssblk_dev_info *dev_info)
+{
+	int i, j, rc;
+	struct segment_info *sort_list, *entry, temp;
+
+	if (dev_info->num_of_segments <= 1)
+		return 0;	/* no neighbour to compare against */
+
+	sort_list = kzalloc(
+			sizeof(struct segment_info) * dev_info->num_of_segments,
+			GFP_KERNEL);
+	if (sort_list == NULL)
+		return -ENOMEM;
+	i = 0;
+	list_for_each_entry(entry, &dev_info->seg_list, lh) {
+		/* work on copies; seg_list itself is never reordered */
+		memcpy(&sort_list[i], entry, sizeof(struct segment_info));
+		i++;
+	}
+
+	/* sort the copies by ascending start address (exchange sort) */
+	for (i = 0; i < dev_info->num_of_segments; i++)
+		for (j = 0; j < dev_info->num_of_segments; j++)
+			if (sort_list[j].start > sort_list[i].start) {
+				memcpy(&temp, &sort_list[i],
+					sizeof(struct segment_info));
+				memcpy(&sort_list[i], &sort_list[j],
+					sizeof(struct segment_info));
+				memcpy(&sort_list[j], &temp,
+					sizeof(struct segment_info));
+			}
+
+	/* check continuity: each end + 1 must equal the next start */
+	for (i = 0; i < dev_info->num_of_segments - 1; i++) {
+		if ((sort_list[i].end + 1) != sort_list[i+1].start) {
+			PRINT_ERR("Segment %s is not contiguous with "
+				"segment %s\n",
+				sort_list[i].segment_name,
+				sort_list[i+1].segment_name);
+			rc = -EINVAL;
+			goto out;
+		}
+		/* EN and EW may be mixed: both exclusive, neither of them ER */
+		if (sort_list[i].segment_type != sort_list[i+1].segment_type) {
+			if (!(sort_list[i].segment_type & SEGMENT_EXCLUSIVE) ||
+				(sort_list[i].segment_type == SEG_TYPE_ER) ||
+				!(sort_list[i+1].segment_type &
+				SEGMENT_EXCLUSIVE) ||
+				(sort_list[i+1].segment_type == SEG_TYPE_ER)) {
+				PRINT_ERR("Segment %s has different type from "
+					"segment %s\n",
+					sort_list[i].segment_name,
+					sort_list[i+1].segment_name);
+				rc = -EINVAL;
+				goto out;
+			}
+		}
+	}
+	rc = 0;
+out:
+	kfree(sort_list);	/* single exit once the copy array exists */
+	return rc;
+}
+
+/*
+ * Load segment "name" in shared mode; returns its type or a negative errno.
+ */
+static int
+dcssblk_load_segment(char *name, struct segment_info **seg_info)
+{
+	int rc;
+
+	/* already on some device's seg_list? then refuse with -EEXIST */
+	down_read(&dcssblk_devices_sem);
+	*seg_info = dcssblk_get_segment_by_name(name);
+	up_read(&dcssblk_devices_sem);
+	if (*seg_info != NULL)
+		return -EEXIST;	/* *seg_info still points at that entry */
+
+	/* get a zeroed struct segment_info for the caller */
+	*seg_info = kzalloc(sizeof(struct segment_info), GFP_KERNEL);
+	if (*seg_info == NULL)
+		return -ENOMEM;
+
+	strcpy((*seg_info)->segment_name, name);
+
+	/* load the segment; start and end are filled in by segment_load() */
+	rc = segment_load(name, SEGMENT_SHARED,
+			&(*seg_info)->start, &(*seg_info)->end);
+	if (rc < 0) {
+		segment_warning(rc, (*seg_info)->segment_name);
+		kfree(*seg_info);	/* NOTE: *seg_info is left dangling */
+	} else {
+		INIT_LIST_HEAD(&(*seg_info)->lh);
+		(*seg_info)->segment_type = rc;	/* rc >= 0 is the type */
+	}
+	return rc;
+}
+
 static void dcssblk_unregister_callback(struct device *dev)
 {
 	device_unregister(dev);
@@ -165,6 +348,7 @@ static ssize_t
 dcssblk_shared_store(struct device *dev, struct device_attribute *attr, const char *inbuf, size_t count)
 {
 	struct dcssblk_dev_info *dev_info;
+	struct segment_info *entry, *temp;
 	int rc;
 
 	if ((count > 1) && (inbuf[1] != '\n') && (inbuf[1] != '\0'))
@@ -172,46 +356,46 @@ dcssblk_shared_store(struct device *dev, struct device_attribute *attr, const ch
 	down_write(&dcssblk_devices_sem);
 	dev_info = container_of(dev, struct dcssblk_dev_info, dev);
 	if (atomic_read(&dev_info->use_count)) {
-		PRINT_ERR("share: segment %s is busy!\n",
-			  dev_info->segment_name);
 		rc = -EBUSY;
 		goto out;
 	}
 	if (inbuf[0] == '1') {
-		// reload segment in shared mode
-		rc = segment_modify_shared(dev_info->segment_name,
-					   SEGMENT_SHARED);
-		if (rc < 0) {
-			BUG_ON(rc == -EINVAL);
-			if (rc != -EAGAIN)
-				goto removeseg;
-		} else {
-			dev_info->is_shared = 1;
-			switch (dev_info->segment_type) {
-				case SEG_TYPE_SR:
-				case SEG_TYPE_ER:
-				case SEG_TYPE_SC:
-					set_disk_ro(dev_info->gd,1);
+		/* reload segments in shared mode */
+		list_for_each_entry(entry, &dev_info->seg_list, lh) {
+			rc = segment_modify_shared(entry->segment_name,
+						SEGMENT_SHARED);
+			if (rc < 0) {
+				BUG_ON(rc == -EINVAL);
+				if (rc != -EAGAIN)
+					goto removeseg;
 			}
 		}
+		dev_info->is_shared = 1;
+		switch (dev_info->segment_type) {
+		case SEG_TYPE_SR:
+		case SEG_TYPE_ER:
+		case SEG_TYPE_SC:
+			set_disk_ro(dev_info->gd, 1);
+		}
 	} else if (inbuf[0] == '0') {
-		// reload segment in exclusive mode
+		/* reload segments in exclusive mode */
 		if (dev_info->segment_type == SEG_TYPE_SC) {
 			PRINT_ERR("Segment type SC (%s) cannot be loaded in "
-				  "non-shared mode\n", dev_info->segment_name);
+				"non-shared mode\n", dev_info->segment_name);
 			rc = -EINVAL;
 			goto out;
 		}
-		rc = segment_modify_shared(dev_info->segment_name,
-					   SEGMENT_EXCLUSIVE);
-		if (rc < 0) {
-			BUG_ON(rc == -EINVAL);
-			if (rc != -EAGAIN)
-				goto removeseg;
-		} else {
-			dev_info->is_shared = 0;
-			set_disk_ro(dev_info->gd, 0);
+		list_for_each_entry(entry, &dev_info->seg_list, lh) {
+			rc = segment_modify_shared(entry->segment_name,
+						   SEGMENT_EXCLUSIVE);
+			if (rc < 0) {
+				BUG_ON(rc == -EINVAL);
+				if (rc != -EAGAIN)
+					goto removeseg;
+			}
 		}
+		dev_info->is_shared = 0;
+		set_disk_ro(dev_info->gd, 0);
 	} else {
 		rc = -EINVAL;
 		goto out;
@@ -220,8 +404,14 @@ dcssblk_shared_store(struct device *dev, struct device_attribute *attr, const ch
 	goto out;
 
 removeseg:
-	PRINT_ERR("Could not reload segment %s, removing it now!\n",
-			dev_info->segment_name);
+	PRINT_ERR("Could not reload segment(s) of the device %s, removing "
+		"segment(s) now!\n",
+		dev_info->segment_name);
+	temp = entry;
+	list_for_each_entry(entry, &dev_info->seg_list, lh) {
+		if (entry != temp)
+			segment_unload(entry->segment_name);
+	}
 	list_del(&dev_info->lh);
 
 	del_gendisk(dev_info->gd);
@@ -254,6 +444,7 @@ static ssize_t
 dcssblk_save_store(struct device *dev, struct device_attribute *attr, const char *inbuf, size_t count)
 {
 	struct dcssblk_dev_info *dev_info;
+	struct segment_info *entry;
 
 	if ((count > 1) && (inbuf[1] != '\n') && (inbuf[1] != '\0'))
 		return -EINVAL;
@@ -263,14 +454,16 @@ dcssblk_save_store(struct device *dev, struct device_attribute *attr, const char
 	if (inbuf[0] == '1') {
 		if (atomic_read(&dev_info->use_count) == 0) {
 			// device is idle => we save immediately
-			PRINT_INFO("Saving segment %s\n",
+			PRINT_INFO("Saving segment(s) of the device %s\n",
 				   dev_info->segment_name);
-			segment_save(dev_info->segment_name);
+			list_for_each_entry(entry, &dev_info->seg_list, lh) {
+				segment_save(entry->segment_name);
+			}
 		}  else {
 			// device is busy => we save it when it becomes
 			// idle in dcssblk_release
-			PRINT_INFO("Segment %s is currently busy, it will "
-				   "be saved when it becomes idle...\n",
+			PRINT_INFO("Device %s is currently busy, segment(s) "
+				   "will be saved when it becomes idle...\n",
 				   dev_info->segment_name);
 			dev_info->save_pending = 1;
 		}
@@ -279,7 +472,8 @@ dcssblk_save_store(struct device *dev, struct device_attribute *attr, const char
 			// device is busy & the user wants to undo his save
 			// request
 			dev_info->save_pending = 0;
-			PRINT_INFO("Pending save for segment %s deactivated\n",
+			PRINT_INFO("Pending save for segment(s) of the device "
+					"%s deactivated\n",
 					dev_info->segment_name);
 		}
 	} else {
@@ -290,67 +484,124 @@ dcssblk_save_store(struct device *dev, struct device_attribute *attr, const char
 	return count;
 }
 
+/*
+ * show routine of the "seglist" device attribute: writes the name of every
+ * segment on the device's seg_list to buf, one per line; returns the length.
+ */
+static ssize_t
+dcssblk_seglist_show(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	struct dcssblk_dev_info *dev_info;
+	struct segment_info *seg;
+	int pos = 0;
+	int len;
+
+	down_read(&dcssblk_devices_sem);
+	dev_info = container_of(dev, struct dcssblk_dev_info, dev);
+	buf[0] = '\0';
+	list_for_each_entry(seg, &dev_info->seg_list, lh) {
+		len = strlen(seg->segment_name);
+		memcpy(buf + pos, seg->segment_name, len);
+		buf[pos + len] = '\n';
+		pos += len + 1;
+	}
+	up_read(&dcssblk_devices_sem);
+	return pos;
+}
+
 /*
  * device attribute for adding devices
  */
 static ssize_t
 dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
 {
-	int rc, i;
+	int rc, i, j, num_of_segments;
 	struct dcssblk_dev_info *dev_info;
+	struct segment_info *seg_info, *temp;
 	char *local_buf;
 	unsigned long seg_byte_size;
 
 	dev_info = NULL;
+	seg_info = NULL;
 	if (dev != dcssblk_root_dev) {
 		rc = -EINVAL;
 		goto out_nobuf;
 	}
+	if ((count < 1) || (buf[0] == '\0') || (buf[0] == '\n')) {
+		rc = -ENAMETOOLONG;
+		goto out_nobuf;
+	}
+
 	local_buf = kmalloc(count + 1, GFP_KERNEL);
 	if (local_buf == NULL) {
 		rc = -ENOMEM;
 		goto out_nobuf;
 	}
+
 	/*
 	 * parse input
 	 */
+	num_of_segments = 0;
 	for (i = 0; ((buf[i] != '\0') && (buf[i] != '\n') && i < count); i++) {
-		local_buf[i] = toupper(buf[i]);
+		for (j = i; (buf[j] != ':') &&
+			(buf[j] != '\0') &&
+			(buf[j] != '\n') &&
+			j < count; j++) {
+			local_buf[j-i] = toupper(buf[j]);
+		}
+		local_buf[j-i] = '\0';
+		if (((j - i) == 0) || ((j - i) > 8)) {
+			rc = -ENAMETOOLONG;
+			goto seg_list_del;
+		}
+
+		rc = dcssblk_load_segment(local_buf, &seg_info);
+		if (rc < 0)
+			goto seg_list_del;
+		/*
+		 * get a struct dcssblk_dev_info
+		 */
+		if (num_of_segments == 0) {
+			dev_info = kzalloc(sizeof(struct dcssblk_dev_info),
+					GFP_KERNEL);
+			if (dev_info == NULL) {
+				rc = -ENOMEM;
+				goto out;
+			}
+			strcpy(dev_info->segment_name, local_buf);
+			dev_info->segment_type = seg_info->segment_type;
+			INIT_LIST_HEAD(&dev_info->seg_list);
+		}
+		list_add_tail(&seg_info->lh, &dev_info->seg_list);
+		num_of_segments++;
+		i = j;
+
+		if ((buf[j] == '\0') || (buf[j] == '\n'))
+			break;
 	}
-	local_buf[i] = '\0';
-	if ((i == 0) || (i > 8)) {
+
+	/* no trailing colon at the end of the input */
+	if ((i > 0) && (buf[i-1] == ':')) {
 		rc = -ENAMETOOLONG;
-		goto out;
-	}
-	/*
-	 * already loaded?
-	 */
-	down_read(&dcssblk_devices_sem);
-	dev_info = dcssblk_get_device_by_name(local_buf);
-	up_read(&dcssblk_devices_sem);
-	if (dev_info != NULL) {
-		PRINT_WARN("Segment %s already loaded!\n", local_buf);
-		rc = -EEXIST;
-		goto out;
-	}
-	/*
-	 * get a struct dcssblk_dev_info
-	 */
-	dev_info = kzalloc(sizeof(struct dcssblk_dev_info), GFP_KERNEL);
-	if (dev_info == NULL) {
-		rc = -ENOMEM;
-		goto out;
+		goto seg_list_del;
 	}
+	strlcpy(local_buf, buf, i + 1);
+	dev_info->num_of_segments = num_of_segments;
+	rc = dcssblk_is_continuous(dev_info);
+	if (rc < 0)
+		goto seg_list_del;
+
+	dev_info->start = dcssblk_find_lowest_addr(dev_info);
+	dev_info->end = dcssblk_find_highest_addr(dev_info);
 
-	strcpy(dev_info->segment_name, local_buf);
-	dev_set_name(&dev_info->dev, local_buf);
+	dev_set_name(&dev_info->dev, dev_info->segment_name);
 	dev_info->dev.release = dcssblk_release_segment;
 	INIT_LIST_HEAD(&dev_info->lh);
-
 	dev_info->gd = alloc_disk(DCSSBLK_MINORS_PER_DISK);
 	if (dev_info->gd == NULL) {
 		rc = -ENOMEM;
-		goto free_dev_info;
+		goto seg_list_del;
 	}
 	dev_info->gd->major = dcssblk_major;
 	dev_info->gd->fops = &dcssblk_devops;
@@ -360,65 +611,52 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
 	dev_info->gd->driverfs_dev = &dev_info->dev;
 	blk_queue_make_request(dev_info->dcssblk_queue, dcssblk_make_request);
 	blk_queue_hardsect_size(dev_info->dcssblk_queue, 4096);
-	/*
-	 * load the segment
-	 */
-	rc = segment_load(local_buf, SEGMENT_SHARED,
-				&dev_info->start, &dev_info->end);
-	if (rc < 0) {
-		segment_warning(rc, dev_info->segment_name);
-		goto dealloc_gendisk;
-	}
+
 	seg_byte_size = (dev_info->end - dev_info->start + 1);
 	set_capacity(dev_info->gd, seg_byte_size >> 9); // size in sectors
-	PRINT_INFO("Loaded segment %s, size = %lu Byte, "
+	PRINT_INFO("Loaded segment(s) %s, size = %lu Byte, "
 		   "capacity = %lu (512 Byte) sectors\n", local_buf,
 		   seg_byte_size, seg_byte_size >> 9);
 
-	dev_info->segment_type = rc;
 	dev_info->save_pending = 0;
 	dev_info->is_shared = 1;
 	dev_info->dev.parent = dcssblk_root_dev;
 
 	/*
-	 * get minor, add to list
+	 *get minor, add to list
 	 */
 	down_write(&dcssblk_devices_sem);
-	if (dcssblk_get_device_by_name(local_buf)) {
-		up_write(&dcssblk_devices_sem);
+	if (dcssblk_get_segment_by_name(local_buf)) {
 		rc = -EEXIST;
-		goto unload_seg;
+		goto release_gd;
 	}
 	rc = dcssblk_assign_free_minor(dev_info);
-	if (rc) {
-		up_write(&dcssblk_devices_sem);
-		PRINT_ERR("No free minor number available! "
-			  "Unloading segment...\n");
-		goto unload_seg;
-	}
+	if (rc)
+		goto release_gd;
 	sprintf(dev_info->gd->disk_name, "dcssblk%d",
 		MINOR(disk_devt(dev_info->gd)));
 	list_add_tail(&dev_info->lh, &dcssblk_devices);
 
 	if (!try_module_get(THIS_MODULE)) {
 		rc = -ENODEV;
-		goto list_del;
+		goto dev_list_del;
 	}
 	/*
 	 * register the device
 	 */
 	rc = device_register(&dev_info->dev);
 	if (rc) {
-		PRINT_ERR("Segment %s could not be registered RC=%d\n",
-				local_buf, rc);
 		module_put(THIS_MODULE);
-		goto list_del;
+		goto dev_list_del;
 	}
 	get_device(&dev_info->dev);
 	rc = device_create_file(&dev_info->dev, &dev_attr_shared);
 	if (rc)
 		goto unregister_dev;
 	rc = device_create_file(&dev_info->dev, &dev_attr_save);
+	if (rc)
+		goto unregister_dev;
+	rc = device_create_file(&dev_info->dev, &dev_attr_seglist);
 	if (rc)
 		goto unregister_dev;
 
@@ -434,7 +672,6 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
 			set_disk_ro(dev_info->gd,0);
 			break;
 	}
-	PRINT_DEBUG("Segment %s loaded successfully\n", local_buf);
 	up_write(&dcssblk_devices_sem);
 	rc = count;
 	goto out;
@@ -445,20 +682,27 @@ unregister_dev:
 	dev_info->gd->queue = NULL;
 	put_disk(dev_info->gd);
 	device_unregister(&dev_info->dev);
-	segment_unload(dev_info->segment_name);
+	list_for_each_entry(seg_info, &dev_info->seg_list, lh) {
+		segment_unload(seg_info->segment_name);
+	}
 	put_device(&dev_info->dev);
 	up_write(&dcssblk_devices_sem);
 	goto out;
-list_del:
+dev_list_del:
 	list_del(&dev_info->lh);
-	up_write(&dcssblk_devices_sem);
-unload_seg:
-	segment_unload(local_buf);
-dealloc_gendisk:
+release_gd:
 	blk_cleanup_queue(dev_info->dcssblk_queue);
 	dev_info->gd->queue = NULL;
 	put_disk(dev_info->gd);
-free_dev_info:
+	up_write(&dcssblk_devices_sem);
+seg_list_del:
+	if (dev_info == NULL)
+		goto out;
+	list_for_each_entry_safe(seg_info, temp, &dev_info->seg_list, lh) {
+		list_del(&seg_info->lh);
+		segment_unload(seg_info->segment_name);
+		kfree(seg_info);
+	}
 	kfree(dev_info);
 out:
 	kfree(local_buf);
@@ -473,6 +717,7 @@ static ssize_t
 dcssblk_remove_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
 {
 	struct dcssblk_dev_info *dev_info;
+	struct segment_info *entry;
 	int rc, i;
 	char *local_buf;
 
@@ -499,26 +744,28 @@ dcssblk_remove_store(struct device *dev, struct device_attribute *attr, const ch
 	dev_info = dcssblk_get_device_by_name(local_buf);
 	if (dev_info == NULL) {
 		up_write(&dcssblk_devices_sem);
-		PRINT_WARN("Segment %s is not loaded!\n", local_buf);
+		PRINT_WARN("Device %s is not loaded!\n", local_buf);
 		rc = -ENODEV;
 		goto out_buf;
 	}
 	if (atomic_read(&dev_info->use_count) != 0) {
 		up_write(&dcssblk_devices_sem);
-		PRINT_WARN("Segment %s is in use!\n", local_buf);
+		PRINT_WARN("Device %s is in use!\n", local_buf);
 		rc = -EBUSY;
 		goto out_buf;
 	}
-	list_del(&dev_info->lh);
 
+	list_del(&dev_info->lh);
 	del_gendisk(dev_info->gd);
 	blk_cleanup_queue(dev_info->dcssblk_queue);
 	dev_info->gd->queue = NULL;
 	put_disk(dev_info->gd);
 	device_unregister(&dev_info->dev);
-	segment_unload(dev_info->segment_name);
-	PRINT_DEBUG("Segment %s unloaded successfully\n",
-			dev_info->segment_name);
+
+	/* unload all related segments */
+	list_for_each_entry(entry, &dev_info->seg_list, lh)
+		segment_unload(entry->segment_name);
+
 	put_device(&dev_info->dev);
 	up_write(&dcssblk_devices_sem);
 
@@ -550,6 +797,7 @@ static int
 dcssblk_release(struct inode *inode, struct file *filp)
 {
 	struct dcssblk_dev_info *dev_info;
+	struct segment_info *entry;
 	int rc;
 
 	dev_info = inode->i_bdev->bd_disk->private_data;
@@ -560,9 +808,11 @@ dcssblk_release(struct inode *inode, struct file *filp)
 	down_write(&dcssblk_devices_sem);
 	if (atomic_dec_and_test(&dev_info->use_count)
 	    && (dev_info->save_pending)) {
-		PRINT_INFO("Segment %s became idle and is being saved now\n",
+		PRINT_INFO("Device %s became idle and is being saved now\n",
 			    dev_info->segment_name);
-		segment_save(dev_info->segment_name);
+		list_for_each_entry(entry, &dev_info->seg_list, lh) {
+			segment_save(entry->segment_name);
+		}
 		dev_info->save_pending = 0;
 	}
 	up_write(&dcssblk_devices_sem);
@@ -602,7 +852,7 @@ dcssblk_make_request(struct request_queue *q, struct bio *bio)
 		case SEG_TYPE_SC:
 			/* cannot write to these segments */
 			if (bio_data_dir(bio) == WRITE) {
-				PRINT_WARN("rejecting write to ro segment %s\n",
+				PRINT_WARN("rejecting write to ro device %s\n",
 					   dev_name(&dev_info->dev));
 				goto fail;
 			}
@@ -658,7 +908,7 @@ static void
 dcssblk_check_params(void)
 {
 	int rc, i, j, k;
-	char buf[9];
+	char buf[DCSSBLK_PARM_LEN + 1];
 	struct dcssblk_dev_info *dev_info;
 
 	for (i = 0; (i < DCSSBLK_PARM_LEN) && (dcssblk_segments[i] != '\0');
@@ -666,15 +916,16 @@ dcssblk_check_params(void)
 		for (j = i; (dcssblk_segments[j] != ',')  &&
 			    (dcssblk_segments[j] != '\0') &&
 			    (dcssblk_segments[j] != '(')  &&
-			    (j - i) < 8; j++)
+			    (j < DCSSBLK_PARM_LEN); j++)
 		{
 			buf[j-i] = dcssblk_segments[j];
 		}
 		buf[j-i] = '\0';
 		rc = dcssblk_add_store(dcssblk_root_dev, NULL, buf, j-i);
 		if ((rc >= 0) && (dcssblk_segments[j] == '(')) {
-			for (k = 0; buf[k] != '\0'; k++)
+			for (k = 0; (buf[k] != ':') && (buf[k] != '\0'); k++)
 				buf[k] = toupper(buf[k]);
+			buf[k] = '\0';
 			if (!strncmp(&dcssblk_segments[j], "(local)", 7)) {
 				down_read(&dcssblk_devices_sem);
 				dev_info = dcssblk_get_device_by_name(buf);
@@ -741,10 +992,12 @@ module_exit(dcssblk_exit);
 
 module_param_string(segments, dcssblk_segments, DCSSBLK_PARM_LEN, 0444);
 MODULE_PARM_DESC(segments, "Name of DCSS segment(s) to be loaded, "
-		 "comma-separated list, each name max. 8 chars.\n"
-		 "Adding \"(local)\" to segment name equals echoing 0 to "
-		 "/sys/devices/dcssblk/<segment name>/shared after loading "
-		 "the segment - \n"
-		 "e.g. segments=\"mydcss1,mydcss2,mydcss3(local)\"");
+		 "comma-separated list, names in each set separated "
+		 "by commas are separated by colons, each set contains "
+		 "names of contiguous segments and each name max. 8 chars.\n"
+		 "Adding \"(local)\" to the end of each set equals echoing 0 "
+		 "to /sys/devices/dcssblk/<device name>/shared after loading "
+		 "the contiguous segments - \n"
+		 "e.g. segments=\"mydcss1,mydcss2:mydcss3,mydcss4(local)\"");
 
 MODULE_LICENSE("GPL");
-- 
cgit v1.2.3-70-g09d2


From 5a0d0e65379256b4da2c9092e197a2c761f51c01 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 10 Oct 2008 21:33:22 +0200
Subject: [S390] Move private simple udelay function to arch/s390/lib/delay.c.

Move cio's private simple udelay function to lib/delay.c and turn it
into something much more readable. This way all udelay implementations
live in one place.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/delay.h |  1 +
 arch/s390/lib/delay.c         | 13 +++++++++++++
 drivers/s390/cio/cio.c        | 17 ++---------------
 3 files changed, 16 insertions(+), 15 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/include/asm/delay.h b/arch/s390/include/asm/delay.h
index 78357314c450..a356c958e260 100644
--- a/arch/s390/include/asm/delay.h
+++ b/arch/s390/include/asm/delay.h
@@ -15,6 +15,7 @@
 #define _S390_DELAY_H
 
 extern void __udelay(unsigned long usecs);
+extern void udelay_simple(unsigned long usecs);
 extern void __delay(unsigned long loops);
 
 #define udelay(n) __udelay(n)
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index 0953cee05efc..6ccb9fab055a 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -92,3 +92,16 @@ out:
 	local_irq_restore(flags);
 	preempt_enable();
 }
+
+/*
+ * Busy-wait until get_clock() has advanced by usecs << 12 clock units.
+ * Meant for startup and reboot, when the interrupt handler isn't working.
+ */
+void udelay_simple(unsigned long usecs)
+{
+	/* 4096 clock units per microsecond, hence the shift by 12. */
+	u64 deadline = get_clock() + ((u64) usecs << 12);
+
+	while (get_clock() < deadline)
+		cpu_relax();
+}
diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index c0cb72547256..3db2c386546f 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -859,19 +859,6 @@ __disable_subchannel_easy(struct subchannel_id schid, struct schib *schib)
 	return -EBUSY; /* uhm... */
 }
 
-/* we can't use the normal udelay here, since it enables external interrupts */
-
-static void udelay_reset(unsigned long usecs)
-{
-	uint64_t start_cc, end_cc;
-
-	asm volatile ("STCK %0" : "=m" (start_cc));
-	do {
-		cpu_relax();
-		asm volatile ("STCK %0" : "=m" (end_cc));
-	} while (((end_cc - start_cc)/4096) < usecs);
-}
-
 static int
 __clear_io_subchannel_easy(struct subchannel_id schid)
 {
@@ -887,7 +874,7 @@ __clear_io_subchannel_easy(struct subchannel_id schid)
 			if (schid_equal(&ti.schid, &schid))
 				return 0;
 		}
-		udelay_reset(100);
+		udelay_simple(100);
 	}
 	return -EBUSY;
 }
@@ -895,7 +882,7 @@ __clear_io_subchannel_easy(struct subchannel_id schid)
 static void __clear_chsc_subchannel_easy(void)
 {
 	/* It seems we can only wait for a bit here :/ */
-	udelay_reset(100);
+	udelay_simple(100);
 }
 
 static int pgm_check_occured;
-- 
cgit v1.2.3-70-g09d2


From ab1d848fd6a9151b02c6cbf4bddce6e24707b094 Mon Sep 17 00:00:00 2001
From: Nigel Hislop <hislop_nigel@emc.com>
Date: Fri, 10 Oct 2008 21:33:25 +0200
Subject: [S390] Add ioctl support for EMC Symmetrix Subsystem Control I/O

EMC Symmetrix Subsystem Control I/O through CKD dasd requires a
specific parameter list sent to the array via a Perform Subsystem
Function CCW. The Symmetrix response is retrieved from the array
via a Read Subsystem Data CCW.

Signed-off-by: Nigel Hislop <hislop_nigel@emc.com>
Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/dasd.h   |  13 ++++++
 drivers/s390/block/dasd_eckd.c | 101 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 114 insertions(+)

(limited to 'arch/s390')

diff --git a/arch/s390/include/asm/dasd.h b/arch/s390/include/asm/dasd.h
index 3f002e13d024..55b2b80cdf6e 100644
--- a/arch/s390/include/asm/dasd.h
+++ b/arch/s390/include/asm/dasd.h
@@ -3,6 +3,8 @@
  * Author(s)......: Holger Smolinski <Holger.Smolinski@de.ibm.com>
  * Bugreports.to..: <Linux390@de.ibm.com>
  * (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 1999,2000
+ * EMC Symmetrix ioctl Copyright EMC Corporation, 2008
+ * Author.........: Nigel Hislop <hislop_nigel@emc.com>
  *
  * This file is the interface of the DASD device driver, which is exported to user space
  * any future changes wrt the API will result in a change of the APIVERSION reported
@@ -202,6 +204,16 @@ typedef struct attrib_data_t {
 #define DASD_SEQ_PRESTAGE  0x4
 #define DASD_REC_ACCESS    0x5
 
+/*
+ * Parameter block for BIODASDSYMMIO (Perform EMC Symmetrix I/O)
+ */
+typedef struct dasd_symmio_parms {
+	unsigned char reserved[8];	/* compat with older releases */
+	unsigned long long psf_data;	/* user address of PSF data, as u64 */
+	unsigned long long rssd_result; /* user address for RSSD result, as u64 */
+	int psf_data_len;		/* number of bytes at psf_data */
+	int rssd_result_len;		/* number of bytes at rssd_result */
+} __attribute__ ((packed)) dasd_symmio_parms_t;
 
 /********************************************************************************
  * SECTION: Definition of IOCTLs
@@ -247,6 +259,7 @@ typedef struct attrib_data_t {
 /* Set Attributes (cache operations) */
 #define BIODASDSATTR   _IOW(DASD_IOCTL_LETTER,2,attrib_data_t) 
 
+#define BIODASDSYMMIO  _IOWR(DASD_IOCTL_LETTER, 240, dasd_symmio_parms_t)
 
 #endif				/* DASD_H */
 
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 8095629bc493..49f9d221e23d 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -6,6 +6,8 @@
  *		    Martin Schwidefsky <schwidefsky@de.ibm.com>
  * Bugreports.to..: <Linux390@de.ibm.com>
  * (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 1999,2000
+ * EMC Symmetrix ioctl Copyright EMC Corporation, 2008
+ * Author.........: Nigel Hislop <hislop_nigel@emc.com>
  *
  */
 
@@ -2083,6 +2085,103 @@ dasd_eckd_set_attrib(struct dasd_device *device, void __user *argp)
 	return 0;
 }
 
+/*
+ * BIODASDSYMMIO: send the caller's PSF data to an EMC Symmetrix array and
+ * copy the RSSD result back to user space. Returns 0 or an error code.
+ */
+static int dasd_symm_io(struct dasd_device *device, void __user *argp)
+{
+	struct dasd_symmio_parms usrparm;
+	char *psf_data, *rssd_result;
+	struct dasd_ccw_req *cqr;
+	struct ccw1 *ccw;
+	int rc;
+
+	/* Copy parms from caller */
+	rc = -EFAULT;
+	if (copy_from_user(&usrparm, argp, sizeof(usrparm)))
+		goto out;
+	/* psf_data[0] and psf_data[1] are read below: need >= 2 bytes */
+	rc = -EINVAL;
+	if (usrparm.psf_data_len < 2)
+		goto out;
+#ifndef CONFIG_64BIT
+	/* Make sure pointers are sane even on 31 bit. */
+	if ((usrparm.psf_data >> 32) != 0 || (usrparm.rssd_result >> 32) != 0)
+		goto out;
+#endif
+	/* alloc I/O data area */
+	psf_data = kzalloc(usrparm.psf_data_len, GFP_KERNEL | GFP_DMA);
+	rssd_result = kzalloc(usrparm.rssd_result_len, GFP_KERNEL | GFP_DMA);
+	if (!psf_data || !rssd_result) {
+		rc = -ENOMEM;
+		goto out_free;
+	}
+
+	/* get syscall header from user space */
+	rc = -EFAULT;
+	if (copy_from_user(psf_data,
+			   (void __user *)(unsigned long) usrparm.psf_data,
+			   usrparm.psf_data_len))
+		goto out_free;
+
+	/* header check: rejected only when BOTH bytes mismatch */
+	if (psf_data[0] != 0x17 && psf_data[1] != 0xce) {
+		rc = -EINVAL;
+		goto out_free;
+	}
+
+	/* setup CCWs for PSF + RSSD */
+	cqr = dasd_smalloc_request("ECKD", 2 , 0, device);
+	if (IS_ERR(cqr)) {
+		DEV_MESSAGE(KERN_WARNING, device, "%s",
+			"Could not allocate initialization request");
+		rc = PTR_ERR(cqr);
+		goto out_free;
+	}
+
+	cqr->startdev = device;
+	cqr->memdev = device;
+	cqr->retries = 3;
+	cqr->expires = 10 * HZ;
+	cqr->buildclk = get_clock();
+	cqr->status = DASD_CQR_FILLED;
+
+	/* Build the ccws */
+	ccw = cqr->cpaddr;
+	/* PSF ccw: hands psf_data to the device, chained to the next ccw */
+	ccw->cmd_code = DASD_ECKD_CCW_PSF;
+	ccw->count = usrparm.psf_data_len;
+	ccw->flags |= CCW_FLAG_CC;
+	ccw->cda = (__u32)(addr_t) psf_data;
+	ccw++;
+
+	/* RSSD ccw: the device's answer lands in rssd_result */
+	ccw->cmd_code = DASD_ECKD_CCW_RSSD;
+	ccw->count = usrparm.rssd_result_len;
+	ccw->flags = CCW_FLAG_SLI ;
+	ccw->cda = (__u32)(addr_t) rssd_result;
+
+	rc = dasd_sleep_on(cqr);
+	if (rc)
+		goto out_sfree;
+
+	rc = -EFAULT;
+	if (copy_to_user((void __user *)(unsigned long) usrparm.rssd_result,
+			   rssd_result, usrparm.rssd_result_len))
+		goto out_sfree;
+	rc = 0;
+
+out_sfree:
+	dasd_sfree_request(cqr, cqr->memdev);
+out_free:
+	kfree(rssd_result);
+	kfree(psf_data);
+out:
+	DBF_DEV_EVENT(DBF_WARNING, device, "Symmetrix ioctl: rc=%d", rc);
+	return rc;
+}
+
 static int
 dasd_eckd_ioctl(struct dasd_block *block, unsigned int cmd, void __user *argp)
 {
@@ -2101,6 +2200,8 @@ dasd_eckd_ioctl(struct dasd_block *block, unsigned int cmd, void __user *argp)
 		return dasd_eckd_reserve(device);
 	case BIODASDSLCK:
 		return dasd_eckd_steal_lock(device);
+	case BIODASDSYMMIO:
+		return dasd_symm_io(device, argp);
 	default:
 		return -ENOIOCTLCMD;
 	}
-- 
cgit v1.2.3-70-g09d2


From 15e86b0c752d50e910b2cca6e83ce74c4440d06c Mon Sep 17 00:00:00 2001
From: Florian Funke <ffunke@de.ibm.com>
Date: Fri, 10 Oct 2008 21:33:26 +0200
Subject: [S390] introduce dirty bit for kvm live migration

This patch defines a dirty bit in the PGSTE that can be used to implement
dirty pages logging for KVM's live migration. The bit is set in the
ptep_rcp_copy function, which is called to save dirty and referenced information
from the storage key in the PGSTE. The bit can be tested and reset by KVM using
the kvm_s390_test_and_clear_page_dirty function that is introduced by this patch.

Acked-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Florian Funke <ffunke@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/pgtable.h | 45 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 43 insertions(+), 2 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 0bdb704ae051..1a928f84afd6 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -281,6 +281,9 @@ extern char empty_zero_page[PAGE_SIZE];
 #define RCP_GR_BIT	50
 #define RCP_GC_BIT	49
 
+/* User dirty bit for KVM's migration feature */
+#define KVM_UD_BIT	47
+
 #ifndef __s390x__
 
 /* Bits in the segment table address-space-control-element */
@@ -575,12 +578,16 @@ static inline void ptep_rcp_copy(pte_t *ptep)
 	unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE);
 
 	skey = page_get_storage_key(page_to_phys(page));
-	if (skey & _PAGE_CHANGED)
+	if (skey & _PAGE_CHANGED) {
 		set_bit_simple(RCP_GC_BIT, pgste);
+		set_bit_simple(KVM_UD_BIT, pgste);
+	}
 	if (skey & _PAGE_REFERENCED)
 		set_bit_simple(RCP_GR_BIT, pgste);
-	if (test_and_clear_bit_simple(RCP_HC_BIT, pgste))
+	if (test_and_clear_bit_simple(RCP_HC_BIT, pgste)) {
 		SetPageDirty(page);
+		set_bit_simple(KVM_UD_BIT, pgste);
+	}
 	if (test_and_clear_bit_simple(RCP_HR_BIT, pgste))
 		SetPageReferenced(page);
 #endif
@@ -744,6 +751,40 @@ static inline pte_t pte_mkspecial(pte_t pte)
 	return pte;
 }
 
+#ifdef CONFIG_PGSTE
+/*
+ * Get (and clear) the user dirty bit (KVM_UD_BIT) kept in the pgste of a PTE.
+ */
+static inline int kvm_s390_test_and_clear_page_dirty(struct mm_struct *mm,
+						     pte_t *ptep)
+{
+	int dirty;
+	unsigned long *pgste;
+	struct page *page;
+	unsigned int skey;
+
+	if (!mm->context.pgstes)
+		return -EINVAL;	/* this mm has no pgstes to look at */
+	rcp_lock(ptep);
+	pgste = (unsigned long *) (ptep + PTRS_PER_PTE);
+	page = virt_to_page(pte_val(*ptep));
+	skey = page_get_storage_key(page_to_phys(page));
+	if (skey & _PAGE_CHANGED) {	/* storage key has the changed bit set */
+		set_bit_simple(RCP_GC_BIT, pgste);
+		set_bit_simple(KVM_UD_BIT, pgste);
+	}
+	if (test_and_clear_bit_simple(RCP_HC_BIT, pgste)) {
+		SetPageDirty(page);
+		set_bit_simple(KVM_UD_BIT, pgste);
+	}
+	dirty = test_and_clear_bit_simple(KVM_UD_BIT, pgste);
+	if (skey & _PAGE_CHANGED)	/* state was saved in the pgste above */
+		page_clear_dirty(page);
+	rcp_unlock(ptep);
+	return dirty;
+}
+#endif
+
 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
 static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
 					    unsigned long addr, pte_t *ptep)
-- 
cgit v1.2.3-70-g09d2


From 4a672cfa3a7fcbc6f2adc558f34148be1096c561 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Fri, 10 Oct 2008 21:33:29 +0200
Subject: [S390] fix initialization of stp

chsc_sstpc returns -EIO on error and 0 on success, but stp_reset checks
the result against 1 instead of 0. chsc_sstpc used to return 1 on
success, and this one call site was never updated.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/time.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/s390')

diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 06acb1a18bbc..b94e9e3b694a 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -1356,7 +1356,7 @@ static void __init stp_reset(void)
 
 	stp_page = alloc_bootmem_pages(PAGE_SIZE);
 	rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000);
-	if (rc == 1)
+	if (rc == 0)
 		set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags);
 	else if (stp_online) {
 		printk(KERN_WARNING "Running on non STP capable machine.\n");
-- 
cgit v1.2.3-70-g09d2