summaryrefslogtreecommitdiff
path: root/tools/testing/selftests/powerpc/math/vmx_asm.S
blob: 1b8c248b3ac18770a23a30c3946da27d92aa708a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
/*
 * Copyright 2015, Cyril Bur, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include "../basic_asm.h"

# POS MUST BE 16 ALIGNED!
#define PUSH_VMX(pos,reg) \
	li	reg,pos; \
	stvx	v20,reg,sp; \
	addi	reg,reg,16; \
	stvx	v21,reg,sp; \
	addi	reg,reg,16; \
	stvx	v22,reg,sp; \
	addi	reg,reg,16; \
	stvx	v23,reg,sp; \
	addi	reg,reg,16; \
	stvx	v24,reg,sp; \
	addi	reg,reg,16; \
	stvx	v25,reg,sp; \
	addi	reg,reg,16; \
	stvx	v26,reg,sp; \
	addi	reg,reg,16; \
	stvx	v27,reg,sp; \
	addi	reg,reg,16; \
	stvx	v28,reg,sp; \
	addi	reg,reg,16; \
	stvx	v29,reg,sp; \
	addi	reg,reg,16; \
	stvx	v30,reg,sp; \
	addi	reg,reg,16; \
	stvx	v31,reg,sp;

# POS MUST BE 16 ALIGNED!
#define POP_VMX(pos,reg) \
	li	reg,pos; \
	lvx	v20,reg,sp; \
	addi	reg,reg,16; \
	lvx	v21,reg,sp; \
	addi	reg,reg,16; \
	lvx	v22,reg,sp; \
	addi	reg,reg,16; \
	lvx	v23,reg,sp; \
	addi	reg,reg,16; \
	lvx	v24,reg,sp; \
	addi	reg,reg,16; \
	lvx	v25,reg,sp; \
	addi	reg,reg,16; \
	lvx	v26,reg,sp; \
	addi	reg,reg,16; \
	lvx	v27,reg,sp; \
	addi	reg,reg,16; \
	lvx	v28,reg,sp; \
	addi	reg,reg,16; \
	lvx	v29,reg,sp; \
	addi	reg,reg,16; \
	lvx	v30,reg,sp; \
	addi	reg,reg,16; \
	lvx	v31,reg,sp;

# Carefull this will 'clobber' vmx (by design)
# Don't call this from C
FUNC_START(load_vmx)
	li	r5,0
	lvx	v20,r5,r3
	addi	r5,r5,16
	lvx	v21,r5,r3
	addi	r5,r5,16
	lvx	v22,r5,r3
	addi	r5,r5,16
	lvx	v23,r5,r3
	addi	r5,r5,16
	lvx	v24,r5,r3
	addi	r5,r5,16
	lvx	v25,r5,r3
	addi	r5,r5,16
	lvx	v26,r5,r3
	addi	r5,r5,16
	lvx	v27,r5,r3
	addi	r5,r5,16
	lvx	v28,r5,r3
	addi	r5,r5,16
	lvx	v29,r5,r3
	addi	r5,r5,16
	lvx	v30,r5,r3
	addi	r5,r5,16
	lvx	v31,r5,r3
	blr
FUNC_END(load_vmx)

# Should be safe from C, only touches r4, r5 and v0,v1,v2
FUNC_START(check_vmx)
	PUSH_BASIC_STACK(32)
	mr r4,r3
	li	r3,1 # assume a bad result
	li	r5,0
	lvx	v0,r5,r4
	vcmpequd.	v1,v0,v20
	vmr	v2,v1

	addi	r5,r5,16
	lvx	v0,r5,r4
	vcmpequd.	v1,v0,v21
	vand	v2,v2,v1

	addi	r5,r5,16
	lvx	v0,r5,r4
	vcmpequd.	v1,v0,v22
	vand	v2,v2,v1

	addi	r5,r5,16
	lvx	v0,r5,r4
	vcmpequd.	v1,v0,v23
	vand	v2,v2,v1

	addi	r5,r5,16
	lvx	v0,r5,r4
	vcmpequd.	v1,v0,v24
	vand	v2,v2,v1

	addi	r5,r5,16
	lvx	v0,r5,r4
	vcmpequd.	v1,v0,v25
	vand	v2,v2,v1

	addi	r5,r5,16
	lvx	v0,r5,r4
	vcmpequd.	v1,v0,v26
	vand	v2,v2,v1

	addi	r5,r5,16
	lvx	v0,r5,r4
	vcmpequd.	v1,v0,v27
	vand	v2,v2,v1

	addi	r5,r5,16
	lvx	v0,r5,r4
	vcmpequd.	v1,v0,v28
	vand	v2,v2,v1

	addi	r5,r5,16
	lvx	v0,r5,r4
	vcmpequd.	v1,v0,v29
	vand	v2,v2,v1

	addi	r5,r5,16
	lvx	v0,r5,r4
	vcmpequd.	v1,v0,v30
	vand	v2,v2,v1

	addi	r5,r5,16
	lvx	v0,r5,r4
	vcmpequd.	v1,v0,v31
	vand	v2,v2,v1

	li	r5,STACK_FRAME_LOCAL(0,0)
	stvx	v2,r5,sp
	ldx	r0,r5,sp
	cmpdi	r0,0xffffffffffffffff
	bne	1f
	li	r3,0
1:	POP_BASIC_STACK(32)
	blr
FUNC_END(check_vmx)

# Safe from C
FUNC_START(test_vmx)
	# r3 holds pointer to where to put the result of fork
	# r4 holds pointer to the pid
	# v20-v31 are non-volatile
	PUSH_BASIC_STACK(512)
	std	r3,STACK_FRAME_PARAM(0)(sp) # Address of varray
	std r4,STACK_FRAME_PARAM(1)(sp) # address of pid
	PUSH_VMX(STACK_FRAME_LOCAL(2,0),r4)

	bl load_vmx
	nop

	li	r0,__NR_fork
	sc
	# Pass the result of fork back to the caller
	ld	r9,STACK_FRAME_PARAM(1)(sp)
	std	r3,0(r9)

	ld r3,STACK_FRAME_PARAM(0)(sp)
	bl check_vmx
	nop

	POP_VMX(STACK_FRAME_LOCAL(2,0),r4)
	POP_BASIC_STACK(512)
	blr
FUNC_END(test_vmx)

# int preempt_vmx(vector int *varray, int *threads_starting, int *running)
# On starting will (atomically) decrement threads_starting as a signal that
# the VMX have been loaded with varray. Will proceed to check the validity of
# the VMX registers while running is not zero.
FUNC_START(preempt_vmx)
	PUSH_BASIC_STACK(512)
	std r3,STACK_FRAME_PARAM(0)(sp) # vector int *varray
	std r4,STACK_FRAME_PARAM(1)(sp) # int *threads_starting
	std r5,STACK_FRAME_PARAM(2)(sp) # int *running
	# VMX need to write to 16 byte aligned addresses, skip STACK_FRAME_LOCAL(3,0)
	PUSH_VMX(STACK_FRAME_LOCAL(4,0),r4)

	bl load_vmx
	nop

	sync
	# Atomic DEC
	ld r3,STACK_FRAME_PARAM(1)(sp)
1:	lwarx r4,0,r3
	addi r4,r4,-1
	stwcx. r4,0,r3
	bne- 1b

2:	ld r3,STACK_FRAME_PARAM(0)(sp)
	bl check_vmx
	nop
	cmpdi r3,0
	bne 3f
	ld r4,STACK_FRAME_PARAM(2)(sp)
	ld r5,0(r4)
	cmpwi r5,0
	bne 2b

3:	POP_VMX(STACK_FRAME_LOCAL(4,0),r4)
	POP_BASIC_STACK(512)
	blr
FUNC_END(preempt_vmx)