// Source blob: 7ff39df233f17558e4a8a47e62368abac33857d8 (plain)
//#pragma OPENCL EXTENSION cl_intel_printf : enable
// Short aliases for the OpenCL C built-in unsigned integer types.
typedef ulong u64; // ulong is an unsigned 64-bit integer in OpenCL C
typedef uint u32; // uint is an unsigned 32-bit integer in OpenCL C
/*
 * check - combine every staged instruction mask with this work-item's
 * permutation mask, test each combination, and pack the pass/fail outcomes
 * into 64-bit result words.
 *
 * permutations: input masks; this work-item reads
 *               permutations[get_global_id(0) + offset].
 * results:      output words; work-item g owns the (local_size / 64)
 *               consecutive words starting at results[g * (local_size / 64)].
 * instructions: masks to OR with the permutation; each work-item copies
 *               element get_local_id(0) into i_buffer.
 * i_buffer:     local scratch holding one mask per work-item of the group.
 * n:            required number of clear bits in the scanned range.
 * w:            bit positions 1 .. w + 1 of the combined mask are scanned.
 * offset:       added to the global id when indexing permutations.
 *
 * A combined mask passes when it has exactly n clear bits in positions
 * 1 .. w + 1 and the gaps between consecutive clear bits (the first gap is
 * measured from position 0) are exactly the set {1, ..., n}. Bit (j & 63)
 * of result word j / 64 is set when mask j passes.
 *
 * All masks are 64 bits wide, so n and w must not exceed 62; OpenCL C
 * reduces larger shift counts modulo the operand width.
 *
 * A result word is stored only once all 64 of its bits have been computed.
 * When the work-group size is not a multiple of 64, the trailing partial
 * word has no slot in the (local_size / 64)-word layout and is not stored.
 *
 * NOTE(review): instructions is declared __local, so the host cannot fill
 * it through clSetKernelArg; it probably should be __global. Left unchanged
 * because the qualifier is part of the kernel interface - confirm against
 * the host code.
 */
__kernel void check(__global u64* permutations, __global u64* results,
                    __local u64* instructions, __local u64* i_buffer, u32 n, u32 w, u64 offset) {
    const size_t gid = get_global_id(0);
    const size_t wid = get_local_id(0);
    const size_t wsize = get_local_size(0);
    const size_t words_per_item = wsize / 64;

    /* Each work-item stages one mask, so i_buffer ends up with wsize entries. */
    i_buffer[wid] = instructions[wid];
    barrier(CLK_LOCAL_MEM_FENCE);

    const u64 own = permutations[gid + offset];
    /* Bits 1 .. n set: every gap length from 1 to n seen at least once. */
    const u64 expected = ((u64)1 << (n + 1)) - 2;
    const u32 last_bit = w + 1;

    u64 result = 0;
    for (size_t j = 0; j < wsize; j++) {
        const u64 curr_mask = i_buffer[j] | own;
        u64 tmask = 0;  /* bit g set <=> a gap of length g was seen */
        u32 prev = 0;   /* position of the previous clear bit (0 = none yet) */
        u32 stones = 0; /* number of clear bits found */

        for (u32 i = 1; i <= last_bit; i++) {
            if (((curr_mask >> i) & 1) == 0) {
                stones += 1;
                /* 64-bit literal: a plain `1` would shift as a 32-bit int. */
                tmask |= (u64)1 << (i - prev);
                prev = i;
            }
        }

        if (tmask == expected && stones == n) {
            result |= (u64)1 << (j & 63);
        }

        /* Flush once the current 64-bit word is complete. */
        if ((j & 63) == 63) {
            results[gid * words_per_item + j / 64] = result;
            result = 0;
        }
    }
}
|