blob: af9bc51532628197e610e9e7a437b0f5da823c05 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
|
//#pragma OPENCL EXTENSION cl_intel_printf : enable
// Short aliases for the OpenCL C built-in unsigned integer types.
typedef ulong u64; // 64-bit unsigned
typedef uint u32; // 32-bit unsigned
/*
 * check - test one permutation mask against every instruction mask of the
 * work-group and record the matches as a bitmap.
 *
 * Each work-item reads its own mask from permutations[global_id + offset].
 * The first get_local_size(0) entries of `instructions` are staged into
 * `i_buffer` (one element per work-item), then every work-item combines its
 * mask with each staged instruction mask and tests the combination.
 *
 * Test for a combined mask m = instruction | own:
 *   - bit positions 1 .. w + 1 are scanned in ascending order;
 *   - every cleared bit counts as one "stone";
 *   - the distance from each stone to the previous stone (or to position 0
 *     for the first one) is recorded as a set bit in a distance mask;
 *   - m matches when exactly n stones were found and the distance mask has
 *     exactly bits 1 .. n set.
 *
 * Output layout: each work-item owns w64 = ceil(local_size / 64) consecutive
 * words starting at results[global_id * w64]. Bit b of word k is set when
 * instruction number k * 64 + b matched.
 *
 * Parameters:
 *   permutations - per-work-item masks, indexed by global id + offset
 *   results      - output bitmap, w64 words per work-item
 *   instructions - instruction masks; at least get_local_size(0) entries
 *                  are read
 *   i_buffer     - local scratch, at least get_local_size(0) elements
 *   n            - required number of stones / highest expected distance
 *   w            - scan covers bit positions 1 .. w + 1
 *   offset       - added to the global id when indexing `permutations`
 *
 * NOTE(review): all masks are 64 bits wide, so this presumably requires
 * n + 1 < 64 and w + 1 < 64 -- confirm against the host code.
 */
__kernel void check(__global u64* permutations, __global u64* results,
                    __global u64* instructions, __local u64* i_buffer,
                    u32 n, u32 w, u64 offset) {
    const size_t wid = get_local_id(0);

    /* Stage one instruction per work-item; the barrier makes the whole
     * buffer visible to the work-group before anyone reads it. */
    i_buffer[wid] = instructions[wid];
    barrier(CLK_LOCAL_MEM_FENCE);

    const size_t gid = get_global_id(0);
    const size_t wsize = get_local_size(0);

    /* Number of 64-bit result words per work-item: ceil(wsize / 64). */
    const size_t w64 = wsize / 64 + !!(wsize & 63);
    const size_t row = gid * w64;

    const u64 own = permutations[gid + offset];

    /* Bits 1 .. n set. The literal must be 64-bit: shifting an int `1`
     * would use a 32-bit shift and wrap the shift count modulo 32. */
    const u64 correct_mask = ((u64)1 << (n + 1)) - 2;

    u64 word = 0; /* result bits of the current group of 64 instructions */

    for (size_t j = 0; j < wsize; j++) {
        const u64 curr_mask = i_buffer[j] | own;
        u64 tmask = 0;  /* set of distances between consecutive stones */
        u64 stones = 0; /* number of cleared bits found */
        u64 last = 0;   /* position of the previous stone (0 = none yet) */

        for (u32 i = 1; i <= w + 1; i++) {
            if (!(curr_mask & ((u64)1 << i))) {
                stones++;
                tmask |= (u64)1 << (i - last);
                last = i;
            }
        }

        if (tmask == correct_mask && stones == n) {
            word |= (u64)1 << (j & 63);
        }

        /* Flush once the last bit of a word has been decided, so that
         * word k holds exactly instructions 64k .. 64k + 63. */
        if ((j & 63) == 63) {
            results[row + j / 64] = word;
            word = 0;
        }
    }

    /* Store the trailing partial word, if any. When wsize is a multiple of
     * 64 every word was already flushed and index wsize / 64 would belong
     * to the next work-item's row. */
    if (wsize & 63) {
        results[row + wsize / 64] = word;
    }
}
|