summaryrefslogtreecommitdiff
path: root/src/solvers/gpu/check.cl
blob: af9bc51532628197e610e9e7a437b0f5da823c05 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
//#pragma OPENCL EXTENSION cl_intel_printf : enable
// Short aliases for the OpenCL C built-in unsigned integer types.
typedef ulong u64; // 64-bit unsigned
typedef uint u32;  // 32-bit unsigned

/*
 * check: test this work-item's permutation mask against every instruction
 * mask cached by the work-group, and emit one result bit per instruction.
 *
 * permutations  one 64-bit mask per work-item, read at [global_id + offset].
 * results       output; each work-item owns w64 = ceil(local_size / 64)
 *               consecutive words starting at global_id * w64. Bit (j & 63)
 *               of word (j / 64) is set when instruction j passes the check.
 * instructions  the first local_size entries are copied into i_buffer.
 * i_buffer      local scratch; must hold at least local_size u64 values.
 * n             required number of clear bits; the set of gaps between
 *               consecutive clear bits must be exactly {1, ..., n}.
 * w             bit positions 1 .. w + 1 of the combined mask are scanned.
 * offset        added to the global id when indexing permutations.
 *
 * NOTE(review): all masks are 64 bits wide, so this presumably expects
 * n + 1 <= 63 and w + 1 <= 63 -- confirm against the host code.
 */
__kernel void check(__global u64* permutations, __global u64* results, 
    __global u64* instructions, __local u64* i_buffer, u32 n, u32 w, u64 offset) {
    const size_t wid = get_local_id(0);

    /* Each work-item stages one instruction word; the barrier makes the
       whole buffer visible to the work-group before anyone reads it. */
    i_buffer[wid] = instructions[wid];
    barrier(CLK_LOCAL_MEM_FENCE);

    const size_t gid = get_global_id(0);
    const u32 wsize = (u32)get_local_size(0);

    /* Number of 64-bit result words per work-item: ceil(wsize / 64). */
    const u32 w64 = wsize / 64 + !!(wsize & 63);
    __global u64* out = results + gid * w64;

    const u64 own = permutations[gid + offset];

    /* Bits 1..n set. The literal must be 64-bit: an int `1` would have its
       shift count reduced modulo 32 by OpenCL C. */
    const u64 correct_mask = ((u64)1 << (n + 1)) - 2;

    u64 result = 0;
    for (u32 j = 0; j < wsize; j++) {
        const u64 curr_mask = i_buffer[j] | own;
        u64 tmask = 0;   /* bit d is set when a gap of length d was seen   */
        u64 last = 0;    /* position of the previous clear bit (0 = none) */
        u64 stones = 0;  /* number of clear bits found in positions 1..w+1 */

        for (u32 i = 1; i <= w + 1; i++) {
            if (!(curr_mask & ((u64)1 << i))) {
                stones++;
                tmask |= (u64)1 << (i - last);
                last = i;
            }
        }

        if (tmask == correct_mask && stones == n) {
            result |= (u64)1 << (j & 63);
        }

        /* Flush a word once its 64th bit has been decided, so word k holds
           exactly the bits for instructions 64k .. 64k + 63. */
        if ((j & 63) == 63) {
            out[j / 64] = result;
            result = 0;
        }
    }

    /* Store the trailing partial word, if any. When wsize is a multiple of
       64 every word was already flushed and index wsize / 64 would be out
       of this work-item's range. */
    if (wsize & 63) {
        out[wsize / 64] = result;
    }
}