diff options
author | Dennis Kobert <dennis@kobert.dev> | 2020-01-07 03:12:55 +0100 |
---|---|---|
committer | Dennis Kobert <dennis@kobert.dev> | 2020-01-07 03:12:55 +0100 |
commit | c36035c0667fdd224da914b50c30a9366e1a5c38 (patch) | |
tree | 4d67f21f89a138e9859540eb5d2446dec5660aef /src/solvers/check.cl | |
parent | 2750313bab1bff5b94734f949e633f04391577ab (diff) |
Fix global worksize calculation
Diffstat (limited to 'src/solvers/check.cl')
-rw-r--r-- | src/solvers/check.cl | 29 |
1 files changed, 17 insertions, 12 deletions
diff --git a/src/solvers/check.cl b/src/solvers/check.cl index 15069b9..af9bc51 100644 --- a/src/solvers/check.cl +++ b/src/solvers/check.cl @@ -3,36 +3,41 @@ typedef ulong u64; typedef uint u32; __kernel void check(__global u64* permutations, __global u64* results, - __local u64* instructions, __local u64* i_buffer, u32 n, u32 w, u64 offset) { - int gid = get_global_id(0); + __global u64* instructions, __local u64* i_buffer, u32 n, u32 w, u64 offset) { int wid = get_local_id(0); + i_buffer[wid] = instructions[wid]; + barrier(CLK_LOCAL_MEM_FENCE); + + int gid = get_global_id(0); int gsize = get_global_size(0); int wsize = get_local_size(0); + u32 w64 = wsize / 64 + !!(wsize & 63); + u64 result = 0; + u64 own = permutations[gid + offset]; + u64 correct_mask = (1 << (n + 1)) - 2; - i_buffer[wid] = instructions[wid]; - barrier(CLK_LOCAL_MEM_FENCE); - u64 result = 0; - unsigned long own = permutations[gid + offset]; - for (int j = 0; j < wsize; j++) { + int j; + for (j = 0; j < wsize; j++) { unsigned long curr_mask = i_buffer[j] | own; unsigned long tmask, sum, stones; stones = tmask = sum = 0; for (int i = 1; i <= w + 1; i++) { - if ((curr_mask & (1 << i)) == 0) { - stones += 1; + if (!(curr_mask & (1 << i))) { + stones++; tmask |= 1 << (i - sum); sum = i; } } - if (tmask == (1 << (n + 1)) - 2 && stones == n) { + if (tmask == correct_mask && stones == n) { result |= 1 << (j & 63); } - if (j & !(j & 63)) { - results[gid * (wsize / 64) + j / 64] = result; + if (j % 64 == 0 && j) { + results[gid * w64 + j / 64] = result; result = 0; } } + results[gid * w64 + j / 64] = result; } |