diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/solvers/check.cl | 56 | ||||
-rw-r--r-- | src/solvers/opencl.rs | 1 |
2 files changed, 40 insertions, 17 deletions
diff --git a/src/solvers/check.cl b/src/solvers/check.cl index 0ad187a..2ef6406 100644 --- a/src/solvers/check.cl +++ b/src/solvers/check.cl @@ -1,21 +1,43 @@ +typedef ulong u64 +typedef uint u32 + #pragma OPENCL EXTENSION cl_intel_printf : enable -__kernel void check(__global unsigned long* permutations, __global int* results, - unsigned long* instructions, unsigned int n, unsigned int w, unsigned long offset) { - int id = get_global_id(0); - unsigned long curr_mask = mask | permutations[id + offset]; +__kernel void check(__global u64* permutations, __global u64* results, + u64* instructions, __local u64* i_buffer, u32 n, u32 w, u64 offset) { + int gid = get_global_id(0); + int wid = get_local_id(0); + int gsize = get_global_size(0); + int wsize = get_local_size(0); + int wesize = get_enqueued_local_size(0); + + + int times = wsize / wesize; + times += !(wsize % wesize); + for (int i = 0; i < times; i++) { + i_buffer[wid + i] = instructions[wid + i] + } + work_group_barrier(mem_fence::local); - unsigned long tmask, sum, stones; - stones = tmask = sum = 0; - for (int i = 1; i <= w + 1; i++) { - if ((curr_mask & (1 << i)) == 0) { - stones += 1; - tmask |= 1 << (i - sum); - sum = i; - } - } - if (tmask == (1 << (n + 1)) - 2 && stones == n) { - printf("test"); - results[id] = id; - } + u64 result = 0; + unsigned long own = permutations[gid + offset]; + for (int j = 0; j < wsize; j++) { + unsigned long curr_mask = i_buffer[j] | own; + unsigned long tmask, sum, stones; + stones = tmask = sum = 0; + for (int i = 1; i <= w + 1; i++) { + if ((curr_mask & (1 << i)) == 0) { + stones += 1; + tmask |= 1 << (i - sum); + sum = i; + } + } + if (tmask == (1 << (n + 1)) - 2 && stones == n) { + result |= 1 << (j & 63); + } + if (j & !(j & 63)) { + results[gid * (wsize / 64) + j / 64] = result; + result = 0; + } + } } diff --git a/src/solvers/opencl.rs b/src/solvers/opencl.rs index 3406fd0..1293295 100644 --- a/src/solvers/opencl.rs +++ b/src/solvers/opencl.rs @@ -146,6 +146,7 @@ impl GpuSolver { .arg(&self.permutations) .arg(&result_buffer[i]) .arg(&instruction_buffer[i]) + .arg_local::<u64>(self.wg_size) .arg(self.n) .arg(self.w) .arg((self.n as u64 - i as u64 - 1) * chunk as u64) |