summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Dennis Kobert <dennis@kobert.dev> 2020-01-06 02:51:11 +0100
committer Dennis Kobert <dennis@kobert.dev> 2020-01-06 02:51:11 +0100
commit 17d9a626b6a3110cec3697407c6690522798cb30 (patch)
tree 091b98bdfca72f15340dd7bd0301b13253ca30c9
parent 66cc693a770cf8668ddbddd60d35c8e8dacd55a9 (diff)
Rework opencl code
-rw-r--r-- src/solvers/check.cl 56
-rw-r--r-- src/solvers/opencl.rs 1
2 files changed, 40 insertions, 17 deletions
diff --git a/src/solvers/check.cl b/src/solvers/check.cl
index 0ad187a..2ef6406 100644
--- a/src/solvers/check.cl
+++ b/src/solvers/check.cl
@@ -1,21 +1,43 @@
+typedef ulong u64
+typedef uint u32
+
#pragma OPENCL EXTENSION cl_intel_printf : enable
-__kernel void check(__global unsigned long* permutations, __global int* results,
- unsigned long* instructions, unsigned int n, unsigned int w, unsigned long offset) {
- int id = get_global_id(0);
- unsigned long curr_mask = mask | permutations[id + offset];
+__kernel void check(__global u64* permutations, __global u64* results,
+ u64* instructions, __local u64* i_buffer, u32 n, u32 w, u64 offset) {
+ int gid = get_global_id(0);
+ int wid = get_local_id(0);
+ int gsize = get_global_size(0);
+ int wsize = get_local_size(0);
+ int wesize = get_enqueued_local_size(0);
+
+
+ int times = wsize / wesize;
+ times += !(wsize % wesize);
+ for (int i = 0; i < times; i++) {
+ i_buffer[wid + i] = instructions[wid + i]
+ }
+ work_group_barrier(mem_fence::local);
- unsigned long tmask, sum, stones;
- stones = tmask = sum = 0;
- for (int i = 1; i <= w + 1; i++) {
- if ((curr_mask & (1 << i)) == 0) {
- stones += 1;
- tmask |= 1 << (i - sum);
- sum = i;
- }
- }
- if (tmask == (1 << (n + 1)) - 2 && stones == n) {
- printf("test");
- results[id] = id;
- }
+ u64 result = 0;
+ unsigned long own = permutations[gid + offset];
+ for (int j = 0; j < wsize; j++) {
+ unsigned long curr_mask = i_buffer[j] | own;
+ unsigned long tmask, sum, stones;
+ stones = tmask = sum = 0;
+ for (int i = 1; i <= w + 1; i++) {
+ if ((curr_mask & (1 << i)) == 0) {
+ stones += 1;
+ tmask |= 1 << (i - sum);
+ sum = i;
+ }
+ }
+ if (tmask == (1 << (n + 1)) - 2 && stones == n) {
+ result |= 1 << (j & 63);
+ }
+ if (j & !(j & 63)) {
+ results[gid * (wsize / 64) + j / 64] = result;
+ result = 0;
+ }
+ }
}
diff --git a/src/solvers/opencl.rs b/src/solvers/opencl.rs
index 3406fd0..1293295 100644
--- a/src/solvers/opencl.rs
+++ b/src/solvers/opencl.rs
@@ -146,6 +146,7 @@ impl GpuSolver {
.arg(&self.permutations)
.arg(&result_buffer[i])
.arg(&instruction_buffer[i])
+ .arg_local::<u64>(self.wg_size)
.arg(self.n)
.arg(self.w)
.arg((self.n as u64 - i as u64 - 1) * chunk as u64)