//#pragma OPENCL EXTENSION cl_intel_printf : enable

typedef ulong u64;
typedef uint u32;

/*
 * check - test every (instruction, permutation) pair of a work-group.
 *
 * Each work-item loads one mask from `permutations` (at global id + offset)
 * and combines it, via bitwise OR, with each of the `wsize` masks staged in
 * `i_buffer` (wsize = local work-group size). For every combined mask, bit
 * positions 1 .. w + 1 are scanned; each ZERO bit counts as a "stone". The
 * distance from each stone to the previous one (or to position 0 for the
 * first stone) is recorded as a set bit in a gap mask. A combination is
 * accepted when exactly `n` stones were found and the recorded gaps are
 * exactly the set {1, ..., n}.
 *
 * Output layout: acceptance flags are packed 64 per word; flag j lives in
 * bit (j & 63) of results[gid * (wsize / 64) + j / 64].
 *
 * Parameters:
 *   permutations  global input; element [gid + offset] is read.
 *   results       global output; wsize / 64 words are written per work-item.
 *   instructions  local input; element [wid] is copied by each work-item.
 *   i_buffer      local scratch holding at least wsize elements.
 *   n             required stone count, and largest gap expected.
 *   w             scan covers bit positions 1 .. w + 1.
 *   offset        added to the global id when indexing `permutations`.
 *
 * Assumptions / caveats:
 *   - Shift counts must stay below 64, i.e. w + 1 <= 63 and n + 1 <= 63.
 *   - Only complete 64-flag words are stored. If wsize is not a multiple of
 *     64 the trailing flags are dropped, because the row stride wsize / 64
 *     leaves no slot for a partial word.
 *   - NOTE(review): `instructions` is declared __local, which the host
 *     cannot pre-fill through clSetKernelArg; confirm whether __global was
 *     intended. The qualifier is kept as-is to preserve the kernel interface.
 */
__kernel void check(__global u64 *permutations,
                    __global u64 *results,
                    __local u64 *instructions,
                    __local u64 *i_buffer,
                    u32 n,
                    u32 w,
                    u64 offset)
{
    /* Keep ids in size_t: no truncation to int, no int overflow in indexing. */
    const size_t gid = get_global_id(0);
    const size_t wid = get_local_id(0);
    const size_t wsize = get_local_size(0);
    const size_t words_per_item = wsize / 64;

    /* Each work-item stages one element; together they fill i_buffer[0..wsize). */
    i_buffer[wid] = instructions[wid];

    /* Every i_buffer slot must be written before any work-item reads them all. */
    barrier(CLK_LOCAL_MEM_FENCE);

    const u64 own = permutations[gid + offset];

    /* Bits 1..n set: the gap mask of a combination whose gaps are {1..n}. */
    const u64 expected_gaps = ((u64)1 << (n + 1)) - 2;

    u64 result = 0;
    for (size_t j = 0; j < wsize; j++) {
        const u64 curr_mask = i_buffer[j] | own;
        u64 gaps = 0;    /* bit d set <=> some consecutive stones are d apart */
        u64 stones = 0;  /* number of zero bits seen so far */
        u32 prev = 0;    /* position of the previous stone (0 before the first) */

        for (u32 i = 1; i <= w + 1; i++) {
            /* 64-bit literal: a plain `1` is a 32-bit int and cannot reach
             * the upper half of the mask. */
            if ((curr_mask & ((u64)1 << i)) == 0) {
                stones += 1;
                gaps |= (u64)1 << (i - prev);
                prev = i;
            }
        }

        if (gaps == expected_gaps && stones == n) {
            result |= (u64)1 << (j & 63);
        }

        /* Flush once the 64th flag of the current word has been recorded,
         * so word j / 64 holds exactly flags 64*(j/64) .. j. */
        if ((j & 63) == 63) {
            results[gid * words_per_item + j / 64] = result;
            result = 0;
        }
    }
}