From 1a9d0d91e08d0dee9b48c05ef0ac72cc732a42ab Mon Sep 17 00:00:00 2001 From: Dennis Kobert Date: Mon, 6 Jan 2020 08:33:57 +0100 Subject: Backport to opencl 1.2 --- src/solvers/check.cl | 23 ++++++++++++----------- src/solvers/gpusolver.rs | 29 ++++++++++++++++++++++------- src/solvers/opencl.rs | 42 ++++++++++++++++++++++++++++++------------ 3 files changed, 64 insertions(+), 30 deletions(-) (limited to 'src') diff --git a/src/solvers/check.cl b/src/solvers/check.cl index 2ef6406..7ff39df 100644 --- a/src/solvers/check.cl +++ b/src/solvers/check.cl @@ -1,22 +1,23 @@ -typedef ulong u64 -typedef uint u32 +//#pragma OPENCL EXTENSION cl_intel_printf : enable +typedef ulong u64; +typedef uint u32; -#pragma OPENCL EXTENSION cl_intel_printf : enable __kernel void check(__global u64* permutations, __global u64* results, - u64* instructions, __local u64* i_buffer, u32 n, u32 w, u64 offset) { + __local u64* instructions, __local u64* i_buffer, u32 n, u32 w, u64 offset) { int gid = get_global_id(0); int wid = get_local_id(0); int gsize = get_global_size(0); int wsize = get_local_size(0); - int wesize = get_enqueued_local_size(0); + //int wesize = get_enqueued_local_size(0); - int times = wsize / wesize; - times += !(wsize % wesize); - for (int i = 0; i < times; i++) { - i_buffer[wid + i] = instructions[wid + i] - } - work_group_barrier(mem_fence::local); + //int times = wsize / wesize; + //times += !(wsize % wesize); + //for (int i = 0; i < times; i++) { + int i = 0; + i_buffer[wid + i] = instructions[wid + i]; + //} + barrier(CLK_LOCAL_MEM_FENCE); u64 result = 0; unsigned long own = permutations[gid + offset]; diff --git a/src/solvers/gpusolver.rs b/src/solvers/gpusolver.rs index 2b9eb4a..69c400e 100644 --- a/src/solvers/gpusolver.rs +++ b/src/solvers/gpusolver.rs @@ -1,16 +1,29 @@ -use crate::solver::{wall_stats, Solver, IteratorSolver}; -use crate::structs::StoneWall; use crate::permutations::PermutationGenerator; +use crate::solver::{wall_stats, IteratorSolver, Solver}; +use crate::structs::StoneWall; #[derive(Debug)] pub struct GpuSolver { - n: u32, h: u32, w: u32, + n: u32, + h: u32, + w: u32, permutations: Vec>, masks: Vec, } impl GpuSolver { fn solve_to_vec(&mut self) -> Vec { + let src = + std::fs::read_to_string("src/solvers/check.cl").expect("failed to open kernel file"); + + crate::solvers::opencl::GpuSolver::launch_sevice( + &self.masks, + self.n, + self.h, + self.w, + src.as_ref(), + ) + .unwrap(); vec![] } } @@ -25,9 +38,9 @@ fn generate_masks(permutations: &[Vec]) -> Vec { let mut v = 0; let mut x = 0u64; for i in p.iter().take(p.len() - 1).map(|i| { - v += i; - v - }) { + v += i; + v + }) { x |= 1 << i } masks.push(x) @@ -41,7 +54,9 @@ impl Solver for GpuSolver { let permutations = generate_permutations(n); let masks = generate_masks(&permutations); Self { - n, h, w, + n, + h, + w, permutations, masks, } diff --git a/src/solvers/opencl.rs b/src/solvers/opencl.rs index 1293295..8421d53 100644 --- a/src/solvers/opencl.rs +++ b/src/solvers/opencl.rs @@ -7,8 +7,11 @@ pub struct Job { } pub struct GpuSolver { + #[allow(unused)] platform: Platform, + #[allow(unused)] device: Device, + #[allow(unused)] context: Context, program: Program, queue: Queue, @@ -17,19 +20,19 @@ pub struct GpuSolver { w: u32, wg_size: usize, permutations: Buffer, - rec_queues: Vec, + rec_queues: Vec, } -struct ReqestBuffer { +struct RequestBuffer { mask_buff: Vec, row_buff: Vec>, pointer: usize, receiver: Receiver, } -impl ReqestBuffer { - pub fn new(size: usize, receiver: Receiver) -> ReqestBuffer { - Self { +impl RequestBuffer { + pub fn new(size: usize, receiver: Receiver) -> Self { + RequestBuffer { mask_buff: Vec::with_capacity(size), row_buff: Vec::with_capacity(size), pointer: 0, @@ -48,13 +51,14 @@ impl ReqestBuffer { } None } + #[allow(unused)] pub fn get_rows(&self) -> &[Vec] { self.row_buff.as_ref() } } impl GpuSolver { - pub fn new( + pub fn launch_sevice( permutation_masks: &[u64], n: u32, h: u32, @@ -86,7 +90,7 @@ impl GpuSolver { for _ in 0..h { let (sx, rx) = std::sync::mpsc::channel(); senders.push(sx); - receivers.push(ReqestBuffer::new(wg_size, rx)); + receivers.push(RequestBuffer::new(wg_size, rx)); } let solver = Self { @@ -108,6 +112,22 @@ impl GpuSolver { Ok(senders) } + fn get_dim(&self, queue: usize) -> usize { + let chunk = self.permutations.len() / self.n as usize; + let dim = (queue + 1) * chunk; + if dim % self.wg_size == 0 { + dim + } else { + let dim_small = dim / self.wg_size; + ((dim_small + 1) * chunk) + } + } + fn get_off(&self, queue: usize) -> u64 { + let chunk = self.permutations.len() / self.n as usize; + let dim = (queue + 1) * chunk; + ((self.n as usize - queue - 1) * chunk - self.wg_size + dim % self.wg_size) as u64 + } + fn run(mut self) -> ! { let queues = self.rec_queues.len(); let mut instruction_buffer = Vec::with_capacity((self.n - self.h) as usize); @@ -123,10 +143,9 @@ impl GpuSolver { .unwrap(); instruction_buffer.push(buffer); - let dim = (i + 1) * chunk; let results: Buffer = Buffer::builder() .queue(self.queue.clone()) - .len(self.wg_size * dim / 64) + .len(self.wg_size * self.get_dim(i) / 64) .flags(flags::MEM_READ_WRITE) .build() .unwrap(); @@ -135,8 +154,7 @@ impl GpuSolver { for i in (0..self.rec_queues.len()).cycle() { if let Some(buffer) = self.rec_queues[i].read() { instruction_buffer[i].write(buffer).enq().unwrap(); - println!("hello world"); - let dim = (i + 1) * chunk; + let dim = self.get_dim(i); let kernel = Kernel::builder() .program(&self.program) @@ -149,7 +167,7 @@ impl GpuSolver { .arg_local::(self.wg_size) .arg(self.n) .arg(self.w) - .arg((self.n as u64 - i as u64 - 1) * chunk as u64) + .arg(self.get_off(i)) .build() .unwrap(); -- cgit v1.2.3-54-g00ecf