From 1a9d0d91e08d0dee9b48c05ef0ac72cc732a42ab Mon Sep 17 00:00:00 2001 From: Dennis Kobert Date: Mon, 6 Jan 2020 08:33:57 +0100 Subject: Backport to opencl 1.2 --- src/solvers/opencl.rs | 42 ++++++++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 12 deletions(-) (limited to 'src/solvers/opencl.rs') diff --git a/src/solvers/opencl.rs b/src/solvers/opencl.rs index 1293295..8421d53 100644 --- a/src/solvers/opencl.rs +++ b/src/solvers/opencl.rs @@ -7,8 +7,11 @@ pub struct Job { } pub struct GpuSolver { + #[allow(unused)] platform: Platform, + #[allow(unused)] device: Device, + #[allow(unused)] context: Context, program: Program, queue: Queue, @@ -17,19 +20,19 @@ pub struct GpuSolver { w: u32, wg_size: usize, permutations: Buffer, - rec_queues: Vec, + rec_queues: Vec, } -struct ReqestBuffer { +struct RequestBuffer { mask_buff: Vec, row_buff: Vec>, pointer: usize, receiver: Receiver, } -impl ReqestBuffer { - pub fn new(size: usize, receiver: Receiver) -> ReqestBuffer { - Self { +impl RequestBuffer { + pub fn new(size: usize, receiver: Receiver) -> Self { + RequestBuffer { mask_buff: Vec::with_capacity(size), row_buff: Vec::with_capacity(size), pointer: 0, @@ -48,13 +51,14 @@ impl ReqestBuffer { } None } + #[allow(unused)] pub fn get_rows(&self) -> &[Vec] { self.row_buff.as_ref() } } impl GpuSolver { - pub fn new( + pub fn launch_sevice( permutation_masks: &[u64], n: u32, h: u32, @@ -86,7 +90,7 @@ impl GpuSolver { for _ in 0..h { let (sx, rx) = std::sync::mpsc::channel(); senders.push(sx); - receivers.push(ReqestBuffer::new(wg_size, rx)); + receivers.push(RequestBuffer::new(wg_size, rx)); } let solver = Self { @@ -108,6 +112,22 @@ impl GpuSolver { Ok(senders) } + fn get_dim(&self, queue: usize) -> usize { + let chunk = self.permutations.len() / self.n as usize; + let dim = (queue + 1) * chunk; + if dim % self.wg_size == 0 { + dim + } else { + let dim_small = dim / self.wg_size; + ((dim_small + 1) * chunk) + } + } + fn get_off(&self, queue: usize) -> u64 { + let chunk = self.permutations.len() / self.n as usize; + let dim = (queue + 1) * chunk; + ((self.n as usize - queue - 1) * chunk - self.wg_size + dim % self.wg_size) as u64 + } + fn run(mut self) -> ! { let queues = self.rec_queues.len(); let mut instruction_buffer = Vec::with_capacity((self.n - self.h) as usize); @@ -123,10 +143,9 @@ impl GpuSolver { .unwrap(); instruction_buffer.push(buffer); - let dim = (i + 1) * chunk; let results: Buffer = Buffer::builder() .queue(self.queue.clone()) - .len(self.wg_size * dim / 64) + .len(self.wg_size * self.get_dim(i) / 64) .flags(flags::MEM_READ_WRITE) .build() .unwrap(); @@ -135,8 +154,7 @@ impl GpuSolver { for i in (0..self.rec_queues.len()).cycle() { if let Some(buffer) = self.rec_queues[i].read() { instruction_buffer[i].write(buffer).enq().unwrap(); - println!("hello world"); - let dim = (i + 1) * chunk; + let dim = self.get_dim(i); let kernel = Kernel::builder() .program(&self.program) @@ -149,7 +167,7 @@ impl GpuSolver { .arg_local::(self.wg_size) .arg(self.n) .arg(self.w) - .arg((self.n as u64 - i as u64 - 1) * chunk as u64) + .arg(self.get_off(i)) .build() .unwrap(); -- cgit v1.2.3-54-g00ecf