From 66cc693a770cf8668ddbddd60d35c8e8dacd55a9 Mon Sep 17 00:00:00 2001 From: Dennis Kobert Date: Mon, 6 Jan 2020 01:31:50 +0100 Subject: Implement gpu worker --- src/solvers/check.cl | 2 +- src/solvers/opencl.rs | 128 ++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 121 insertions(+), 9 deletions(-) (limited to 'src/solvers') diff --git a/src/solvers/check.cl b/src/solvers/check.cl index 94a81fc..0ad187a 100644 --- a/src/solvers/check.cl +++ b/src/solvers/check.cl @@ -1,6 +1,6 @@ #pragma OPENCL EXTENSION cl_intel_printf : enable __kernel void check(__global unsigned long* permutations, __global int* results, - unsigned long mask, unsigned int n, unsigned int w, unsigned long offset) { + unsigned long* instructions, unsigned int n, unsigned int w, unsigned long offset) { int id = get_global_id(0); unsigned long curr_mask = mask | permutations[id + offset]; diff --git a/src/solvers/opencl.rs b/src/solvers/opencl.rs index 676c5cc..3406fd0 100644 --- a/src/solvers/opencl.rs +++ b/src/solvers/opencl.rs @@ -1,4 +1,4 @@ -use ocl::{Buffer, Context, Device, Platform, Queue}; +use ocl::{flags, Buffer, Context, Device, Kernel, Platform, Program, Queue}; use std::sync::mpsc::{Receiver, Sender}; pub struct Job { @@ -10,16 +10,57 @@ pub struct GpuSolver { platform: Platform, device: Device, context: Context, + program: Program, queue: Queue, n: u32, h: u32, w: u32, + wg_size: usize, permutations: Buffer, - rec_queues: Vec>, + rec_queues: Vec, +} + +struct ReqestBuffer { + mask_buff: Vec, + row_buff: Vec>, + pointer: usize, + receiver: Receiver, +} + +impl ReqestBuffer { + pub fn new(size: usize, receiver: Receiver) -> ReqestBuffer { + Self { + mask_buff: Vec::with_capacity(size), + row_buff: Vec::with_capacity(size), + pointer: 0, + receiver, + } + } + pub fn read(&mut self) -> Option<&[u64]> { + for job in self.receiver.try_iter() { + self.mask_buff[self.pointer] = job.bitmask; + self.row_buff[self.pointer] = job.rows; + self.pointer += 1; + if self.pointer == self.mask_buff.len() { + self.pointer = 0; + return Some(self.mask_buff.as_ref()); + } + } + None + } + pub fn get_rows(&self) -> &[Vec] { + self.row_buff.as_ref() + } } impl GpuSolver { - pub fn new(permutation_masks: &[u64], n: u32, h: u32, w: u32) -> ocl::Result>> { + pub fn new( + permutation_masks: &[u64], + n: u32, + h: u32, + w: u32, + src: &str, + ) -> ocl::Result>> { let platform = ocl::Platform::default(); let device = ocl::Device::first(platform)?; let context = ocl::Context::builder() @@ -28,28 +69,36 @@ impl GpuSolver { .build()?; let queue = ocl::Queue::new(&context, device, None)?; + let program = Program::builder() + .devices(device) + .src(src) + .build(&context)?; let buffer = ocl::Buffer::builder() .queue(queue.clone()) + .flags(flags::MEM_READ_WRITE) .copy_host_slice(permutation_masks) .len(permutation_masks.len()) .build()?; let mut senders = Vec::with_capacity(h as usize); let mut receivers = Vec::with_capacity(h as usize); + let wg_size = device.max_wg_size()?; for _ in 0..h { let (sx, rx) = std::sync::mpsc::channel(); senders.push(sx); - receivers.push(rx); + receivers.push(ReqestBuffer::new(wg_size, rx)); } let solver = Self { platform, device, context, + program, queue, n, h, w, + wg_size, permutations: buffer, rec_queues: receivers, }; @@ -59,11 +108,74 @@ impl GpuSolver { Ok(senders) } - fn run(self) -> ! { - for rec in self.rec_queues.iter().cycle() { - //if rec. + fn run(mut self) -> ! { + let queues = self.rec_queues.len(); + let mut instruction_buffer = Vec::with_capacity((self.n - self.h) as usize); + let mut result_buffer = Vec::with_capacity((self.n - self.h) as usize); + let chunk = self.permutations.len() / self.n as usize; + + for i in 0..queues { + let buffer: Buffer = Buffer::builder() + .queue(self.queue.clone()) + .len(self.wg_size) + .flags(flags::MEM_READ_WRITE) + .build() + .unwrap(); + + instruction_buffer.push(buffer); + let dim = (i + 1) * chunk; + let results: Buffer = Buffer::builder() + .queue(self.queue.clone()) + .len(self.wg_size * dim / 64) + .flags(flags::MEM_READ_WRITE) + .build() + .unwrap(); + result_buffer.push(results); + } + for i in (0..self.rec_queues.len()).cycle() { + if let Some(buffer) = self.rec_queues[i].read() { + instruction_buffer[i].write(buffer).enq().unwrap(); + println!("hello world"); + let dim = (i + 1) * chunk; + + let kernel = Kernel::builder() + .program(&self.program) + .name("check") + .queue(self.queue.clone()) + .global_work_size(dim) + .arg(&self.permutations) + .arg(&result_buffer[i]) + .arg(&instruction_buffer[i]) + .arg(self.n) + .arg(self.w) + .arg((self.n as u64 - i as u64 - 1) * chunk as u64) + .build() + .unwrap(); + + unsafe { + kernel + .cmd() + .queue(&self.queue) + .global_work_offset(kernel.default_global_work_offset()) + .global_work_size(dim) + .local_work_size(self.wg_size) + .enq() + .unwrap(); + } + + // (5) Read results from the device into a vector (`::block` not shown): + let mut result = vec![0u64; dim * self.wg_size / 64]; + result_buffer[i] + .cmd() + .queue(&self.queue) + .offset(0) + .read(&mut result) + .enq() + .unwrap(); + println!("{:?}", result); + } } - loop {} + panic!(); } } /* -- cgit v1.2.3-54-g00ecf