diff options
author | Dennis Kobert <dennis@kobert.dev> | 2020-01-07 03:12:55 +0100 |
---|---|---|
committer | Dennis Kobert <dennis@kobert.dev> | 2020-01-07 03:12:55 +0100 |
commit | c36035c0667fdd224da914b50c30a9366e1a5c38 (patch) | |
tree | 4d67f21f89a138e9859540eb5d2446dec5660aef /src/solvers/opencl.rs | |
parent | 2750313bab1bff5b94734f949e633f04391577ab (diff) |
Fix global work size calculation
Diffstat (limited to 'src/solvers/opencl.rs')
-rw-r--r-- | src/solvers/opencl.rs | 75 |
1 files changed, 55 insertions, 20 deletions
diff --git a/src/solvers/opencl.rs b/src/solvers/opencl.rs index cdedd37..550ace9 100644 --- a/src/solvers/opencl.rs +++ b/src/solvers/opencl.rs @@ -6,6 +6,13 @@ pub struct Job { bitmask: u64, } +impl Job { + pub fn new(rows: Vec<u32>, bitmask: u64) -> Self { + Self { rows, bitmask } + } +} + +#[derive(Debug)] pub struct GpuSolver { #[allow(unused)] platform: Platform, @@ -18,11 +25,14 @@ pub struct GpuSolver { n: u32, h: u32, w: u32, + /// Workgroup size, set to 0 for max wg_size: usize, permutations: Buffer<u64>, rec_queues: Vec<RequestBuffer>, + walls: Vec<Vec<u32>>, } +#[derive(Debug)] struct RequestBuffer { mask_buff: Vec<u64>, row_buff: Vec<Vec<u32>>, @@ -33,8 +43,8 @@ struct RequestBuffer { impl RequestBuffer { pub fn new(size: usize, receiver: Receiver<Job>) -> Self { RequestBuffer { - mask_buff: Vec::with_capacity(size), - row_buff: Vec::with_capacity(size), + mask_buff: vec![0; size], + row_buff: vec![Vec::new(); size], pointer: 0, receiver, } @@ -85,15 +95,15 @@ impl GpuSolver { .len(permutation_masks.len()) .build()?; - let mut senders = Vec::with_capacity(h as usize); - let mut receivers = Vec::with_capacity(h as usize); + let mut senders = Vec::with_capacity((n - h + 1) as usize); + let mut receivers = Vec::with_capacity((n - h + 1) as usize); let max_wg_size = device.max_wg_size()?; if wg_size == 0 { wg_size = max_wg_size; } else if wg_size > max_wg_size { return Err(ocl::Error::from("invalid workgroup size")); } - for _ in 0..h { + for _ in 0..=(n - h) { let (sx, rx) = std::sync::mpsc::channel(); senders.push(sx); receivers.push(RequestBuffer::new(wg_size, rx)); @@ -111,34 +121,40 @@ impl GpuSolver { wg_size, permutations: buffer, rec_queues: receivers, + walls: Vec::new(), }; - std::thread::spawn(move || { - solver.run(); - }); + std::thread::Builder::new() + .name("GPU Deamon".into()) + .spawn(move || { + solver.run(); + }) + .unwrap(); + println!("started gpu thread"); Ok(senders) } fn get_dim(&self, queue: usize) -> usize { let chunk = 
self.permutations.len() / self.n as usize; let dim = (queue + 1) * chunk; - if dim % self.wg_size == 0 { - dim - } else { - let dim_small = dim / self.wg_size; - ((dim_small + 1) * chunk) - } + (dim + self.wg_size - 1) / self.wg_size * self.wg_size } fn get_off(&self, queue: usize) -> u64 { let chunk = self.permutations.len() / self.n as usize; - let dim = (queue + 1) * chunk; - ((self.n as usize - queue - 1) * chunk - self.wg_size + dim % self.wg_size) as u64 + let off = self.permutations.len() - chunk - self.get_dim(queue); + off as u64 + } + fn get_res(&self, queue: usize) -> usize { + let dim = self.get_dim(queue); + dim * self.get_res_save_dim() + } + fn get_res_save_dim(&self) -> usize { + (self.wg_size + 63) / 64 } fn run(mut self) -> ! { let queues = self.rec_queues.len(); let mut instruction_buffer = Vec::with_capacity((self.n - self.h) as usize); let mut result_buffer = Vec::with_capacity((self.n - self.h) as usize); - let chunk = self.permutations.len() / self.n as usize; for i in 0..queues { let buffer: Buffer<u64> = Buffer::builder() @@ -151,17 +167,21 @@ impl GpuSolver { instruction_buffer.push(buffer); let results: Buffer<u64> = Buffer::builder() .queue(self.queue.clone()) - .len(self.wg_size * self.get_dim(i) / 64) + .len(self.get_res(i)) .flags(flags::MEM_READ_WRITE) .build() .unwrap(); result_buffer.push(results); } + println!("finished gpu setup"); for i in (0..self.rec_queues.len()).cycle() { if let Some(buffer) = self.rec_queues[i].read() { instruction_buffer[i].write(buffer).enq().unwrap(); let dim = self.get_dim(i); + //println!("dim: {}", dim); + //println!("off: {}", self.get_off(i)); + //println!("result size: {}", self.get_res_save_dim()); let kernel = Kernel::builder() .program(&self.program) .name("check") @@ -189,7 +209,7 @@ impl GpuSolver { } // (5) Read results from the device into a vector (`::block` not shown): - let mut result = vec![0u64; dim * self.wg_size / 64]; + let mut result = vec![0u64; self.get_res(i)]; result_buffer[i] 
.cmd() .queue(&self.queue) @@ -197,7 +217,22 @@ impl GpuSolver { .read(&mut result) .enq() .unwrap(); - println!("{:?}", result); + for (j, r) in result.iter().enumerate() { + if j == 0 { + continue; + } + for b in 0..64 { + if r & (1 << b) != 0 { + let permutation = + j / self.get_res_save_dim() + self.get_off(i) as usize; + let instruction = (j % self.get_res_save_dim()) * 64 + b; + let mut wall = self.rec_queues[i].get_rows()[instruction].clone(); + wall.push(permutation as u32); + println!("{:?}", wall); + self.walls.push(wall); + } + } + } } } panic!(); |