diff options
Diffstat (limited to 'src/solvers/gpusolver.rs')
-rw-r--r-- | src/solvers/gpusolver.rs | 95 |
1 files changed, 79 insertions, 16 deletions
diff --git a/src/solvers/gpusolver.rs b/src/solvers/gpusolver.rs index 3bf8429..371f116 100644 --- a/src/solvers/gpusolver.rs +++ b/src/solvers/gpusolver.rs @@ -1,36 +1,97 @@ use crate::permutations::PermutationGenerator; -use crate::solvers::{gpu, wall_stats, IteratorSolver, Solver}; -use crate::structs::StoneWall; +use crate::solvers::gpu::*; +use crate::solvers::{wall_stats, IteratorSolver, Solver}; +use rayon::prelude::*; +use std::sync::mpsc::Sender; #[derive(Debug)] pub struct GpuSolver { n: u32, h: u32, w: u32, + chunk: u32, permutations: Vec<Vec<u32>>, masks: Vec<u64>, } impl GpuSolver { - fn solve_to_vec(&mut self) -> Vec<StoneWall> { - let src = - std::fs::read_to_string("src/solvers/check.cl").expect("failed to open kernel file"); - + fn solve_to_vec(&mut self) -> Vec<RowResult> { + let (sender, receiver) = std::sync::mpsc::channel(); let (sender, handle) = - gpu::OclManager::launch_sevice(&self.permutations, &self.masks, self.n, 4); - for i in 0..12 { + OclManager::launch_sevice(&self.permutations, &self.masks, self.n, 0, sender); + let chunk = permutohedron::factorial(self.n as usize - 1) as u32; + self.permute( + 0, + 0, + ((0..(self.h - 1)).map(|x| x * chunk).collect::<Vec<u32>>()).as_ref(), + sender.clone(), + ); + sender.send(Message::CpuDone).unwrap(); + let mut walls = Vec::new(); + while let Ok(Message::RowResult(wall)) = receiver.recv() { + walls.push(wall); + } + println!("{:?}", walls); + handle.join().unwrap(); + walls + } + + fn permute(&self, index: usize, curr_mask: u64, numbers: &[u32], sender: Sender<Message>) { + if curr_mask.count_ones() < index as u32 * (self.n - 1) { + return; + } + let mut new_num = Vec::from(numbers); + let start = numbers[index as usize] / self.chunk; + if index as usize == numbers.len() - 1 { + let mut info = sys_info::mem_info().unwrap(); + while info.avail < info.total / 8 { + std::thread::sleep(std::time::Duration::from_millis(5)); + info = sys_info::mem_info().unwrap(); + println!("mem wait {:?}", info); + } + let i = self.n - 2 - numbers[index] / self.chunk; sender - .send(gpu::Message::CheckRequest(gpu::CheckRequest::new( - vec![i as u32], - self.masks[i], - 1 - i as u32 / 6, + .send(Message::CheckRequest(CheckRequest::new( + new_num, curr_mask, i, ))) .unwrap(); + return; } - loop { - std::thread::sleep(std::time::Duration::from_secs(5)); + for i in start..self.n - (self.h - 1 - index as u32) { + for n in 1..(numbers.len() - index) { + new_num[n + index] = (n as u32 + i) * self.chunk; + } + if index == 0 { + let senders: Vec<_> = (0..self.chunk).map(|_| sender.clone()).collect(); + (0..self.chunk) + .into_par_iter() + .zip(senders) + .for_each(|(j, sender)| { + let mut new_num = new_num.clone(); + let tmp = i * self.chunk + j; + new_num[index] = tmp; + self.permute( + index + 1, + curr_mask | self.masks[tmp as usize], + &new_num, + sender, + ); + }); + } else { + for j in 0..self.chunk { + new_num[index] = i * self.chunk + j; + if index == 0 { + println!("progress: {}%", j as f64 / self.chunk as f64); + } + self.permute( + index + 1, + curr_mask | self.masks[new_num[index] as usize], + &new_num, + sender.clone(), + ); + } + } } - vec![] } } @@ -59,10 +120,12 @@ impl Solver for GpuSolver { let (h, w) = wall_stats(n); let permutations = generate_permutations(n); let masks = generate_masks(&permutations); + let chunk = permutohedron::factorial(n as usize - 1) as u32; Self { n, h, w, + chunk, permutations, masks, } @@ -79,7 +142,7 @@ impl Solver for GpuSolver { } impl IteratorSolver for GpuSolver { - type IntoIter = std::vec::IntoIter<StoneWall>; + type IntoIter = std::vec::IntoIter<RowResult>; fn solve(mut self) -> Self::IntoIter { self.solve_to_vec().into_iter() } |