use super::gpu::*; /// Solve for a given N and return the resulting wall pub struct NormalSolver { pub n: u32, /// calculated height [might not be correct!] pub h: u32, /// width pub w: u32, pub chunk: u32, pub mask: u64, /// Use to store already used blocks as a bitmask permutations: Vec>, masks: Vec, gpu_sender: std::sync::mpsc::Sender, gpu_handle: Option>, } impl NormalSolver { pub fn new(n: u32) -> Self { let h = n / 2 + 1; let w = h * (n - 1); let mut heap: Vec<_> = (1..=n).collect(); let heap = permutohedron::Heap::new(&mut heap); let n_f = permutohedron::factorial(n as usize); let chunk = n_f as u32 / n; let mut permutations = Vec::with_capacity(n_f); let mut masks: Vec = vec![0; n_f]; println!("Generating permutations"); for (j, data) in heap.enumerate() { permutations.push(data.clone()); let mut sum = 0; for stone in permutations[j].iter().take(n as usize - 1) { //.take(n as usize - 1) { sum += stone; masks[j] |= 1 << sum; } } let (sender, receiver) = std::sync::mpsc::channel(); let (gpu_sender, gpu_handle) = super::gpu::OclManager::launch_sevice(&permutations, &masks, n, 0, sender); Self { n, h, w, chunk, mask: (1 << w) - 2, permutations, masks, gpu_sender, gpu_handle: Some(gpu_handle), } } pub fn solve(&mut self) { //for (n, i) in self.permutations.iter().enumerate() { //let tmp: Vec = i.clone(); //println!("perm {}: {:?}", n, tmp); //println!("perm {}: {:b}", n, self.masks[n]); //} println!("calculate results"); self.permute( permutohedron::factorial(self.n as usize), 0, 0, ((0..(self.h - 1)) .map(|x| x * self.chunk) .collect::>()) .as_ref(), ); self.gpu_sender.send(super::gpu::Message::CpuDone).unwrap(); self.gpu_handle.take().unwrap().join().unwrap(); } fn permute(&self, up: usize, index: usize, curr_mask: u64, numbers: &[u32]) { if curr_mask.count_ones() < index as u32 * (self.n - 1) { return; } let mut new_num = Vec::from(numbers); let start = numbers[index as usize] / self.chunk; if index as usize == numbers.len() - 1 { //#[cfg(feature = "gpu")] //{ let mut info = sys_info::mem_info().unwrap(); while info.avail < info.total / 8 { std::thread::sleep_ms(50); info = sys_info::mem_info().unwrap(); println!("mem wait {:?}", info); } let i = self.n - 2 - numbers[index] / self.chunk; self.gpu_sender .send(Message::CheckRequest(CheckRequest::new( new_num, curr_mask, i, ))) .unwrap(); return; //} } for i in start..self.n - (self.h - 1 - index as u32) { for n in 1..(numbers.len() - index) { new_num[n + index] = (n as u32 + i) * self.chunk; } /*if index == 0 { (0..self.chunk).into_par_iter().for_each(|j| { let mut new_num = new_num.clone(); let tmp = i * self.chunk + j; new_num[index] = tmp; self.permute( up, index + 1, curr_mask | self.masks[tmp as usize], &new_num, ); }); } else {*/ for j in 0..self.chunk { new_num[index] = i * self.chunk + j; if index == 0 { println!("progress: {}%", j as f64 / self.chunk as f64); } self.permute( up, index + 1, curr_mask | self.masks[new_num[index] as usize], &new_num, ); } //} } } }