use crate::permutations::PermutationGenerator; use crate::solvers::gpu::*; use crate::solvers::{wall_stats, IteratorSolver, Solver}; use rayon::prelude::*; use std::sync::mpsc::Sender; use std::sync::{Arc, Mutex}; #[derive(Debug)] pub struct GpuSolver { n: u32, h: u32, w: u32, chunk: u32, permutations: Vec>, masks: Vec, progress: Arc>, } impl GpuSolver { fn solve_to_vec(&mut self) -> Vec { let (sender, receiver) = std::sync::mpsc::channel(); let (sender, handle) = OclManager::launch_sevice(&self.permutations, &self.masks, self.n, 0, sender); let chunk = permutohedron::factorial(self.n as usize - 1) as u32; self.permute( 0, 0, ((0..(self.h - 2)).map(|x| x * chunk).collect::>()).as_ref(), sender.clone(), ); sender.send(Message::CpuDone).unwrap(); let mut walls = Vec::new(); while let Ok(Message::RowResult(wall)) = receiver.recv() { walls.push(wall); } //println!("{:?}", walls); handle.join().unwrap(); walls } fn permute(&self, index: usize, curr_mask: u64, numbers: &[u32], sender: Sender) { if curr_mask.count_ones() < index as u32 * (self.n - 1) { return; } let mut new_num = Vec::from(numbers); if index as usize == numbers.len() { let mut info = sys_info::mem_info().unwrap(); while info.avail < 1024 * 1024 { std::thread::sleep(std::time::Duration::from_millis(5)); info = sys_info::mem_info().unwrap(); println!("mem wait {:?}", info); } let i = self.n - 3 - numbers[index - 1] / self.chunk; sender .send(Message::CheckRequest(CheckRequest::new( new_num, curr_mask, i, ))) .unwrap(); return; } let start = numbers[index as usize] / self.chunk; for i in start..self.n - (self.h - 1 - index as u32) { for n in 1..(numbers.len() - index) { new_num[n + index] = (n as u32 + i) * self.chunk; } if index == 0 { let senders: Vec<_> = (0..self.chunk).map(|_| sender.clone()).collect(); (0..self.chunk) .into_par_iter() .zip(senders) .for_each(|(j, sender)| { let mut new_num = new_num.clone(); let tmp = i * self.chunk + j; new_num[index] = tmp; self.permute( index + 1, curr_mask | self.masks[tmp as usize], &new_num, sender, ); }); } else { for j in 0..self.chunk { let n_i = i * self.chunk + j; if curr_mask & (1 << (self.permutations[n_i as usize][0] + 1)) > 0 { continue; } new_num[index] = n_i; self.permute( index + 1, curr_mask | self.masks[new_num[index] as usize], &new_num, sender.clone(), ); } if index == 1 { let mut data = self.progress.lock().unwrap(); *data += 1; println!("progress: {}%", *data as f64 / self.chunk as f64 * 100.0); } } } } } fn generate_permutations(n: u32) -> Vec> { crate::permutations::HeapsPermutations::permutations(n) } fn generate_masks(permutations: &[Vec]) -> Vec { let mut masks = Vec::with_capacity(permutations.len()); for p in permutations { let mut v = 0; let mut x = 0u64; for i in p.iter().take(p.len() - 1).map(|i| { v += i; v }) { x |= 1 << i } masks.push(x) } masks } impl Solver for GpuSolver { fn new(n: u32) -> Self { let (h, w) = wall_stats(n); let permutations = generate_permutations(n); let masks = generate_masks(&permutations); let chunk = permutohedron::factorial(n as usize - 1) as u32; Self { n, h, w, chunk, permutations, masks, progress: Arc::new(Mutex::new(0)), } } fn n(&self) -> u32 { self.n } fn h(&self) -> u32 { self.h } fn w(&self) -> u32 { self.w } } impl IteratorSolver for GpuSolver { type IntoIter = std::vec::IntoIter; fn solve(mut self) -> Self::IntoIter { self.solve_to_vec().into_iter() } }