From 8708a172ebe59d3189b8b9d756abd9da8dc509a3 Mon Sep 17 00:00:00 2001 From: Dennis Kobert Date: Sun, 12 Jan 2020 09:51:22 +0100 Subject: Allow to use the iterator for results --- src/main.rs | 10 ++--- src/solvers/gpu/host.rs | 7 +++- src/solvers/gpu/manager.rs | 17 +++++---- src/solvers/gpu/mod.rs | 22 +++++++++++ src/solvers/gpu/output.rs | 34 ++++++++--------- src/solvers/gpusolver.rs | 95 ++++++++++++++++++++++++++++++++++++++-------- src/solvers/mod.rs | 6 +-- src/solvers/single.rs | 7 ++-- 8 files changed, 143 insertions(+), 55 deletions(-) diff --git a/src/main.rs b/src/main.rs index c2643c3..ee7082d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,11 +5,11 @@ use crate::solvers::{IteratorSolver, Solver}; pub static N: u32 = 8; fn main() { - let mut solver = solvers::single::NormalSolver::new(N); - solver.solve(); - /*let solver = solvers::gpusolver::GpuSolver::new(N); - println!("solver: {:?}", solver); + //let mut solver = solvers::single::NormalSolver::new(N); + //solver.solve(); + let solver = solvers::gpusolver::GpuSolver::new(N); + //println!("solver: {:?}", solver); for (i, solution) in solver.solve().enumerate() { println!("{}: {:?}", i, solution); - }*/ + } } diff --git a/src/solvers/gpu/host.rs b/src/solvers/gpu/host.rs index e354263..d67138e 100644 --- a/src/solvers/gpu/host.rs +++ b/src/solvers/gpu/host.rs @@ -123,8 +123,11 @@ impl Host { println!("finished gpu setup"); loop { match self.receiver.recv().expect("Channel to Host broke") { + Message::CpuDone => { + self.output_sender.send(Message::CpuDone); + return; + } Message::Terminate => { - self.output_sender.send(Message::Terminate); return; } Message::HostMessage((id, i, buffer)) => { @@ -184,7 +187,7 @@ impl Host { ))) .unwrap(); } - _ => println!("Invalid MessageType"), + m => println!("Invalid MessageType {:?} recived by host", m), } } } diff --git a/src/solvers/gpu/manager.rs b/src/solvers/gpu/manager.rs index a2253aa..def1f35 100644 --- a/src/solvers/gpu/manager.rs +++
b/src/solvers/gpu/manager.rs @@ -1,4 +1,4 @@ -use super::{CheckRequest, Message}; +use super::{CheckRequest, Message, RowResult}; use std::sync::mpsc::{channel, Receiver, Sender}; use std::thread::JoinHandle; @@ -36,7 +36,7 @@ pub struct OclManager { receiver: Receiver, buffers: Vec, output_handle: JoinHandle<()>, - host_handle: JoinHandle<()>, + host_handle: Option>, } impl OclManager { @@ -46,6 +46,7 @@ impl OclManager { n: u32, // Workgroup size, set to 0 for max mut wg_size: usize, + result_output: Sender, ) -> (Sender, JoinHandle<()>) { let (h, w) = crate::solvers::wall_stats(n); let src = include_str!("check.cl"); @@ -60,7 +61,7 @@ impl OclManager { } let (output_sender, output_handle) = - super::output::Output::launch_sevice(permutations, permutations_mask); + super::output::Output::launch_sevice(permutations, permutations_mask, result_output); let (host_sender, host_handle) = super::host::Host::launch_sevice( permutations_mask, n, @@ -87,7 +88,7 @@ impl OclManager { receiver, buffers, output_handle, - host_handle, + host_handle: Some(host_handle), }; ( sender, @@ -115,10 +116,12 @@ impl OclManager { self.job_id += 1; } } - Message::Terminate => { + Message::CpuDone => { //TODO panic!("flush buffers"); - self.host_sender.send(Message::Terminate); - self.host_handle.join(); + self.host_sender.send(Message::CpuDone); + self.host_handle.take().unwrap().join(); + } + Message::Terminate => { self.output_sender.send(Message::Terminate); self.output_handle.join(); return; diff --git a/src/solvers/gpu/mod.rs b/src/solvers/gpu/mod.rs index 2c7f69d..e89f033 100644 --- a/src/solvers/gpu/mod.rs +++ b/src/solvers/gpu/mod.rs @@ -7,14 +7,19 @@ pub use manager::*; type MaskMessage = (u64, u32, Vec); type RowMessage = (u64, Vec>); +#[derive(Debug)] pub enum Message { CheckRequest(CheckRequest), HostMessage(MaskMessage), OutputMessage(RowMessage), ResultMessage(ResultMessage), + RowResult(RowResult), Terminate, + CpuDone, + GpuDone, } +#[derive(Debug)] pub struct 
ResultMessage { data: Vec, offset: usize, @@ -48,6 +53,7 @@ impl ResultMessage { } } +#[derive(Debug)] pub struct CheckRequest { rows: Vec, bitmask: u64, @@ -63,3 +69,19 @@ impl CheckRequest { } } } + +#[derive(Clone, PartialEq, Eq, Hash, Debug)] +pub struct RowResult { + rows: Vec, +} + +impl RowResult { + fn new(mut rows: Vec) -> Self { + rows.push(0); + rows.sort(); + Self { rows } + } + fn output(&self) { + println!("{:?}", self.rows); + } +} diff --git a/src/solvers/gpu/output.rs b/src/solvers/gpu/output.rs index 43e0d98..6bf7bcb 100644 --- a/src/solvers/gpu/output.rs +++ b/src/solvers/gpu/output.rs @@ -1,4 +1,4 @@ -use super::{Message, ResultMessage}; +use super::{Message, ResultMessage, RowResult}; use std::collections::{HashMap, HashSet}; use std::sync::mpsc::{channel, Receiver, Sender}; use std::thread::JoinHandle; @@ -44,6 +44,9 @@ impl InBuffer { self.row_requests.insert(id, output); } } + Message::CpuDone => { + return None; + } Message::Terminate => { return None; } @@ -66,33 +69,19 @@ impl InBuffer { } } -#[derive(PartialEq, Eq, Hash)] -pub struct RowResult { - rows: Vec, -} - -impl RowResult { - fn new(mut rows: Vec) -> Self { - rows.push(0); - rows.sort(); - Self { rows } - } - fn output(&self) { - println!("{:?}", self.rows); - } -} - pub struct Output { input: InBuffer, permutations: Vec>, permutations_mask: Vec, results: HashSet, + result_sender: Sender, } impl Output { pub fn launch_sevice( permutations: &[Vec], permutations_mask: &[u64], + result_sender: Sender, ) -> (Sender, JoinHandle<()>) { let (sender, receiver) = channel(); let input = InBuffer::new(receiver); @@ -102,11 +91,12 @@ impl Output { permutations: permutations.into(), permutations_mask: permutations_mask.into(), results: HashSet::new(), + result_sender, }; ( sender, std::thread::Builder::new() - .name("GPU Manager Deamon".into()) + .name("GPU Output Deamon".into()) .spawn(move || { output.run(); }) @@ -118,12 +108,20 @@ impl Output { loop { if let Some(walls) = self.input.read() 
{ for wall in walls { + if !self.results.contains(&wall) { + self.result_sender + .send(Message::RowResult(wall.clone())) + .or_else(|_| Err(println!("Failed to transmit result back"))); + } self.results.insert(wall); } } else { for wall in self.results { wall.output() } + self.result_sender.send(Message::GpuDone).unwrap(); + // wait for second exit signal + self.input.read(); return; } } diff --git a/src/solvers/gpusolver.rs b/src/solvers/gpusolver.rs index 3bf8429..371f116 100644 --- a/src/solvers/gpusolver.rs +++ b/src/solvers/gpusolver.rs @@ -1,36 +1,97 @@ use crate::permutations::PermutationGenerator; -use crate::solvers::{gpu, wall_stats, IteratorSolver, Solver}; -use crate::structs::StoneWall; +use crate::solvers::gpu::*; +use crate::solvers::{wall_stats, IteratorSolver, Solver}; +use rayon::prelude::*; +use std::sync::mpsc::Sender; #[derive(Debug)] pub struct GpuSolver { n: u32, h: u32, w: u32, + chunk: u32, permutations: Vec>, masks: Vec, } impl GpuSolver { - fn solve_to_vec(&mut self) -> Vec { - let src = - std::fs::read_to_string("src/solvers/check.cl").expect("failed to open kernel file"); - + fn solve_to_vec(&mut self) -> Vec { + let (sender, receiver) = std::sync::mpsc::channel(); let (sender, handle) = - gpu::OclManager::launch_sevice(&self.permutations, &self.masks, self.n, 4); - for i in 0..12 { + OclManager::launch_sevice(&self.permutations, &self.masks, self.n, 0, sender); + let chunk = permutohedron::factorial(self.n as usize - 1) as u32; + self.permute( + 0, + 0, + ((0..(self.h - 1)).map(|x| x * chunk).collect::>()).as_ref(), + sender.clone(), + ); + sender.send(Message::CpuDone).unwrap(); + let mut walls = Vec::new(); + while let Ok(Message::RowResult(wall)) = receiver.recv() { + walls.push(wall); + } + println!("{:?}", walls); + handle.join().unwrap(); + walls + } + + fn permute(&self, index: usize, curr_mask: u64, numbers: &[u32], sender: Sender) { + if curr_mask.count_ones() < index as u32 * (self.n - 1) { + return; + } + let mut new_num = 
Vec::from(numbers); + let start = numbers[index as usize] / self.chunk; + if index as usize == numbers.len() - 1 { + let mut info = sys_info::mem_info().unwrap(); + while info.avail < info.total / 8 { + std::thread::sleep(std::time::Duration::from_millis(5)); + info = sys_info::mem_info().unwrap(); + println!("mem wait {:?}", info); + } + let i = self.n - 2 - numbers[index] / self.chunk; sender - .send(gpu::Message::CheckRequest(gpu::CheckRequest::new( - vec![i as u32], - self.masks[i], - 1 - i as u32 / 6, + .send(Message::CheckRequest(CheckRequest::new( + new_num, curr_mask, i, ))) .unwrap(); + return; } - loop { - std::thread::sleep(std::time::Duration::from_secs(5)); + for i in start..self.n - (self.h - 1 - index as u32) { + for n in 1..(numbers.len() - index) { + new_num[n + index] = (n as u32 + i) * self.chunk; + } + if index == 0 { + let senders: Vec<_> = (0..self.chunk).map(|_| sender.clone()).collect(); + (0..self.chunk) + .into_par_iter() + .zip(senders) + .for_each(|(j, sender)| { + let mut new_num = new_num.clone(); + let tmp = i * self.chunk + j; + new_num[index] = tmp; + self.permute( + index + 1, + curr_mask | self.masks[tmp as usize], + &new_num, + sender, + ); + }); + } else { + for j in 0..self.chunk { + new_num[index] = i * self.chunk + j; + if index == 0 { + println!("progress: {}%", j as f64 / self.chunk as f64); + } + self.permute( + index + 1, + curr_mask | self.masks[new_num[index] as usize], + &new_num, + sender.clone(), + ); + } + } } - vec![] } } @@ -59,10 +120,12 @@ impl Solver for GpuSolver { let (h, w) = wall_stats(n); let permutations = generate_permutations(n); let masks = generate_masks(&permutations); + let chunk = permutohedron::factorial(n as usize - 1) as u32; Self { n, h, w, + chunk, permutations, masks, } @@ -79,7 +142,7 @@ impl Solver for GpuSolver { } impl IteratorSolver for GpuSolver { - type IntoIter = std::vec::IntoIter; + type IntoIter = std::vec::IntoIter; fn solve(mut self) -> Self::IntoIter { 
self.solve_to_vec().into_iter() } diff --git a/src/solvers/mod.rs b/src/solvers/mod.rs index 189deca..5b7a495 100644 --- a/src/solvers/mod.rs +++ b/src/solvers/mod.rs @@ -3,7 +3,7 @@ pub mod gpu; pub mod gpusolver; pub mod single; -use crate::structs::StoneWall; +//use crate::structs::StoneWall; pub use gpu::*; /// calculate h and w @@ -20,10 +20,10 @@ pub trait Solver { } pub trait FirstSolver { - fn solve(self) -> StoneWall; + fn solve(self) -> RowResult; } pub trait IteratorSolver: Solver { - type IntoIter: Iterator; + type IntoIter: Iterator; fn solve(self) -> Self::IntoIter; } diff --git a/src/solvers/single.rs b/src/solvers/single.rs index 6bfdd5f..471c283 100644 --- a/src/solvers/single.rs +++ b/src/solvers/single.rs @@ -37,8 +37,9 @@ impl NormalSolver { } } + let (sender, receiver) = std::sync::mpsc::channel(); let (gpu_sender, gpu_handle) = - super::gpu::OclManager::launch_sevice(&permutations, &masks, n, 0); + super::gpu::OclManager::launch_sevice(&permutations, &masks, n, 0, sender); Self { n, h, @@ -68,9 +69,7 @@ impl NormalSolver { .collect::>()) .as_ref(), ); - self.gpu_sender - .send(super::gpu::Message::Terminate) - .unwrap(); + self.gpu_sender.send(super::gpu::Message::CpuDone).unwrap(); self.gpu_handle.take().unwrap().join().unwrap(); } -- cgit v1.2.3-54-g00ecf