diff options
Diffstat (limited to 'src')
-rw-r--r--[-rwxr-xr-x] | src/main.rs | 5 | ||||
-rwxr-xr-x | src/solver.rs | 23 | ||||
-rw-r--r-- | src/solvers/gpu/check.cl (renamed from src/solvers/check.cl) | 0 | ||||
-rw-r--r-- | src/solvers/gpu/host.rs | 235 | ||||
-rw-r--r-- | src/solvers/gpu/manager.rs | 104 | ||||
-rw-r--r-- | src/solvers/gpu/mod.rs | 56 | ||||
-rw-r--r-- | src/solvers/gpu/output.rs | 96 | ||||
-rw-r--r-- | src/solvers/gpusolver.rs | 3 | ||||
-rw-r--r--[-rwxr-xr-x] | src/solvers/mod.rs | 30 | ||||
-rw-r--r--[-rwxr-xr-x] | src/solvers/single.rs (renamed from src/solvers/intuitive.rs) | 0 | ||||
-rw-r--r--[-rwxr-xr-x] | src/structs.rs | 0 |
11 files changed, 521 insertions, 31 deletions
diff --git a/src/main.rs b/src/main.rs index f9775ed..c2643c3 100755..100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,12 +1,11 @@ mod permutations; -mod solver; mod solvers; mod structs; -use crate::solver::{IteratorSolver, Solver}; +use crate::solvers::{IteratorSolver, Solver}; pub static N: u32 = 8; fn main() { - let mut solver = solvers::intuitive::NormalSolver::new(N); + let mut solver = solvers::single::NormalSolver::new(N); solver.solve(); /*let solver = solvers::gpusolver::GpuSolver::new(N); println!("solver: {:?}", solver); diff --git a/src/solver.rs b/src/solver.rs deleted file mode 100755 index db4e732..0000000 --- a/src/solver.rs +++ /dev/null @@ -1,23 +0,0 @@ -use crate::structs::StoneWall; - -/// calculate h and w -pub fn wall_stats(n: u32) -> (u32, u32) { - let h = (n >> 1) + 1; - (h, (n - 1) * h) -} - -pub trait Solver { - fn new(n: u32) -> Self; - fn n(&self) -> u32; - fn h(&self) -> u32; - fn w(&self) -> u32; -} - -pub trait FirstSolver { - fn solve(self) -> StoneWall; -} - -pub trait IteratorSolver: Solver { - type IntoIter: Iterator<Item=StoneWall>; - fn solve(self) -> Self::IntoIter; -} diff --git a/src/solvers/check.cl b/src/solvers/gpu/check.cl index af9bc51..af9bc51 100644 --- a/src/solvers/check.cl +++ b/src/solvers/gpu/check.cl diff --git a/src/solvers/gpu/host.rs b/src/solvers/gpu/host.rs new file mode 100644 index 0000000..6b79078 --- /dev/null +++ b/src/solvers/gpu/host.rs @@ -0,0 +1,235 @@ +use ocl::{flags, Buffer, Context, Device, Kernel, Platform, Program, Queue}; +use std::sync::mpsc::{Receiver, Sender}; + +#[derive(Debug)] +pub struct Host { + #[allow(unused)] + platform: Platform, + #[allow(unused)] + device: Device, + #[allow(unused)] + context: Context, + program: Program, + queue: Queue, + n: u32, + h: u32, + w: u32, + /// Workgroup size, set to 0 for max + wg_size: usize, + permutations: Buffer<u64>, + rec_queues: Vec<RequestBuffer>, + walls: Vec<Vec<u32>>, +} + +impl Host { + pub fn launch_sevice( + permutation_masks: &[u64], + n: u32, + h: u32, + w: u32, + mut wg_size: usize, + src: &str, + ) -> ocl::Result<Vec<Sender<Job>>> { + let platform = ocl::Platform::default(); + let device = ocl::Device::first(platform)?; + let context = ocl::Context::builder() + .platform(platform) + .devices(device.clone()) + .build()?; + let queue = ocl::Queue::new(&context, device, None)?; + + let program = Program::builder() + .devices(device) + .src(src) + .build(&context)?; + let buffer = ocl::Buffer::builder() + .queue(queue.clone()) + .flags(flags::MEM_READ_WRITE) + .copy_host_slice(permutation_masks) + .len(permutation_masks.len()) + .build()?; + + let mut senders = Vec::with_capacity((n - h + 1) as usize); + let mut receivers = Vec::with_capacity((n - h + 1) as usize); + let max_wg_size = device.max_wg_size()?; + if wg_size == 0 { + wg_size = max_wg_size; + } else if wg_size > max_wg_size { + return Err(ocl::Error::from("invalid workgroup size")); + } + for _ in 0..=(n - h) { + let (sx, rx) = std::sync::mpsc::channel(); + senders.push(sx); + receivers.push(RequestBuffer::new(wg_size, rx)); + } + + let solver = Self { + platform, + device, + context, + program, + queue, + n, + h, + w, + wg_size, + permutations: buffer, + rec_queues: receivers, + walls: Vec::new(), + }; + std::thread::Builder::new() + .name("GPU Deamon".into()) + .spawn(move || { + solver.run(); + }) + .unwrap(); + println!("started gpu thread"); + Ok(senders) + } + + fn get_dim(&self, queue: usize) -> usize { + let chunk = self.permutations.len() / self.n as usize; + let dim = (queue + 1) * chunk; + (dim + self.wg_size - 1) / self.wg_size * self.wg_size + } + fn get_off(&self, queue: usize) -> u64 { + let chunk = self.permutations.len() / self.n as usize; + let off = self.permutations.len() - chunk - self.get_dim(queue); + if off > isize::max_value() as usize { + panic!("workgroup size to big, offset underflow") + } + off as u64 + } + fn get_res(&self, queue: usize) -> usize { + let dim = self.get_dim(queue); + dim * self.get_res_save_dim() + } + fn get_res_save_dim(&self) -> usize { + (self.wg_size + 63) / 64 + } + + fn run(mut self) -> ! { + let queues = self.rec_queues.len(); + let mut instruction_buffer = Vec::with_capacity((self.n - self.h) as usize); + let mut result_buffer = Vec::with_capacity((self.n - self.h) as usize); + + for i in 0..queues { + let buffer: Buffer<u64> = Buffer::builder() + .queue(self.queue.clone()) + .len(self.wg_size) + .flags(flags::MEM_READ_WRITE) + .build() + .unwrap(); + + instruction_buffer.push(buffer); + let results: Buffer<u64> = Buffer::builder() + .queue(self.queue.clone()) + .len(self.get_res(i)) + .flags(flags::MEM_READ_WRITE) + .build() + .unwrap(); + result_buffer.push(results); + } + println!("finished gpu setup"); + for i in (0..self.rec_queues.len()).cycle() { + if let Some(buffer) = self.rec_queues[i].read() { + instruction_buffer[i].write(buffer).enq().unwrap(); + let dim = self.get_dim(i); + + //println!("dim: {}", dim); + //println!("off: {}", self.get_off(i)); + //println!("result size: {}", self.get_res_save_dim()); + let kernel = Kernel::builder() + .program(&self.program) + .name("check") + .queue(self.queue.clone()) + .global_work_size(dim) + .arg(&self.permutations) + .arg(&result_buffer[i]) + .arg(&instruction_buffer[i]) + .arg_local::<u64>(self.wg_size) + .arg(self.n) + .arg(self.w) + .arg(self.get_off(i)) + .build() + .unwrap(); + + unsafe { + kernel + .cmd() + .queue(&self.queue) + .global_work_offset(kernel.default_global_work_offset()) + .global_work_size(dim) + .local_work_size(self.wg_size) + .enq() + .unwrap(); + } + + // (5) Read results from the device into a vector (`::block` not shown): + let mut result = vec![0u64; self.get_res(i)]; + result_buffer[i] + .cmd() + .queue(&self.queue) + .offset(0) + .read(&mut result) + .enq() + .unwrap(); + for (j, r) in result.iter().enumerate() { + if j == 0 { + continue; + } + for b in 0..64 { + if r & (1 << b) != 0 { + let permutation = + j / self.get_res_save_dim() + self.get_off(i) as usize; + let instruction = (j % self.get_res_save_dim()) * 64 + b; + let mut wall = self.rec_queues[i].get_rows()[instruction].clone(); + wall.push(permutation as u32); + println!("{:?}", wall); + self.walls.push(wall); + } + } + } + } + } + panic!(); + } +} +/* +pub fn check(permutations: &[u64], w: u32, n: u32, mask: u64, offset: usize) -> ocl::Result<()> { + //println!("read src!"); + let src = std::fs::read_to_string("src/solvers/check.cl").expect("failed to open kernel file"); + + //println!("created queue!"); + println!("offset: {}", offset); + println!("length: {}", permutations.len() - offset); + let pro_que = ocl::ProQue::builder() + .src(src) + .dims(permutations.len() - offset) + .build()?; + + let results = pro_que.create_buffer::<i32>()?; + let kernel = pro_que + .kernel_builder("check") + .arg(get_buffer()) + .arg(&results) + .arg(mask) + .arg(n) + .arg(w) + .arg(offset as u64) + //.global_work_offset(offset) + .build()?; + + //println!("starting calculation"); + unsafe { + kernel.enq()?; + } + + let mut vec = vec![0; results.len()]; + results.read(&mut vec).enq()?; + + if vec.iter().any(|x| *x != 0) { + println!("The resuts are now '{:?}'!", vec); + } + Ok(()) +}*/ diff --git a/src/solvers/gpu/manager.rs b/src/solvers/gpu/manager.rs new file mode 100644 index 0000000..1dd6a4d --- /dev/null +++ b/src/solvers/gpu/manager.rs @@ -0,0 +1,104 @@ +use std::sync::mpsc::{Receiver, Sender, channel}; +use std::thread::JoinHandle; +use super::*; + +#[derive(Debug)] +struct RequestBuffer { + mask_buff: Vec<u64>, + pointer: usize, +} + +impl RequestBuffer { + pub fn new(size: usize) -> Self { + RequestBuffer { + mask_buff: vec![0; size], + pointer: 0, + } + } + pub fn read(&mut self, request: CheckRequest) -> Option<&[u64]> { + self.mask_buff[self.pointer] = request.bitmask; + self.pointer += 1; + if self.pointer == self.mask_buff.len() { + self.pointer = 0; + return Some(self.mask_buff.as_ref()); + } + None + } +} + +pub struct OclManager { + job_id: u64, + host_sender: Sender<Message>, + output_sender: Sender<Message>, + reciever: Receiver<Message>, + buffers: Vec<RequestBuffer>, + output_handle: JoinHandle<String>, + host_handle: JoinHandle<String>, +} + +impl OclManager { + pub fn launch_sevice( + permutations: &[&[u32]], + permutations_mask: &[u64], + n: u32, + // Workgroup size, set to 0 for max + wg_size: u32, + ) -> (Sender<Message>, JoinHandle<String>) { + let (h, w) = crate::solvers::wall_stats(n); + let src = include_str!("check.cl"); + let (output_sender, output_handle) = + super::output::Output::launch_sevice(permutations, permutations_mask, n, h, w); + let (host_sender, host_handle) = + super::host::Host::launch_sevice(permutations_mask, n, h, w, wg_size as usize, src); + + let (receiver, sender) = channel(); + + let mut buffers = Vec::with_capacity((n - h + 1) as usize); + for _ in 0..=(n - h) { + buffers.push(RequestBuffer::new(wg_size as usize)); + } + + let manager = Self { + 0, + host_sender, + output_sender, + receiver, + buffers, + output_handle, + host_handle, + } + (sender, + std::thread::Builder::new() + .name("GPU Manager Deamon".into()) + .spawn(move || { + manager.run(); + }) + .unwrap()) + + } + + fn run(mut self) { + loop { + match self.reciever.recv().expect("Channel to GPU Manager broke") { + Message::CheckRequest(request) => { + if let Some(buffer) = self.buffers[request.queue as usize].read(request) { + self.host_sender + .send(Message::HostMessage((self.job_id, buffer.0.into()))); + self.output_sender + .send(Message::OutputMessage((self.job_id, buffer.1.into()))); + self.job_id += 1; + } + } + Message::Terminate => { + panic!("flush buffers"); + self.host_sender.send(Message::Terminate); + self.host_handle.join(); + self.output_sender.send(Message::Terminate); + self.output_handle.join(); + return; + } + _ => println!("Invalid MessageType"), + } + } + } +} diff --git a/src/solvers/gpu/mod.rs b/src/solvers/gpu/mod.rs new file mode 100644 index 0000000..f9ab711 --- /dev/null +++ b/src/solvers/gpu/mod.rs @@ -0,0 +1,56 @@ +pub mod host; +pub mod manager; +pub mod output; + +pub use manager::*; + +type MaskMessage = (u64, Vec<u64>); +type RowMessage = (u64, Vec<Vec<u32>>); + +pub enum Message { + CheckRequest(CheckRequest), + HostMessage(MaskMessage), + OutputMessage(RowMessage), + Terminate, +} + +pub struct ResultMessage { + data: Vec<u64>, + offset: usize, + size: usize, +} + +impl ResultMessage { + fn new(data: Vec<u64>, offset: usize, size: usize) -> Self { + Self { data, offset, size } + } + fn valid_walls(&self, wg_size: usize) -> &[Vec<u32>] { + let mut result = vec![Vec::new(); wg_size]; + for (j, r) in self.data.iter().enumerate() { + for b in 0..64 { + if r & (1 << b) != 0 { + let permutation = j / self.size + self.offset; + let instruction = (j % self.size) * 64 + b; + result[instruction].push(permutation as u32); + } + } + } + result.as_ref() + } +} + +pub struct CheckRequest { + rows: Vec<u32>, + bitmask: u64, + queue: u32, +} + +impl CheckRequest { + pub fn new(rows: Vec<u32>, bitmask: u64, queue: u32) -> Self { + Self { + rows, + bitmask, + queue, + } + } +} diff --git a/src/solvers/gpu/output.rs b/src/solvers/gpu/output.rs new file mode 100644 index 0000000..58a4aa5 --- /dev/null +++ b/src/solvers/gpu/output.rs @@ -0,0 +1,96 @@ +use super::Message; +use std::collections::{HashSet, HashMap}; +use std::sync::mpsc::{channel, Receiver, Sender}; +use std::thread::JoinHandle; + +struct InBuffer { + receiver: Receiver<Message>, + row_requests: HashMap<u64, Vec<u32>>, + results_requests: HashMap<u64, Vec<u64>>, + +} + +impl InBuffer { + fn new(receiver: Receiver<Message>) -> Self { + Self { + receiver, + row_requests: HashMap::new(), + results_requests: HashMap::new(), + } + } + fn read(&mut self) -> Option<Result> { + loop { + match self.receiver.recv() { + Message::OutputMessage((id, ResultMessage)) => { + if Some(result) = self.results_requests.get(id) { + Some(RowResult::new() + } + else { + self.row_requests.insert(id, walls);} + } + } + } + +} + +#[derive(PartialEq, Eq, Hash)] +struct RowResult { + rows: Vec<u32>, +} + +impl RowResult { + fn new(rows: Vec<u32>) -> Self { + rows.push(0); + Self { rows } + } + fn output(&self) { + println!("{:?}", self.rows); + } +} + +pub struct Output { + input: InBuffer, + receiver: Receiver<Message>, + permutations: Vec<Vec<u32>>, + permutations_mask: Vec<u64>, + results: HashSet<RowResult>, +} + +impl Output { + fn launch_sevice( + permutations: &[Vec<u32>], + permutation_masks: &[u64], + ) -> (Sender<Message>, JoinHandle<String>) { + let (sender, receiver) = channel(); + let input = InBuffer::new(receiver); + + let output = Self { + input, + permutations: permutations.into(), + permutation_masks: permutation_masks.into(), + HashSet::new(), + } + ( + sender, + std::thread::Builder::new() + .name("GPU Manager Deamon".into()) + .spawn(move || { + output.run(); + }) + .unwrap(), + ) + } + + fn run(mut self) { + loop { + match self.receiver.recv() { + Message::OutputMessage((id, ResultMessage)) => { + if Some(result) = self.results_requests.get(id) { + Some(RowResult::new() + } + else { + self.row_requests.insert(id, walls);} + } + } + } +} diff --git a/src/solvers/gpusolver.rs b/src/solvers/gpusolver.rs index 3a96568..41de7e7 100644 --- a/src/solvers/gpusolver.rs +++ b/src/solvers/gpusolver.rs @@ -1,6 +1,5 @@ use crate::permutations::PermutationGenerator; -use crate::solver::{wall_stats, IteratorSolver, Solver}; -use crate::solvers::opencl; +use crate::solvers::{opencl, wall_stats, IteratorSolver, Solver}; use crate::structs::StoneWall; #[derive(Debug)] diff --git a/src/solvers/mod.rs b/src/solvers/mod.rs index 1bdc228..189deca 100755..100644 --- a/src/solvers/mod.rs +++ b/src/solvers/mod.rs @@ -1,5 +1,29 @@ //pub mod incremental_block; -pub mod intuitive; -//#[cfg(feature = "gpu")] +pub mod gpu; pub mod gpusolver; -pub mod opencl; +pub mod single; + +use crate::structs::StoneWall; +pub use gpu::*; + +/// calculate h and w +pub fn wall_stats(n: u32) -> (u32, u32) { + let h = (n >> 1) + 1; + (h, (n - 1) * h) +} + +pub trait Solver { + fn new(n: u32) -> Self; + fn n(&self) -> u32; + fn h(&self) -> u32; + fn w(&self) -> u32; +} + +pub trait FirstSolver { + fn solve(self) -> StoneWall; +} + +pub trait IteratorSolver: Solver { + type IntoIter: Iterator<Item = StoneWall>; + fn solve(self) -> Self::IntoIter; +} diff --git a/src/solvers/intuitive.rs b/src/solvers/single.rs index ad3e5b7..ad3e5b7 100755..100644 --- a/src/solvers/intuitive.rs +++ b/src/solvers/single.rs diff --git a/src/structs.rs b/src/structs.rs index c61bb14..c61bb14 100755..100644 --- a/src/structs.rs +++ b/src/structs.rs |