summaryrefslogtreecommitdiff
path: root/src/solvers/gpu/host.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/solvers/gpu/host.rs')
-rw-r--r--src/solvers/gpu/host.rs172
1 files changed, 70 insertions, 102 deletions
diff --git a/src/solvers/gpu/host.rs b/src/solvers/gpu/host.rs
index c53c42c..39843c4 100644
--- a/src/solvers/gpu/host.rs
+++ b/src/solvers/gpu/host.rs
@@ -1,6 +1,6 @@
-use super::{HostMessage, Message, ResultMessage};
+use super::{Message, ResultMessage};
use ocl::{flags, Buffer, Context, Device, Kernel, Platform, Program, Queue};
-use std::sync::mpsc::{Receiver, Sender};
+use std::sync::mpsc::{channel, Receiver, Sender};
#[derive(Debug)]
pub struct Host {
@@ -19,7 +19,7 @@ pub struct Host {
wg_size: usize,
permutations: Buffer<u64>,
receiver: Receiver<Message>,
- walls: Vec<Vec<u32>>,
+ output_sender: Sender<Message>,
}
impl Host {
@@ -30,6 +30,7 @@ impl Host {
w: u32,
mut wg_size: usize,
src: &str,
+ output_sender: Sender<Message>,
) -> ocl::Result<(Sender<Message>, std::thread::JoinHandle<()>)> {
let platform = ocl::Platform::default();
let device = ocl::Device::first(platform)?;
@@ -50,6 +51,7 @@ impl Host {
.len(permutation_masks.len())
.build()?;
+ let (sender, receiver) = channel();
let max_wg_size = device.max_wg_size()?;
if wg_size == 0 {
wg_size = max_wg_size;
@@ -68,17 +70,17 @@ impl Host {
w,
wg_size,
permutations: buffer,
- rec_queues: receivers,
- walls: Vec::new(),
+ receiver,
+ output_sender,
};
- std::thread::Builder::new()
+ let handle = std::thread::Builder::new()
.name("GPU Deamon".into())
.spawn(move || {
solver.run();
})
.unwrap();
println!("started gpu thread");
- Ok(senders)
+ Ok((sender, handle))
}
fn get_dim(&self, queue: usize) -> usize {
@@ -86,13 +88,13 @@ impl Host {
let dim = (queue + 1) * chunk;
(dim + self.wg_size - 1) / self.wg_size * self.wg_size
}
- fn get_off(&self, queue: usize) -> u64 {
+ fn get_off(&self, queue: usize) -> usize {
let chunk = self.permutations.len() / self.n as usize;
let off = self.permutations.len() - chunk - self.get_dim(queue);
if off > isize::max_value() as usize {
panic!("workgroup size to big, offset underflow")
}
- off as u64
+ off
}
fn get_res(&self, queue: usize) -> usize {
let dim = self.get_dim(queue);
@@ -102,8 +104,8 @@ impl Host {
(self.wg_size + 63) / 64
}
- fn run(mut self) -> ! {
- let queues = self.rec_queues.len();
+ fn run(self) {
+ let queues = (self.n - self.h + 1) as usize;
let mut instruction_buffer = Vec::with_capacity((self.n - self.h) as usize);
let mut result_buffer = Vec::with_capacity((self.n - self.h) as usize);
@@ -125,105 +127,71 @@ impl Host {
result_buffer.push(results);
}
println!("finished gpu setup");
- for i in (0..self.rec_queues.len()).cycle() {
- if let Some(buffer) = self.rec_queues[i].read() {
- instruction_buffer[i].write(buffer).enq().unwrap();
- let dim = self.get_dim(i);
+ loop {
+ match self.receiver.recv().expect("Channel to Host broke") {
+ Message::Terminate => {
+ self.output_sender.send(Message::Terminate);
+ return;
+ }
+ Message::HostMessage((id, i, buffer)) => {
+ let i = i as usize;
+ let off = self.get_off(i);
+ let dim = self.get_dim(i);
+ let res = self.get_res(i);
+ let res_size = self.get_res_save_dim();
+
+ instruction_buffer[i].write(&buffer).enq().unwrap();
+
+ //println!("dim: {}", dim);
+ //println!("off: {}", self.get_off(i));
+ //println!("result size: {}", self.get_res_save_dim());
+ let kernel = Kernel::builder()
+ .program(&self.program)
+ .name("check")
+ .queue(self.queue.clone())
+ .global_work_size(dim)
+ .arg(&self.permutations)
+ .arg(&result_buffer[i])
+ .arg(&instruction_buffer[i])
+ .arg_local::<u64>(self.wg_size)
+ .arg(self.n)
+ .arg(self.w)
+ .arg(off)
+ .build()
+ .unwrap();
- //println!("dim: {}", dim);
- //println!("off: {}", self.get_off(i));
- //println!("result size: {}", self.get_res_save_dim());
- let kernel = Kernel::builder()
- .program(&self.program)
- .name("check")
- .queue(self.queue.clone())
- .global_work_size(dim)
- .arg(&self.permutations)
- .arg(&result_buffer[i])
- .arg(&instruction_buffer[i])
- .arg_local::<u64>(self.wg_size)
- .arg(self.n)
- .arg(self.w)
- .arg(self.get_off(i))
- .build()
- .unwrap();
+ unsafe {
+ kernel
+ .cmd()
+ .queue(&self.queue)
+ .global_work_offset(kernel.default_global_work_offset())
+ .global_work_size(dim)
+ .local_work_size(self.wg_size)
+ .enq()
+ .unwrap();
+ }
- unsafe {
- kernel
+ // (5) Read results from the device into a vector (`::block` not shown):
+ let mut data = vec![0u64; res];
+ result_buffer[i]
.cmd()
.queue(&self.queue)
- .global_work_offset(kernel.default_global_work_offset())
- .global_work_size(dim)
- .local_work_size(self.wg_size)
+ .offset(0)
+ .read(&mut data)
.enq()
.unwrap();
+ self.output_sender
+ .send(Message::ResultMessage(ResultMessage::new(
+ data,
+ off,
+ res_size,
+ self.wg_size,
+ id,
+ )))
+ .unwrap();
}
-
- // (5) Read results from the device into a vector (`::block` not shown):
- let mut result = vec![0u64; self.get_res(i)];
- result_buffer[i]
- .cmd()
- .queue(&self.queue)
- .offset(0)
- .read(&mut result)
- .enq()
- .unwrap();
- for (j, r) in result.iter().enumerate() {
- if j == 0 {
- continue;
- }
- for b in 0..64 {
- if r & (1 << b) != 0 {
- let permutation =
- j / self.get_res_save_dim() + self.get_off(i) as usize;
- let instruction = (j % self.get_res_save_dim()) * 64 + b;
- let mut wall = self.rec_queues[i].get_rows()[instruction].clone();
- wall.push(permutation as u32);
- println!("{:?}", wall);
- self.walls.push(wall);
- }
- }
- }
+ _ => println!("Invalid MessageType"),
}
}
- panic!();
}
}
-/*
-pub fn check(permutations: &[u64], w: u32, n: u32, mask: u64, offset: usize) -> ocl::Result<()> {
- //println!("read src!");
- let src = std::fs::read_to_string("src/solvers/check.cl").expect("failed to open kernel file");
-
- //println!("created queue!");
- println!("offset: {}", offset);
- println!("length: {}", permutations.len() - offset);
- let pro_que = ocl::ProQue::builder()
- .src(src)
- .dims(permutations.len() - offset)
- .build()?;
-
- let results = pro_que.create_buffer::<i32>()?;
- let kernel = pro_que
- .kernel_builder("check")
- .arg(get_buffer())
- .arg(&results)
- .arg(mask)
- .arg(n)
- .arg(w)
- .arg(offset as u64)
- //.global_work_offset(offset)
- .build()?;
-
- //println!("starting calculation");
- unsafe {
- kernel.enq()?;
- }
-
- let mut vec = vec![0; results.len()];
- results.read(&mut vec).enq()?;
-
- if vec.iter().any(|x| *x != 0) {
- println!("The resuts are now '{:?}'!", vec);
- }
- Ok(())
-}*/