summary refs log tree commit diff
path: root/src/solvers/opencl.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/solvers/opencl.rs')
-rw-r--r-- src/solvers/opencl.rs 75
1 files changed, 55 insertions, 20 deletions
diff --git a/src/solvers/opencl.rs b/src/solvers/opencl.rs
index cdedd37..550ace9 100644
--- a/src/solvers/opencl.rs
+++ b/src/solvers/opencl.rs
@@ -6,6 +6,13 @@ pub struct Job {
bitmask: u64,
}
+impl Job {
+ pub fn new(rows: Vec<u32>, bitmask: u64) -> Self {
+ Self { rows, bitmask }
+ }
+}
+
+#[derive(Debug)]
pub struct GpuSolver {
#[allow(unused)]
platform: Platform,
@@ -18,11 +25,14 @@ pub struct GpuSolver {
n: u32,
h: u32,
w: u32,
+ /// Workgroup size; a value of 0 selects the device's maximum workgroup size.
wg_size: usize,
permutations: Buffer<u64>,
rec_queues: Vec<RequestBuffer>,
+ walls: Vec<Vec<u32>>,
}
+#[derive(Debug)]
struct RequestBuffer {
mask_buff: Vec<u64>,
row_buff: Vec<Vec<u32>>,
@@ -33,8 +43,8 @@ struct RequestBuffer {
impl RequestBuffer {
pub fn new(size: usize, receiver: Receiver<Job>) -> Self {
RequestBuffer {
- mask_buff: Vec::with_capacity(size),
- row_buff: Vec::with_capacity(size),
+ mask_buff: vec![0; size],
+ row_buff: vec![Vec::new(); size],
pointer: 0,
receiver,
}
@@ -85,15 +95,15 @@ impl GpuSolver {
.len(permutation_masks.len())
.build()?;
- let mut senders = Vec::with_capacity(h as usize);
- let mut receivers = Vec::with_capacity(h as usize);
+ let mut senders = Vec::with_capacity((n - h + 1) as usize);
+ let mut receivers = Vec::with_capacity((n - h + 1) as usize);
let max_wg_size = device.max_wg_size()?;
if wg_size == 0 {
wg_size = max_wg_size;
} else if wg_size > max_wg_size {
return Err(ocl::Error::from("invalid workgroup size"));
}
- for _ in 0..h {
+ for _ in 0..=(n - h) {
let (sx, rx) = std::sync::mpsc::channel();
senders.push(sx);
receivers.push(RequestBuffer::new(wg_size, rx));
@@ -111,34 +121,40 @@ impl GpuSolver {
wg_size,
permutations: buffer,
rec_queues: receivers,
+ walls: Vec::new(),
};
- std::thread::spawn(move || {
- solver.run();
- });
+ std::thread::Builder::new()
+ .name("GPU Deamon".into())
+ .spawn(move || {
+ solver.run();
+ })
+ .unwrap();
+ println!("started gpu thread");
Ok(senders)
}
fn get_dim(&self, queue: usize) -> usize {
let chunk = self.permutations.len() / self.n as usize;
let dim = (queue + 1) * chunk;
- if dim % self.wg_size == 0 {
- dim
- } else {
- let dim_small = dim / self.wg_size;
- ((dim_small + 1) * chunk)
- }
+ (dim + self.wg_size - 1) / self.wg_size * self.wg_size
}
fn get_off(&self, queue: usize) -> u64 {
let chunk = self.permutations.len() / self.n as usize;
- let dim = (queue + 1) * chunk;
- ((self.n as usize - queue - 1) * chunk - self.wg_size + dim % self.wg_size) as u64
+ let off = self.permutations.len() - chunk - self.get_dim(queue);
+ off as u64
+ }
+ fn get_res(&self, queue: usize) -> usize {
+ let dim = self.get_dim(queue);
+ dim * self.get_res_save_dim()
+ }
+ fn get_res_save_dim(&self) -> usize {
+ (self.wg_size + 63) / 64
}
fn run(mut self) -> ! {
let queues = self.rec_queues.len();
let mut instruction_buffer = Vec::with_capacity((self.n - self.h) as usize);
let mut result_buffer = Vec::with_capacity((self.n - self.h) as usize);
- let chunk = self.permutations.len() / self.n as usize;
for i in 0..queues {
let buffer: Buffer<u64> = Buffer::builder()
@@ -151,17 +167,21 @@ impl GpuSolver {
instruction_buffer.push(buffer);
let results: Buffer<u64> = Buffer::builder()
.queue(self.queue.clone())
- .len(self.wg_size * self.get_dim(i) / 64)
+ .len(self.get_res(i))
.flags(flags::MEM_READ_WRITE)
.build()
.unwrap();
result_buffer.push(results);
}
+ println!("finished gpu setup");
for i in (0..self.rec_queues.len()).cycle() {
if let Some(buffer) = self.rec_queues[i].read() {
instruction_buffer[i].write(buffer).enq().unwrap();
let dim = self.get_dim(i);
+ //println!("dim: {}", dim);
+ //println!("off: {}", self.get_off(i));
+ //println!("result size: {}", self.get_res_save_dim());
let kernel = Kernel::builder()
.program(&self.program)
.name("check")
@@ -189,7 +209,7 @@ impl GpuSolver {
}
// (5) Read results from the device into a vector (`::block` not shown):
- let mut result = vec![0u64; dim * self.wg_size / 64];
+ let mut result = vec![0u64; self.get_res(i)];
result_buffer[i]
.cmd()
.queue(&self.queue)
@@ -197,7 +217,22 @@ impl GpuSolver {
.read(&mut result)
.enq()
.unwrap();
- println!("{:?}", result);
+ for (j, r) in result.iter().enumerate() {
+ if j == 0 {
+ continue;
+ }
+ for b in 0..64 {
+ if r & (1 << b) != 0 {
+ let permutation =
+ j / self.get_res_save_dim() + self.get_off(i) as usize;
+ let instruction = (j % self.get_res_save_dim()) * 64 + b;
+ let mut wall = self.rec_queues[i].get_rows()[instruction].clone();
+ wall.push(permutation as u32);
+ println!("{:?}", wall);
+ self.walls.push(wall);
+ }
+ }
+ }
}
}
panic!();