summary | refs | log | tree | commit | diff
path: root/src/solvers/opencl.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/solvers/opencl.rs')
-rw-r--r-- src/solvers/opencl.rs | 42
1 files changed, 30 insertions, 12 deletions
diff --git a/src/solvers/opencl.rs b/src/solvers/opencl.rs
index 1293295..8421d53 100644
--- a/src/solvers/opencl.rs
+++ b/src/solvers/opencl.rs
@@ -7,8 +7,11 @@ pub struct Job {
}
pub struct GpuSolver {
+ #[allow(unused)]
platform: Platform,
+ #[allow(unused)]
device: Device,
+ #[allow(unused)]
context: Context,
program: Program,
queue: Queue,
@@ -17,19 +20,19 @@ pub struct GpuSolver {
w: u32,
wg_size: usize,
permutations: Buffer<u64>,
- rec_queues: Vec<ReqestBuffer>,
+ rec_queues: Vec<RequestBuffer>,
}
-struct ReqestBuffer {
+struct RequestBuffer {
mask_buff: Vec<u64>,
row_buff: Vec<Vec<u32>>,
pointer: usize,
receiver: Receiver<Job>,
}
-impl ReqestBuffer {
- pub fn new(size: usize, receiver: Receiver<Job>) -> ReqestBuffer {
- Self {
+impl RequestBuffer {
+ pub fn new(size: usize, receiver: Receiver<Job>) -> Self {
+ RequestBuffer {
mask_buff: Vec::with_capacity(size),
row_buff: Vec::with_capacity(size),
pointer: 0,
@@ -48,13 +51,14 @@ impl ReqestBuffer {
}
None
}
+ #[allow(unused)]
pub fn get_rows(&self) -> &[Vec<u32>] {
self.row_buff.as_ref()
}
}
impl GpuSolver {
- pub fn new(
+ pub fn launch_sevice(
permutation_masks: &[u64],
n: u32,
h: u32,
@@ -86,7 +90,7 @@ impl GpuSolver {
for _ in 0..h {
let (sx, rx) = std::sync::mpsc::channel();
senders.push(sx);
- receivers.push(ReqestBuffer::new(wg_size, rx));
+ receivers.push(RequestBuffer::new(wg_size, rx));
}
let solver = Self {
@@ -108,6 +112,22 @@ impl GpuSolver {
Ok(senders)
}
+    /// Returns the padded dimension for `queue`.
+    ///
+    /// The unpadded dimension is `(queue + 1) * chunk`, where `chunk` is
+    /// `permutations.len() / n`. A value that is not already a multiple of
+    /// `wg_size` is rounded up to the next multiple of `wg_size`.
+    ///
+    /// NOTE(review): presumably this is the kernel's global work size, which
+    /// has to be a whole number of work groups; confirm against the kernel
+    /// builder in `run`.
+    ///
+    /// # Panics
+    ///
+    /// Panics on division by zero if `n` or `wg_size` is zero.
+    fn get_dim(&self, queue: usize) -> usize {
+        let chunk = self.permutations.len() / self.n as usize;
+        let dim = (queue + 1) * chunk;
+        if dim % self.wg_size == 0 {
+            dim
+        } else {
+            // Round up to whole work groups. Scaling by `wg_size` (not
+            // `chunk`) is what keeps the result a multiple of `wg_size`.
+            (dim / self.wg_size + 1) * self.wg_size
+        }
+    }
+    /// Returns the offset argument handed to the kernel for `queue`.
+    ///
+    /// The base offset is `(n - queue - 1) * chunk`, where `chunk` is
+    /// `permutations.len() / n`. It is moved back by the padding needed to
+    /// round `(queue + 1) * chunk` up to a multiple of `wg_size`, so that
+    /// offset plus padded dimension always equals `n * chunk`.
+    ///
+    /// NOTE(review): the subtraction underflows when the base offset is
+    /// smaller than the padding (e.g. `queue == n - 1` with an unaligned
+    /// dimension); confirm callers never request that combination.
+    ///
+    /// # Panics
+    ///
+    /// Panics on division by zero if `n` or `wg_size` is zero.
+    fn get_off(&self, queue: usize) -> u64 {
+        let chunk = self.permutations.len() / self.n as usize;
+        let dim = (queue + 1) * chunk;
+        // Zero when `dim` is already aligned. Without the outer modulo an
+        // aligned `dim` would shift the offset back by a full `wg_size`
+        // even though no padding is added to the dimension in that case.
+        let padding = (self.wg_size - dim % self.wg_size) % self.wg_size;
+        ((self.n as usize - queue - 1) * chunk - padding) as u64
+    }
+
fn run(mut self) -> ! {
let queues = self.rec_queues.len();
let mut instruction_buffer = Vec::with_capacity((self.n - self.h) as usize);
@@ -123,10 +143,9 @@ impl GpuSolver {
.unwrap();
instruction_buffer.push(buffer);
- let dim = (i + 1) * chunk;
let results: Buffer<u64> = Buffer::builder()
.queue(self.queue.clone())
- .len(self.wg_size * dim / 64)
+ .len(self.wg_size * self.get_dim(i) / 64)
.flags(flags::MEM_READ_WRITE)
.build()
.unwrap();
@@ -135,8 +154,7 @@ impl GpuSolver {
for i in (0..self.rec_queues.len()).cycle() {
if let Some(buffer) = self.rec_queues[i].read() {
instruction_buffer[i].write(buffer).enq().unwrap();
- println!("hello world");
- let dim = (i + 1) * chunk;
+ let dim = self.get_dim(i);
let kernel = Kernel::builder()
.program(&self.program)
@@ -149,7 +167,7 @@ impl GpuSolver {
.arg_local::<u64>(self.wg_size)
.arg(self.n)
.arg(self.w)
- .arg((self.n as u64 - i as u64 - 1) * chunk as u64)
+ .arg(self.get_off(i))
.build()
.unwrap();