summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDennis Kobert <dennis@kobert.dev>2020-01-06 01:31:50 +0100
committerDennis Kobert <dennis@kobert.dev>2020-01-06 01:31:50 +0100
commit66cc693a770cf8668ddbddd60d35c8e8dacd55a9 (patch)
tree52a98d83bf193a0982ca45d37bca2264227366cb
parent04a48907c0b6615026e2bc8429fa6a3b6634d177 (diff)
Implement gpu worker
-rw-r--r--src/solvers/check.cl2
-rw-r--r--src/solvers/opencl.rs128
2 files changed, 121 insertions, 9 deletions
diff --git a/src/solvers/check.cl b/src/solvers/check.cl
index 94a81fc..0ad187a 100644
--- a/src/solvers/check.cl
+++ b/src/solvers/check.cl
@@ -1,6 +1,6 @@
#pragma OPENCL EXTENSION cl_intel_printf : enable
__kernel void check(__global unsigned long* permutations, __global int* results,
- unsigned long mask, unsigned int n, unsigned int w, unsigned long offset) {
+ unsigned long* instructions, unsigned int n, unsigned int w, unsigned long offset) {
int id = get_global_id(0);
unsigned long curr_mask = mask | permutations[id + offset];
diff --git a/src/solvers/opencl.rs b/src/solvers/opencl.rs
index 676c5cc..3406fd0 100644
--- a/src/solvers/opencl.rs
+++ b/src/solvers/opencl.rs
@@ -1,4 +1,4 @@
-use ocl::{Buffer, Context, Device, Platform, Queue};
+use ocl::{flags, Buffer, Context, Device, Kernel, Platform, Program, Queue};
use std::sync::mpsc::{Receiver, Sender};
pub struct Job {
@@ -10,16 +10,57 @@ pub struct GpuSolver {
platform: Platform,
device: Device,
context: Context,
+ program: Program,
queue: Queue,
n: u32,
h: u32,
w: u32,
+ wg_size: usize,
permutations: Buffer<u64>,
- rec_queues: Vec<Receiver<Job>>,
+ rec_queues: Vec<ReqestBuffer>,
+}
+
+struct ReqestBuffer {
+ mask_buff: Vec<u64>,
+ row_buff: Vec<Vec<u32>>,
+ pointer: usize,
+ receiver: Receiver<Job>,
+}
+
+impl ReqestBuffer {
+ pub fn new(size: usize, receiver: Receiver<Job>) -> ReqestBuffer {
+ Self {
+ mask_buff: Vec::with_capacity(size),
+ row_buff: Vec::with_capacity(size),
+ pointer: 0,
+ receiver,
+ }
+ }
+ pub fn read(&mut self) -> Option<&[u64]> {
+ for job in self.receiver.try_iter() {
+ self.mask_buff[self.pointer] = job.bitmask;
+ self.row_buff[self.pointer] = job.rows;
+ self.pointer += 1;
+ if self.pointer == self.mask_buff.len() {
+ self.pointer = 0;
+ return Some(self.mask_buff.as_ref());
+ }
+ }
+ None
+ }
+ pub fn get_rows(&self) -> &[Vec<u32>] {
+ self.row_buff.as_ref()
+ }
}
impl GpuSolver {
- pub fn new(permutation_masks: &[u64], n: u32, h: u32, w: u32) -> ocl::Result<Vec<Sender<Job>>> {
+ pub fn new(
+ permutation_masks: &[u64],
+ n: u32,
+ h: u32,
+ w: u32,
+ src: &str,
+ ) -> ocl::Result<Vec<Sender<Job>>> {
let platform = ocl::Platform::default();
let device = ocl::Device::first(platform)?;
let context = ocl::Context::builder()
@@ -28,28 +69,36 @@ impl GpuSolver {
.build()?;
let queue = ocl::Queue::new(&context, device, None)?;
+ let program = Program::builder()
+ .devices(device)
+ .src(src)
+ .build(&context)?;
let buffer = ocl::Buffer::builder()
.queue(queue.clone())
+ .flags(flags::MEM_READ_WRITE)
.copy_host_slice(permutation_masks)
.len(permutation_masks.len())
.build()?;
let mut senders = Vec::with_capacity(h as usize);
let mut receivers = Vec::with_capacity(h as usize);
+ let wg_size = device.max_wg_size()?;
for _ in 0..h {
let (sx, rx) = std::sync::mpsc::channel();
senders.push(sx);
- receivers.push(rx);
+ receivers.push(ReqestBuffer::new(wg_size, rx));
}
let solver = Self {
platform,
device,
context,
+ program,
queue,
n,
h,
w,
+ wg_size,
permutations: buffer,
rec_queues: receivers,
};
@@ -59,11 +108,74 @@ impl GpuSolver {
Ok(senders)
}
- fn run(self) -> ! {
- for rec in self.rec_queues.iter().cycle() {
- //if rec.
+ fn run(mut self) -> ! {
+ let queues = self.rec_queues.len();
+ let mut instruction_buffer = Vec::with_capacity((self.n - self.h) as usize);
+ let mut result_buffer = Vec::with_capacity((self.n - self.h) as usize);
+ let chunk = self.permutations.len() / self.n as usize;
+
+ for i in 0..queues {
+ let buffer: Buffer<u64> = Buffer::builder()
+ .queue(self.queue.clone())
+ .len(self.wg_size)
+ .flags(flags::MEM_READ_WRITE)
+ .build()
+ .unwrap();
+
+ instruction_buffer.push(buffer);
+ let dim = (i + 1) * chunk;
+ let results: Buffer<u64> = Buffer::builder()
+ .queue(self.queue.clone())
+ .len(self.wg_size * dim / 64)
+ .flags(flags::MEM_READ_WRITE)
+ .build()
+ .unwrap();
+ result_buffer.push(results);
+ }
+ for i in (0..self.rec_queues.len()).cycle() {
+ if let Some(buffer) = self.rec_queues[i].read() {
+ instruction_buffer[i].write(buffer).enq().unwrap();
+ println!("hello world");
+ let dim = (i + 1) * chunk;
+
+ let kernel = Kernel::builder()
+ .program(&self.program)
+ .name("check")
+ .queue(self.queue.clone())
+ .global_work_size(dim)
+ .arg(&self.permutations)
+ .arg(&result_buffer[i])
+ .arg(&instruction_buffer[i])
+ .arg(self.n)
+ .arg(self.w)
+ .arg((self.n as u64 - i as u64 - 1) * chunk as u64)
+ .build()
+ .unwrap();
+
+ unsafe {
+ kernel
+ .cmd()
+ .queue(&self.queue)
+ .global_work_offset(kernel.default_global_work_offset())
+ .global_work_size(dim)
+ .local_work_size(self.wg_size)
+ .enq()
+ .unwrap();
+ }
+
+ // (5) Read results from the device into a vector (`::block` not shown):
+ let mut result = vec![0u64; dim * self.wg_size / 64];
+ result_buffer[i]
+ .cmd()
+ .queue(&self.queue)
+ .offset(0)
+ .read(&mut result)
+ .enq()
+ .unwrap();
+ println!("{:?}", result);
+ }
}
- loop {}
+ panic!();
}
}
/*