summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDennis Kobert <dennis@kobert.dev>2020-01-06 08:33:57 +0100
committerDennis Kobert <dennis@kobert.dev>2020-01-06 08:33:57 +0100
commit1a9d0d91e08d0dee9b48c05ef0ac72cc732a42ab (patch)
treebab891f578367a8e07ba0d707c22dc30c43fdd83
parent17d9a626b6a3110cec3697407c6690522798cb30 (diff)
Backport to opencl 1.2
-rw-r--r--src/solvers/check.cl23
-rw-r--r--src/solvers/gpusolver.rs29
-rw-r--r--src/solvers/opencl.rs42
3 files changed, 64 insertions, 30 deletions
diff --git a/src/solvers/check.cl b/src/solvers/check.cl
index 2ef6406..7ff39df 100644
--- a/src/solvers/check.cl
+++ b/src/solvers/check.cl
@@ -1,22 +1,23 @@
-typedef ulong u64
-typedef uint u32
+//#pragma OPENCL EXTENSION cl_intel_printf : enable
+typedef ulong u64;
+typedef uint u32;
-#pragma OPENCL EXTENSION cl_intel_printf : enable
__kernel void check(__global u64* permutations, __global u64* results,
- u64* instructions, __local u64* i_buffer, u32 n, u32 w, u64 offset) {
+ __local u64* instructions, __local u64* i_buffer, u32 n, u32 w, u64 offset) {
int gid = get_global_id(0);
int wid = get_local_id(0);
int gsize = get_global_size(0);
int wsize = get_local_size(0);
- int wesize = get_enqueued_local_size(0);
+ //int wesize = get_enqueued_local_size(0);
- int times = wsize / wesize;
- times += !(wsize % wesize);
- for (int i = 0; i < times; i++) {
- i_buffer[wid + i] = instructions[wid + i]
- }
- work_group_barrier(mem_fence::local);
+ //int times = wsize / wesize;
+ //times += !(wsize % wesize);
+ //for (int i = 0; i < times; i++) {
+ int i = 0;
+ i_buffer[wid + i] = instructions[wid + i];
+ //}
+ barrier(CLK_LOCAL_MEM_FENCE);
u64 result = 0;
unsigned long own = permutations[gid + offset];
diff --git a/src/solvers/gpusolver.rs b/src/solvers/gpusolver.rs
index 2b9eb4a..69c400e 100644
--- a/src/solvers/gpusolver.rs
+++ b/src/solvers/gpusolver.rs
@@ -1,16 +1,29 @@
-use crate::solver::{wall_stats, Solver, IteratorSolver};
-use crate::structs::StoneWall;
use crate::permutations::PermutationGenerator;
+use crate::solver::{wall_stats, IteratorSolver, Solver};
+use crate::structs::StoneWall;
#[derive(Debug)]
pub struct GpuSolver {
- n: u32, h: u32, w: u32,
+ n: u32,
+ h: u32,
+ w: u32,
permutations: Vec<Vec<u32>>,
masks: Vec<u64>,
}
impl GpuSolver {
fn solve_to_vec(&mut self) -> Vec<StoneWall> {
+ let src =
+ std::fs::read_to_string("src/solvers/check.cl").expect("failed to open kernel file");
+
+ crate::solvers::opencl::GpuSolver::launch_sevice(
+ &self.masks,
+ self.n,
+ self.h,
+ self.w,
+ src.as_ref(),
+ )
+ .unwrap();
vec![]
}
}
@@ -25,9 +38,9 @@ fn generate_masks(permutations: &[Vec<u32>]) -> Vec<u64> {
let mut v = 0;
let mut x = 0u64;
for i in p.iter().take(p.len() - 1).map(|i| {
- v += i;
- v
- }) {
+ v += i;
+ v
+ }) {
x |= 1 << i
}
masks.push(x)
@@ -41,7 +54,9 @@ impl Solver for GpuSolver {
let permutations = generate_permutations(n);
let masks = generate_masks(&permutations);
Self {
- n, h, w,
+ n,
+ h,
+ w,
permutations,
masks,
}
diff --git a/src/solvers/opencl.rs b/src/solvers/opencl.rs
index 1293295..8421d53 100644
--- a/src/solvers/opencl.rs
+++ b/src/solvers/opencl.rs
@@ -7,8 +7,11 @@ pub struct Job {
}
pub struct GpuSolver {
+ #[allow(unused)]
platform: Platform,
+ #[allow(unused)]
device: Device,
+ #[allow(unused)]
context: Context,
program: Program,
queue: Queue,
@@ -17,19 +20,19 @@ pub struct GpuSolver {
w: u32,
wg_size: usize,
permutations: Buffer<u64>,
- rec_queues: Vec<ReqestBuffer>,
+ rec_queues: Vec<RequestBuffer>,
}
-struct ReqestBuffer {
+struct RequestBuffer {
mask_buff: Vec<u64>,
row_buff: Vec<Vec<u32>>,
pointer: usize,
receiver: Receiver<Job>,
}
-impl ReqestBuffer {
- pub fn new(size: usize, receiver: Receiver<Job>) -> ReqestBuffer {
- Self {
+impl RequestBuffer {
+ pub fn new(size: usize, receiver: Receiver<Job>) -> Self {
+ RequestBuffer {
mask_buff: Vec::with_capacity(size),
row_buff: Vec::with_capacity(size),
pointer: 0,
@@ -48,13 +51,14 @@ impl ReqestBuffer {
}
None
}
+ #[allow(unused)]
pub fn get_rows(&self) -> &[Vec<u32>] {
self.row_buff.as_ref()
}
}
impl GpuSolver {
- pub fn new(
+ pub fn launch_sevice(
permutation_masks: &[u64],
n: u32,
h: u32,
@@ -86,7 +90,7 @@ impl GpuSolver {
for _ in 0..h {
let (sx, rx) = std::sync::mpsc::channel();
senders.push(sx);
- receivers.push(ReqestBuffer::new(wg_size, rx));
+ receivers.push(RequestBuffer::new(wg_size, rx));
}
let solver = Self {
@@ -108,6 +112,22 @@ impl GpuSolver {
Ok(senders)
}
+ fn get_dim(&self, queue: usize) -> usize {
+ let chunk = self.permutations.len() / self.n as usize;
+ let dim = (queue + 1) * chunk;
+ if dim % self.wg_size == 0 {
+ dim
+ } else {
+ let dim_small = dim / self.wg_size;
+ ((dim_small + 1) * chunk)
+ }
+ }
+ fn get_off(&self, queue: usize) -> u64 {
+ let chunk = self.permutations.len() / self.n as usize;
+ let dim = (queue + 1) * chunk;
+ ((self.n as usize - queue - 1) * chunk - self.wg_size + dim % self.wg_size) as u64
+ }
+
fn run(mut self) -> ! {
let queues = self.rec_queues.len();
let mut instruction_buffer = Vec::with_capacity((self.n - self.h) as usize);
@@ -123,10 +143,9 @@ impl GpuSolver {
.unwrap();
instruction_buffer.push(buffer);
- let dim = (i + 1) * chunk;
let results: Buffer<u64> = Buffer::builder()
.queue(self.queue.clone())
- .len(self.wg_size * dim / 64)
+ .len(self.wg_size * self.get_dim(i) / 64)
.flags(flags::MEM_READ_WRITE)
.build()
.unwrap();
@@ -135,8 +154,7 @@ impl GpuSolver {
for i in (0..self.rec_queues.len()).cycle() {
if let Some(buffer) = self.rec_queues[i].read() {
instruction_buffer[i].write(buffer).enq().unwrap();
- println!("hello world");
- let dim = (i + 1) * chunk;
+ let dim = self.get_dim(i);
let kernel = Kernel::builder()
.program(&self.program)
@@ -149,7 +167,7 @@ impl GpuSolver {
.arg_local::<u64>(self.wg_size)
.arg(self.n)
.arg(self.w)
- .arg((self.n as u64 - i as u64 - 1) * chunk as u64)
+ .arg(self.get_off(i))
.build()
.unwrap();