From 920a6729577d14ba9190abcb3a2c4087652228a4 Mon Sep 17 00:00:00 2001 From: Dennis Kobert Date: Sat, 4 Jan 2020 08:13:54 +0100 Subject: Add profiling extend gpu support --- src/solvers/check.cl | 21 ++++++++++++-- src/solvers/intuitive.rs | 21 ++++++++++++-- src/solvers/mod.rs | 25 +++++++++++++++- src/solvers/ocl.rs | 29 ------------------- src/solvers/opencl.rs | 75 ++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 137 insertions(+), 34 deletions(-) delete mode 100644 src/solvers/ocl.rs create mode 100644 src/solvers/opencl.rs (limited to 'src/solvers') diff --git a/src/solvers/check.cl b/src/solvers/check.cl index d0ab4f0..94a81fc 100644 --- a/src/solvers/check.cl +++ b/src/solvers/check.cl @@ -1,4 +1,21 @@ -__kernel void check(__global int* permutations, __global int* checks, int n) { - buffer[get_global_id(0)] += scalar; +#pragma OPENCL EXTENSION cl_intel_printf : enable +__kernel void check(__global unsigned long* permutations, __global int* results, + unsigned long mask, unsigned int n, unsigned int w, unsigned long offset) { + int id = get_global_id(0); + unsigned long curr_mask = mask | permutations[id + offset]; + + unsigned long tmask, sum, stones; + stones = tmask = sum = 0; + for (int i = 1; i <= w + 1; i++) { + if ((curr_mask & (1 << i)) == 0) { + stones += 1; + tmask |= 1 << (i - sum); + sum = i; + } + } + if (tmask == (1 << (n + 1)) - 2 && stones == n) { + printf("test"); + results[id] = id; + } } diff --git a/src/solvers/intuitive.rs b/src/solvers/intuitive.rs index 6fea69d..3caac73 100755 --- a/src/solvers/intuitive.rs +++ b/src/solvers/intuitive.rs @@ -1,6 +1,7 @@ use rayon::prelude::*; /// Solve for a given N and return the resulting wall +#[derive(Clone)] pub struct NormalSolver { pub n: u32, /// calculated height [might not be correct!] @@ -52,7 +53,7 @@ impl NormalSolver { for (n, i) in self.permutations.iter().enumerate() { let tmp: Vec = i.iter().map(|x| *x).collect(); //println!("perm {}: {:?}", n, tmp); - //println!("perm {}: {:?}", n, self.masks[n]); + println!("perm {}: {:?}", n, self.masks[n]); } println!("calculate results"); self.permute( @@ -84,6 +85,8 @@ impl NormalSolver { } } if tmask == (1 << (self.n + 1)) - 2 && stones == self.n { + println!("tmask: {:b}", tmask); + println!("curr: {:b}", curr_mask); //println!("success"); unsafe { SOLUTIONS += 1; @@ -96,11 +99,25 @@ impl NormalSolver { } let mut new_num = Vec::from(numbers); let start = numbers[index as usize] / self.chunk; + if index as usize == numbers.len() - 1 { + #[cfg(feature = "gpu")] + { + crate::solvers::opencl::check( + self.masks.as_ref(), + self.w, + self.n, + curr_mask, + (start * self.chunk) as usize, + ) + .unwrap(); + return; + } + } for i in start..self.n - (self.h - 1 - index as u32) { for n in 1..(numbers.len() - index) { new_num[n + index] = (n as u32 + i) * self.chunk; } - if index == 0 { + if index == 0 && false { (0..self.chunk).into_par_iter().for_each(|j| { let mut new_num = new_num.clone(); let tmp = i * self.chunk + j; diff --git a/src/solvers/mod.rs b/src/solvers/mod.rs index 9dc210b..0615eab 100755 --- a/src/solvers/mod.rs +++ b/src/solvers/mod.rs @@ -1,4 +1,27 @@ //pub mod incremental_block; pub mod intuitive; #[cfg(feature = "gpu")] -pub mod ocl; +pub mod opencl; + +lazy_static! { + pub static ref PERMUTATIONS: (Vec>, Vec) = { + let n = crate::N; + let mut heap = (1..=n).collect::>(); + let heap = permutohedron::Heap::new(&mut heap); + let n_f = permutohedron::factorial(n as usize); + let mut permutations = Vec::with_capacity(n_f); + + let mut masks: Vec = vec![0; n_f]; + println!("Generating permutations"); + for (j, data) in heap.enumerate() { + let mut sum = 0; + permutations.push(data.clone()); + for stone in data.iter().take(n as usize - 1) { + sum += stone; + masks[j] |= 1 << sum; + } + } + (permutations, masks) + }; +} + diff --git a/src/solvers/ocl.rs b/src/solvers/ocl.rs deleted file mode 100644 index 7c6bb16..0000000 --- a/src/solvers/ocl.rs +++ /dev/null @@ -1,29 +0,0 @@ -use ocl::ProQue; - -pub fn trivial() -> ocl::Result<()> { - let src = r#" - __kernel void add(__global float* buffer, float scalar) { - buffer[get_global_id(0)] += scalar; - } - "#; - - let pro_que = ProQue::builder().src(src).dims(1 << 20).build()?; - - let buffer = pro_que.create_buffer::()?; - - let kernel = pro_que - .kernel_builder("add") - .arg(&buffer) - .arg(10.0f32) - .build()?; - - unsafe { - kernel.enq()?; - } - - let mut vec = vec![0.0f32; buffer.len()]; - buffer.read(&mut vec).enq()?; - - println!("The value at index [{}] is now '{}'!", 200007, vec[200007]); - Ok(()) -} diff --git a/src/solvers/opencl.rs b/src/solvers/opencl.rs new file mode 100644 index 0000000..25b8119 --- /dev/null +++ b/src/solvers/opencl.rs @@ -0,0 +1,75 @@ +use lazy_static::*; +use ocl::ProQue; +#[macro_use] + +lazy_static! { + static ref BUFFER: ocl::Buffer = { + let platform = ocl::Platform::default(); + let device = ocl::Device::first(platform).unwrap(); + let context = ocl::Context::builder() + .platform(platform) + .devices(device.clone()) + .build() + .unwrap(); + let queue = ocl::Queue::new(&context, device, None).unwrap(); + + println!("created buffer!"); + ocl::Buffer::builder() + .queue(queue) + .copy_host_slice(crate::solvers::PERMUTATIONS.1.as_ref()) + .len(crate::solvers::PERMUTATIONS.1.len()) + .build() + .unwrap() + }; + static ref QUEUE: ocl::ProQue = { + let src = + std::fs::read_to_string("src/solvers/check.cl").expect("failed to open kernel file"); + ProQue::builder() + .src(src) + .dims(crate::solvers::PERMUTATIONS.0.len()) + .build() + .unwrap() + }; +} + +pub fn get_buffer() -> &'static ocl::Buffer { + &BUFFER +} + +pub fn check(permutations: &[u64], w: u32, n: u32, mask: u64, offset: usize) -> ocl::Result<()> { + //println!("read src!"); + let src = std::fs::read_to_string("src/solvers/check.cl").expect("failed to open kernel file"); + + //println!("created queue!"); + //println!("offset: {}", offset); + //println!("length: {}", permutations.len() - offset); + let pro_que = ProQue::builder() + .src(src) + .dims(permutations.len() - offset) + .build()?; + + let results = pro_que.create_buffer::()?; + let kernel = pro_que + .kernel_builder("check") + .arg(get_buffer()) + .arg(&results) + .arg(mask) + .arg(n) + .arg(w) + .arg(offset as u64) + //.global_work_offset(offset) + .build()?; + + //println!("starting calculation"); + unsafe { + kernel.enq()?; + } + + let mut vec = vec![0; results.len()]; + results.read(&mut vec).enq()?; + + if vec.iter().any(|x| *x != 0) { + println!("The resuts are now '{:?}'!", vec); + } + Ok(()) +} -- cgit v1.2.3-54-g00ecf