summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Dennis Kobert <dennis@kobert.dev> 2020-01-04 08:13:54 +0100
committer Dennis Kobert <dennis@kobert.dev> 2020-01-04 08:13:54 +0100
commit 920a6729577d14ba9190abcb3a2c4087652228a4 (patch)
tree 8790c755a933f46bbe414cfd1eb1b1be0fea12ad
parent 862695a7374bc60368d09a7e695ae0b8aa3b97c2 (diff)
Add profiling, extend GPU support
-rwxr-xr-x Cargo.toml 9
-rw-r--r-- benches/simple.rs 10
-rw-r--r-- src/lib.rs 12
-rwxr-xr-x src/main.rs 13
-rw-r--r-- src/solvers/check.cl 21
-rwxr-xr-x src/solvers/intuitive.rs 21
-rwxr-xr-x src/solvers/mod.rs 25
-rw-r--r-- src/solvers/ocl.rs 29
-rw-r--r-- src/solvers/opencl.rs 75
9 files changed, 173 insertions, 42 deletions
diff --git a/Cargo.toml b/Cargo.toml
index 45650fb..3e2783d 100755
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,10 +9,19 @@ edition = "2018"
gpu = ["ocl"]
+[dev-dependencies]
+criterion = "0.2"
+
[dependencies]
num = "0.2"
rayon = "1.3"
permutohedron = "0.2"
permutation_way = { git = "https://github.com/corentinway/permutation_way_rs" }
+lazy_static = { version = "1.4", optional = false}
ocl = { version = "0.19", optional = true}
+
+
+[[bench]]
+name = "simple"
+harness = false
diff --git a/benches/simple.rs b/benches/simple.rs
new file mode 100644
index 0000000..cd00c9c
--- /dev/null
+++ b/benches/simple.rs
@@ -0,0 +1,10 @@
+use criterion::*;
+
+use babel::solve;
+
+pub fn criterion_benchmark(c: &mut Criterion) {
+ c.bench_function("wall 6", |b| b.iter(|| solve(black_box(6))));
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..5c239cc
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,12 @@
+pub mod solver;
+pub mod solvers;
+pub mod structs;
+#[macro_use]
+extern crate lazy_static;
+
+pub static N: u32 = 8;
+pub fn solve(n: u32) {
+ let mut solver = solvers::intuitive::NormalSolver::new(n);
+ solver.solve();
+}
+
diff --git a/src/main.rs b/src/main.rs
index 8a96f05..30acc60 100755
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,14 +1,11 @@
mod solver;
mod solvers;
mod structs;
+#[macro_use]
+pub extern crate lazy_static;
+pub static N: u32 = 8;
fn main() {
- #[cfg(feature = "gpu")]
- solvers::ocl::trivial();
- #[cfg(not(feature = "gpu"))]
- //let mut solver = solvers::incremental_block::IncrementalBlockSover::new(4);
- {
- let mut solver = solvers::intuitive::NormalSolver::new(8);
- solver.solve();
- }
+ let mut solver = solvers::intuitive::NormalSolver::new(N);
+ solver.solve();
}
diff --git a/src/solvers/check.cl b/src/solvers/check.cl
index d0ab4f0..94a81fc 100644
--- a/src/solvers/check.cl
+++ b/src/solvers/check.cl
@@ -1,4 +1,21 @@
-__kernel void check(__global int* permutations, __global int* checks, int n) {
- buffer[get_global_id(0)] += scalar;
+#pragma OPENCL EXTENSION cl_intel_printf : enable
+__kernel void check(__global unsigned long* permutations, __global int* results,
+ unsigned long mask, unsigned int n, unsigned int w, unsigned long offset) {
+ int id = get_global_id(0);
+ unsigned long curr_mask = mask | permutations[id + offset];
+
+ unsigned long tmask, sum, stones;
+ stones = tmask = sum = 0;
+ for (int i = 1; i <= w + 1; i++) {
+ if ((curr_mask & (1 << i)) == 0) {
+ stones += 1;
+ tmask |= 1 << (i - sum);
+ sum = i;
+ }
+ }
+ if (tmask == (1 << (n + 1)) - 2 && stones == n) {
+ printf("test");
+ results[id] = id;
+ }
}
diff --git a/src/solvers/intuitive.rs b/src/solvers/intuitive.rs
index 6fea69d..3caac73 100755
--- a/src/solvers/intuitive.rs
+++ b/src/solvers/intuitive.rs
@@ -1,6 +1,7 @@
use rayon::prelude::*;
/// Solve for a given N and return the resulting wall
+#[derive(Clone)]
pub struct NormalSolver {
pub n: u32,
/// calculated height [might not be correct!]
@@ -52,7 +53,7 @@ impl NormalSolver {
for (n, i) in self.permutations.iter().enumerate() {
let tmp: Vec<u32> = i.iter().map(|x| *x).collect();
//println!("perm {}: {:?}", n, tmp);
- //println!("perm {}: {:?}", n, self.masks[n]);
+ println!("perm {}: {:?}", n, self.masks[n]);
}
println!("calculate results");
self.permute(
@@ -84,6 +85,8 @@ impl NormalSolver {
}
}
if tmask == (1 << (self.n + 1)) - 2 && stones == self.n {
+ println!("tmask: {:b}", tmask);
+ println!("curr: {:b}", curr_mask);
//println!("success");
unsafe {
SOLUTIONS += 1;
@@ -96,11 +99,25 @@ impl NormalSolver {
}
let mut new_num = Vec::from(numbers);
let start = numbers[index as usize] / self.chunk;
+ if index as usize == numbers.len() - 1 {
+ #[cfg(feature = "gpu")]
+ {
+ crate::solvers::opencl::check(
+ self.masks.as_ref(),
+ self.w,
+ self.n,
+ curr_mask,
+ (start * self.chunk) as usize,
+ )
+ .unwrap();
+ return;
+ }
+ }
for i in start..self.n - (self.h - 1 - index as u32) {
for n in 1..(numbers.len() - index) {
new_num[n + index] = (n as u32 + i) * self.chunk;
}
- if index == 0 {
+ if index == 0 && false {
(0..self.chunk).into_par_iter().for_each(|j| {
let mut new_num = new_num.clone();
let tmp = i * self.chunk + j;
diff --git a/src/solvers/mod.rs b/src/solvers/mod.rs
index 9dc210b..0615eab 100755
--- a/src/solvers/mod.rs
+++ b/src/solvers/mod.rs
@@ -1,4 +1,27 @@
//pub mod incremental_block;
pub mod intuitive;
#[cfg(feature = "gpu")]
-pub mod ocl;
+pub mod opencl;
+
+lazy_static! {
+ pub static ref PERMUTATIONS: (Vec<Vec<u32>>, Vec<u64>) = {
+ let n = crate::N;
+ let mut heap = (1..=n).collect::<Vec<u32>>();
+ let heap = permutohedron::Heap::new(&mut heap);
+ let n_f = permutohedron::factorial(n as usize);
+ let mut permutations = Vec::with_capacity(n_f);
+
+ let mut masks: Vec<u64> = vec![0; n_f];
+ println!("Generating permutations");
+ for (j, data) in heap.enumerate() {
+ let mut sum = 0;
+ permutations.push(data.clone());
+ for stone in data.iter().take(n as usize - 1) {
+ sum += stone;
+ masks[j] |= 1 << sum;
+ }
+ }
+ (permutations, masks)
+ };
+}
+
diff --git a/src/solvers/ocl.rs b/src/solvers/ocl.rs
deleted file mode 100644
index 7c6bb16..0000000
--- a/src/solvers/ocl.rs
+++ /dev/null
@@ -1,29 +0,0 @@
-use ocl::ProQue;
-
-pub fn trivial() -> ocl::Result<()> {
- let src = r#"
- __kernel void add(__global float* buffer, float scalar) {
- buffer[get_global_id(0)] += scalar;
- }
- "#;
-
- let pro_que = ProQue::builder().src(src).dims(1 << 20).build()?;
-
- let buffer = pro_que.create_buffer::<f32>()?;
-
- let kernel = pro_que
- .kernel_builder("add")
- .arg(&buffer)
- .arg(10.0f32)
- .build()?;
-
- unsafe {
- kernel.enq()?;
- }
-
- let mut vec = vec![0.0f32; buffer.len()];
- buffer.read(&mut vec).enq()?;
-
- println!("The value at index [{}] is now '{}'!", 200007, vec[200007]);
- Ok(())
-}
diff --git a/src/solvers/opencl.rs b/src/solvers/opencl.rs
new file mode 100644
index 0000000..25b8119
--- /dev/null
+++ b/src/solvers/opencl.rs
@@ -0,0 +1,75 @@
+use lazy_static::*;
+use ocl::ProQue;
+#[macro_use]
+
+lazy_static! {
+ static ref BUFFER: ocl::Buffer<u64> = {
+ let platform = ocl::Platform::default();
+ let device = ocl::Device::first(platform).unwrap();
+ let context = ocl::Context::builder()
+ .platform(platform)
+ .devices(device.clone())
+ .build()
+ .unwrap();
+ let queue = ocl::Queue::new(&context, device, None).unwrap();
+
+ println!("created buffer!");
+ ocl::Buffer::builder()
+ .queue(queue)
+ .copy_host_slice(crate::solvers::PERMUTATIONS.1.as_ref())
+ .len(crate::solvers::PERMUTATIONS.1.len())
+ .build()
+ .unwrap()
+ };
+ static ref QUEUE: ocl::ProQue = {
+ let src =
+ std::fs::read_to_string("src/solvers/check.cl").expect("failed to open kernel file");
+ ProQue::builder()
+ .src(src)
+ .dims(crate::solvers::PERMUTATIONS.0.len())
+ .build()
+ .unwrap()
+ };
+}
+
+pub fn get_buffer() -> &'static ocl::Buffer<u64> {
+ &BUFFER
+}
+
+pub fn check(permutations: &[u64], w: u32, n: u32, mask: u64, offset: usize) -> ocl::Result<()> {
+ //println!("read src!");
+ let src = std::fs::read_to_string("src/solvers/check.cl").expect("failed to open kernel file");
+
+ //println!("created queue!");
+ //println!("offset: {}", offset);
+ //println!("length: {}", permutations.len() - offset);
+ let pro_que = ProQue::builder()
+ .src(src)
+ .dims(permutations.len() - offset)
+ .build()?;
+
+ let results = pro_que.create_buffer::<i32>()?;
+ let kernel = pro_que
+ .kernel_builder("check")
+ .arg(get_buffer())
+ .arg(&results)
+ .arg(mask)
+ .arg(n)
+ .arg(w)
+ .arg(offset as u64)
+ //.global_work_offset(offset)
+ .build()?;
+
+ //println!("starting calculation");
+ unsafe {
+ kernel.enq()?;
+ }
+
+ let mut vec = vec![0; results.len()];
+ results.read(&mut vec).enq()?;
+
+ if vec.iter().any(|x| *x != 0) {
+ println!("The resuts are now '{:?}'!", vec);
+ }
+ Ok(())
+}