summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDennis Kobert <dennis@kobert.dev>2020-01-12 03:47:14 +0100
committerDennis Kobert <dennis@kobert.dev>2020-01-12 03:47:14 +0100
commit1650906f010574e8810c8b0b98334e22fac5894d (patch)
treefe27a9d727e143353c1fcf0286890d549c443303
parent6b6f830f8e6d4c0b0d1328b7b22f810ad039d038 (diff)
Restructuring
-rw-r--r--[-rwxr-xr-x].gitignore0
-rw-r--r--[-rwxr-xr-x]Cargo.toml0
l---------gpu1
-rw-r--r--[-rwxr-xr-x]src/main.rs5
-rwxr-xr-xsrc/solver.rs23
-rw-r--r--src/solvers/gpu/check.cl (renamed from src/solvers/check.cl)0
-rw-r--r--src/solvers/gpu/host.rs235
-rw-r--r--src/solvers/gpu/manager.rs104
-rw-r--r--src/solvers/gpu/mod.rs56
-rw-r--r--src/solvers/gpu/output.rs96
-rw-r--r--src/solvers/gpusolver.rs3
-rw-r--r--[-rwxr-xr-x]src/solvers/mod.rs30
-rw-r--r--[-rwxr-xr-x]src/solvers/single.rs (renamed from src/solvers/intuitive.rs)0
-rw-r--r--[-rwxr-xr-x]src/structs.rs0
14 files changed, 522 insertions, 31 deletions
diff --git a/.gitignore b/.gitignore
index 2f7cd6f..2f7cd6f 100755..100644
--- a/.gitignore
+++ b/.gitignore
diff --git a/Cargo.toml b/Cargo.toml
index 0813fa4..0813fa4 100755..100644
--- a/Cargo.toml
+++ b/Cargo.toml
diff --git a/gpu b/gpu
new file mode 120000
index 0000000..188e3ab
--- /dev/null
+++ b/gpu
@@ -0,0 +1 @@
+src/solvers/gpu \ No newline at end of file
diff --git a/src/main.rs b/src/main.rs
index f9775ed..c2643c3 100755..100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,12 +1,11 @@
mod permutations;
-mod solver;
mod solvers;
mod structs;
-use crate::solver::{IteratorSolver, Solver};
+use crate::solvers::{IteratorSolver, Solver};
pub static N: u32 = 8;
fn main() {
- let mut solver = solvers::intuitive::NormalSolver::new(N);
+ let mut solver = solvers::single::NormalSolver::new(N);
solver.solve();
/*let solver = solvers::gpusolver::GpuSolver::new(N);
println!("solver: {:?}", solver);
diff --git a/src/solver.rs b/src/solver.rs
deleted file mode 100755
index db4e732..0000000
--- a/src/solver.rs
+++ /dev/null
@@ -1,23 +0,0 @@
-use crate::structs::StoneWall;
-
-/// calculate h and w
-pub fn wall_stats(n: u32) -> (u32, u32) {
- let h = (n >> 1) + 1;
- (h, (n - 1) * h)
-}
-
-pub trait Solver {
- fn new(n: u32) -> Self;
- fn n(&self) -> u32;
- fn h(&self) -> u32;
- fn w(&self) -> u32;
-}
-
-pub trait FirstSolver {
- fn solve(self) -> StoneWall;
-}
-
-pub trait IteratorSolver: Solver {
- type IntoIter: Iterator<Item=StoneWall>;
- fn solve(self) -> Self::IntoIter;
-}
diff --git a/src/solvers/check.cl b/src/solvers/gpu/check.cl
index af9bc51..af9bc51 100644
--- a/src/solvers/check.cl
+++ b/src/solvers/gpu/check.cl
diff --git a/src/solvers/gpu/host.rs b/src/solvers/gpu/host.rs
new file mode 100644
index 0000000..6b79078
--- /dev/null
+++ b/src/solvers/gpu/host.rs
@@ -0,0 +1,235 @@
+use ocl::{flags, Buffer, Context, Device, Kernel, Platform, Program, Queue};
+use std::sync::mpsc::{Receiver, Sender};
+
+/// State owned by the GPU worker thread that `Host::launch_sevice` spawns.
+#[derive(Debug)]
+pub struct Host {
+ /// OpenCL platform the device was taken from; kept but not read again here.
+ #[allow(unused)]
+ platform: Platform,
+ /// OpenCL device the queue and program were created for; kept but not read again here.
+ #[allow(unused)]
+ device: Device,
+ /// OpenCL context the queue and program were created in; kept but not read again here.
+ #[allow(unused)]
+ context: Context,
+ /// Program built from the kernel source handed to `launch_sevice`; `run` builds the
+ /// `check` kernel from it.
+ program: Program,
+ /// Command queue used for every buffer and kernel launch.
+ queue: Queue,
+ /// `n` as passed to `launch_sevice`; forwarded to the kernel.
+ n: u32,
+ /// `h` as passed to `launch_sevice`; together with `n` it fixes the number of queues.
+ h: u32,
+ /// `w` as passed to `launch_sevice`; forwarded to the kernel.
+ w: u32,
+ /// Workgroup size. `launch_sevice` accepts 0 to mean "device maximum" and stores the
+ /// resolved value here.
+ wg_size: usize,
+ /// Device buffer initialised from the permutation masks handed to `launch_sevice`.
+ permutations: Buffer<u64>,
+ /// One request buffer per job channel created in `launch_sevice`.
+ rec_queues: Vec<RequestBuffer>,
+ /// Walls found so far; `run` appends one entry per set result bit.
+ walls: Vec<Vec<u32>>,
+}
+
+impl Host {
+ /// Sets up OpenCL and starts the GPU worker thread.
+ ///
+ /// Uses the default platform and its first device, builds a context, a queue and a
+ /// program from `src`, uploads `permutation_masks` into a device buffer, creates
+ /// `n - h + 1` job channels, and moves everything into a `Host` that runs `Host::run`
+ /// on a thread named "GPU Deamon".
+ ///
+ /// `wg_size` may be 0 to request the device's maximum workgroup size.
+ ///
+ /// Returns the sending half of every job channel.
+ ///
+ /// # Errors
+ /// Propagates any OpenCL error, and fails with "invalid workgroup size" when `wg_size`
+ /// exceeds the device maximum.
+ ///
+ /// # Panics
+ /// Panics if the worker thread cannot be spawned.
+ ///
+ /// NOTE(review): `Job` and `RequestBuffer` are not brought into scope by the `use` lines
+ /// of this file, and `RequestBuffer::new` is called with two arguments here — confirm
+ /// which definitions are intended.
+ pub fn launch_sevice(
+ permutation_masks: &[u64],
+ n: u32,
+ h: u32,
+ w: u32,
+ mut wg_size: usize,
+ src: &str,
+ ) -> ocl::Result<Vec<Sender<Job>>> {
+ let platform = ocl::Platform::default();
+ let device = ocl::Device::first(platform)?;
+ let context = ocl::Context::builder()
+ .platform(platform)
+ .devices(device.clone())
+ .build()?;
+ let queue = ocl::Queue::new(&context, device, None)?;
+
+ let program = Program::builder()
+ .devices(device)
+ .src(src)
+ .build(&context)?;
+ // Upload the permutation masks once; every kernel launch reads from this buffer.
+ let buffer = ocl::Buffer::builder()
+ .queue(queue.clone())
+ .flags(flags::MEM_READ_WRITE)
+ .copy_host_slice(permutation_masks)
+ .len(permutation_masks.len())
+ .build()?;
+
+ let mut senders = Vec::with_capacity((n - h + 1) as usize);
+ let mut receivers = Vec::with_capacity((n - h + 1) as usize);
+ // Resolve the "0 means maximum" convention and reject sizes the device cannot run.
+ let max_wg_size = device.max_wg_size()?;
+ if wg_size == 0 {
+ wg_size = max_wg_size;
+ } else if wg_size > max_wg_size {
+ return Err(ocl::Error::from("invalid workgroup size"));
+ }
+ // One channel per queue index 0..=(n - h); the receiving half stays with the host.
+ for _ in 0..=(n - h) {
+ let (sx, rx) = std::sync::mpsc::channel();
+ senders.push(sx);
+ receivers.push(RequestBuffer::new(wg_size, rx));
+ }
+
+ let solver = Self {
+ platform,
+ device,
+ context,
+ program,
+ queue,
+ n,
+ h,
+ w,
+ wg_size,
+ permutations: buffer,
+ rec_queues: receivers,
+ walls: Vec::new(),
+ };
+ // The join handle is dropped, so the worker thread runs detached.
+ std::thread::Builder::new()
+ .name("GPU Deamon".into())
+ .spawn(move || {
+ solver.run();
+ })
+ .unwrap();
+ println!("started gpu thread");
+ Ok(senders)
+ }
+
+    /// Global work size for `queue`: `queue + 1` chunks of the permutation buffer (a chunk
+    /// being `len / n` entries), rounded up to the next multiple of the workgroup size.
+    fn get_dim(&self, queue: usize) -> usize {
+        let per_chunk = self.permutations.len() / self.n as usize;
+        let wanted = (queue + 1) * per_chunk;
+        // Ceiling division, then scale back up to whole workgroups.
+        let groups = (wanted + self.wg_size - 1) / self.wg_size;
+        groups * self.wg_size
+    }
+    /// Offset passed to the kernel for `queue`: buffer length minus one chunk (`len / n`)
+    /// minus `get_dim(queue)`. `run` adds the same offset when turning result indices back
+    /// into permutation indices.
+    ///
+    /// # Panics
+    /// Panics with "workgroup size to big, offset underflow" when the subtraction would go
+    /// below zero or the result exceeds `isize::max_value()`.
+    fn get_off(&self, queue: usize) -> u64 {
+        let chunk = self.permutations.len() / self.n as usize;
+        // Checked arithmetic so the intended panic message is produced in every build
+        // profile, instead of a generic overflow panic in debug builds.
+        let off = self
+            .permutations
+            .len()
+            .checked_sub(chunk)
+            .and_then(|rest| rest.checked_sub(self.get_dim(queue)))
+            .filter(|&off| off <= isize::max_value() as usize)
+            .unwrap_or_else(|| panic!("workgroup size to big, offset underflow"));
+        off as u64
+    }
+    /// Length, in `u64` words, of the result buffer for `queue`: the global work size times
+    /// the number of result words kept per work item.
+    fn get_res(&self, queue: usize) -> usize {
+        self.get_dim(queue) * self.get_res_save_dim()
+    }
+    /// Number of `u64` words needed to hold one bit per workgroup slot (`wg_size` bits,
+    /// rounded up to whole words).
+    fn get_res_save_dim(&self) -> usize {
+        const BITS_PER_WORD: usize = 64;
+        (self.wg_size + (BITS_PER_WORD - 1)) / BITS_PER_WORD
+    }
+
+ /// Worker loop of the GPU thread; never returns.
+ ///
+ /// Allocates one instruction buffer and one result buffer per request queue, then cycles
+ /// over the queues forever. Whenever a queue yields a full batch, the batch is uploaded,
+ /// the `check` kernel is run over `get_dim(i)` work items, the result words are read back,
+ /// and every set bit is decoded into a wall that is printed and stored in `self.walls`.
+ ///
+ /// # Panics
+ /// Panics on any OpenCL failure (all results are unwrapped), and at the trailing `panic!()`
+ /// should the queue loop ever end.
+ fn run(mut self) -> ! {
+ let queues = self.rec_queues.len();
+ // Capacity is `n - h`, one less than the number of queues created in `launch_sevice`;
+ // it is only an allocation hint.
+ let mut instruction_buffer = Vec::with_capacity((self.n - self.h) as usize);
+ let mut result_buffer = Vec::with_capacity((self.n - self.h) as usize);
+
+ for i in 0..queues {
+ // Holds one batch of request masks: `wg_size` words.
+ let buffer: Buffer<u64> = Buffer::builder()
+ .queue(self.queue.clone())
+ .len(self.wg_size)
+ .flags(flags::MEM_READ_WRITE)
+ .build()
+ .unwrap();
+
+ instruction_buffer.push(buffer);
+ // Sized per queue because the global work size grows with the queue index.
+ let results: Buffer<u64> = Buffer::builder()
+ .queue(self.queue.clone())
+ .len(self.get_res(i))
+ .flags(flags::MEM_READ_WRITE)
+ .build()
+ .unwrap();
+ result_buffer.push(results);
+ }
+ println!("finished gpu setup");
+ // Round-robin over the queues; `cycle()` over a non-empty range never finishes.
+ for i in (0..self.rec_queues.len()).cycle() {
+ if let Some(buffer) = self.rec_queues[i].read() {
+ instruction_buffer[i].write(buffer).enq().unwrap();
+ let dim = self.get_dim(i);
+
+ //println!("dim: {}", dim);
+ //println!("off: {}", self.get_off(i));
+ //println!("result size: {}", self.get_res_save_dim());
+ // NOTE(review): the kernel object is rebuilt for every batch.
+ let kernel = Kernel::builder()
+ .program(&self.program)
+ .name("check")
+ .queue(self.queue.clone())
+ .global_work_size(dim)
+ .arg(&self.permutations)
+ .arg(&result_buffer[i])
+ .arg(&instruction_buffer[i])
+ .arg_local::<u64>(self.wg_size)
+ .arg(self.n)
+ .arg(self.w)
+ .arg(self.get_off(i))
+ .build()
+ .unwrap();
+
+ // NOTE(review): whether this launch is sound depends on `check.cl` staying within
+ // the buffer sizes set up above — not verifiable from this file.
+ unsafe {
+ kernel
+ .cmd()
+ .queue(&self.queue)
+ .global_work_offset(kernel.default_global_work_offset())
+ .global_work_size(dim)
+ .local_work_size(self.wg_size)
+ .enq()
+ .unwrap();
+ }
+
+ // Read the result words for this queue back into host memory.
+ let mut result = vec![0u64; self.get_res(i)];
+ result_buffer[i]
+ .cmd()
+ .queue(&self.queue)
+ .offset(0)
+ .read(&mut result)
+ .enq()
+ .unwrap();
+ for (j, r) in result.iter().enumerate() {
+ // NOTE(review): the first result word is always skipped — confirm this is intended.
+ if j == 0 {
+ continue;
+ }
+ for b in 0..64 {
+ if r & (1 << b) != 0 {
+ // Word index -> permutation index (plus queue offset); word position within
+ // the permutation plus bit position -> index of the request in the batch.
+ let permutation =
+ j / self.get_res_save_dim() + self.get_off(i) as usize;
+ let instruction = (j % self.get_res_save_dim()) * 64 + b;
+ let mut wall = self.rec_queues[i].get_rows()[instruction].clone();
+ wall.push(permutation as u32);
+ println!("{:?}", wall);
+ self.walls.push(wall);
+ }
+ }
+ }
+ }
+ }
+ // Only reachable when there are no request queues at all.
+ panic!();
+ }
+}
+/*
+pub fn check(permutations: &[u64], w: u32, n: u32, mask: u64, offset: usize) -> ocl::Result<()> {
+ //println!("read src!");
+ let src = std::fs::read_to_string("src/solvers/check.cl").expect("failed to open kernel file");
+
+ //println!("created queue!");
+ println!("offset: {}", offset);
+ println!("length: {}", permutations.len() - offset);
+ let pro_que = ocl::ProQue::builder()
+ .src(src)
+ .dims(permutations.len() - offset)
+ .build()?;
+
+ let results = pro_que.create_buffer::<i32>()?;
+ let kernel = pro_que
+ .kernel_builder("check")
+ .arg(get_buffer())
+ .arg(&results)
+ .arg(mask)
+ .arg(n)
+ .arg(w)
+ .arg(offset as u64)
+ //.global_work_offset(offset)
+ .build()?;
+
+ //println!("starting calculation");
+ unsafe {
+ kernel.enq()?;
+ }
+
+ let mut vec = vec![0; results.len()];
+ results.read(&mut vec).enq()?;
+
+ if vec.iter().any(|x| *x != 0) {
+ println!("The resuts are now '{:?}'!", vec);
+ }
+ Ok(())
+}*/
diff --git a/src/solvers/gpu/manager.rs b/src/solvers/gpu/manager.rs
new file mode 100644
index 0000000..1dd6a4d
--- /dev/null
+++ b/src/solvers/gpu/manager.rs
@@ -0,0 +1,104 @@
+use std::sync::mpsc::{Receiver, Sender, channel};
+use std::thread::JoinHandle;
+use super::*;
+
+/// Fixed-size batch of bitmasks, filled one `CheckRequest` at a time.
+#[derive(Debug)]
+struct RequestBuffer {
+ /// Storage for one batch; its length is the batch size given to `new`.
+ mask_buff: Vec<u64>,
+ /// Index of the next slot in `mask_buff` to be written.
+ pointer: usize,
+}
+
+impl RequestBuffer {
+    /// Creates an empty batch with room for `size` bitmasks.
+    pub fn new(size: usize) -> Self {
+        Self {
+            mask_buff: vec![0; size],
+            pointer: 0,
+        }
+    }
+
+    /// Stores the bitmask of `request` in the next slot.
+    ///
+    /// Returns the whole batch once the last slot has been written (the write position is
+    /// then reset to the start), and `None` while the batch is still filling up.
+    ///
+    /// # Panics
+    /// Panics if the buffer was created with size 0.
+    pub fn read(&mut self, request: CheckRequest) -> Option<&[u64]> {
+        self.mask_buff[self.pointer] = request.bitmask;
+        self.pointer += 1;
+        if self.pointer != self.mask_buff.len() {
+            return None;
+        }
+        self.pointer = 0;
+        Some(self.mask_buff.as_slice())
+    }
+}
+
+/// State of the GPU manager thread: batches incoming check requests and forwards full
+/// batches to the host and output services.
+pub struct OclManager {
+ /// Id attached to the next host/output message pair; incremented after each pair is sent.
+ job_id: u64,
+ /// Channel to the host service.
+ host_sender: Sender<Message>,
+ /// Channel to the output service.
+ output_sender: Sender<Message>,
+ /// Incoming messages for the manager loop.
+ reciever: Receiver<Message>,
+ /// One request buffer per queue index.
+ buffers: Vec<RequestBuffer>,
+ /// Handle of the output service thread, joined on termination.
+ output_handle: JoinHandle<String>,
+ /// Handle of the host service thread, joined on termination.
+ host_handle: JoinHandle<String>,
+}
+
+impl OclManager {
+    /// Starts the output service, the host service and the manager thread.
+    ///
+    /// Derives `h` and `w` from `n`, embeds the kernel source from `check.cl`, creates one
+    /// request buffer per queue index `0..=(n - h)`, and runs `OclManager::run` on a thread
+    /// named "GPU Manager Deamon".
+    ///
+    /// Returns the sender used to talk to the manager together with the manager thread's
+    /// join handle.
+    ///
+    /// # Panics
+    /// Panics if the manager thread cannot be spawned.
+    ///
+    /// NOTE(review): `Output::launch_sevice` is declared with two parameters and
+    /// `Host::launch_sevice` returns `ocl::Result<Vec<Sender<Job>>>`, so neither call below
+    /// matches its callee yet. The spawned closure also returns `()` while the handle type is
+    /// `JoinHandle<String>`. These need to be settled together with the other services.
+    ///
+    /// NOTE(review): `wg_size == 0` ("max") is passed straight to `RequestBuffer::new`, which
+    /// yields zero-length buffers that panic on the first request.
+    pub fn launch_sevice(
+        permutations: &[&[u32]],
+        permutations_mask: &[u64],
+        n: u32,
+        // Workgroup size, set to 0 for max
+        wg_size: u32,
+    ) -> (Sender<Message>, JoinHandle<String>) {
+        let (h, w) = crate::solvers::wall_stats(n);
+        let src = include_str!("check.cl");
+        let (output_sender, output_handle) =
+            super::output::Output::launch_sevice(permutations, permutations_mask, n, h, w);
+        let (host_sender, host_handle) =
+            super::host::Host::launch_sevice(permutations_mask, n, h, w, wg_size as usize, src);
+
+        // `channel()` yields `(Sender, Receiver)` in that order.
+        let (sender, receiver) = channel();
+
+        let mut buffers = Vec::with_capacity((n - h + 1) as usize);
+        for _ in 0..=(n - h) {
+            buffers.push(RequestBuffer::new(wg_size as usize));
+        }
+
+        let manager = Self {
+            job_id: 0,
+            host_sender,
+            output_sender,
+            reciever: receiver,
+            buffers,
+            output_handle,
+            host_handle,
+        };
+        (
+            sender,
+            std::thread::Builder::new()
+                .name("GPU Manager Deamon".into())
+                .spawn(move || {
+                    manager.run();
+                })
+                .unwrap(),
+        )
+    }
+
+ /// Manager loop: blocks on the incoming channel and batches `CheckRequest`s per queue.
+ ///
+ /// When a queue's buffer reports a full batch, one message is sent to the host service
+ /// and one to the output service, both tagged with the current `job_id`, which is then
+ /// incremented.
+ ///
+ /// # Panics
+ /// Panics when the incoming channel is closed, and on `Message::Terminate` (see below).
+ fn run(mut self) {
+ loop {
+ match self.reciever.recv().expect("Channel to GPU Manager broke") {
+ Message::CheckRequest(request) => {
+ // NOTE(review): `RequestBuffer::read` returns `Option<&[u64]>`, which has no
+ // `.0` / `.1` fields — confirm the intended batch type.
+ if let Some(buffer) = self.buffers[request.queue as usize].read(request) {
+ // NOTE(review): the results of both `send` calls are discarded.
+ self.host_sender
+ .send(Message::HostMessage((self.job_id, buffer.0.into())));
+ self.output_sender
+ .send(Message::OutputMessage((self.job_id, buffer.1.into())));
+ self.job_id += 1;
+ }
+ }
+ Message::Terminate => {
+ // NOTE(review): this panic makes the shutdown sequence below unreachable; it
+ // looks like a placeholder for flushing partially filled buffers.
+ panic!("flush buffers");
+ self.host_sender.send(Message::Terminate);
+ self.host_handle.join();
+ self.output_sender.send(Message::Terminate);
+ self.output_handle.join();
+ return;
+ }
+ _ => println!("Invalid MessageType"),
+ }
+ }
+ }
+}
diff --git a/src/solvers/gpu/mod.rs b/src/solvers/gpu/mod.rs
new file mode 100644
index 0000000..f9ab711
--- /dev/null
+++ b/src/solvers/gpu/mod.rs
@@ -0,0 +1,56 @@
+pub mod host;
+pub mod manager;
+pub mod output;
+
+pub use manager::*;
+
+type MaskMessage = (u64, Vec<u64>);
+type RowMessage = (u64, Vec<Vec<u32>>);
+
+/// Messages exchanged between the GPU manager, host and output threads.
+pub enum Message {
+ /// A single check request, batched by the manager.
+ CheckRequest(CheckRequest),
+ /// Job id plus a batch of bitmasks, sent by the manager to the host service.
+ HostMessage(MaskMessage),
+ /// Job id plus a batch of rows, sent by the manager to the output service.
+ OutputMessage(RowMessage),
+ /// Request to shut the receiving service down.
+ Terminate,
+}
+
+/// Raw result words of one job together with the layout needed to decode them.
+pub struct ResultMessage {
+    /// Result bit-words; every permutation owns `size` consecutive words.
+    data: Vec<u64>,
+    /// Added to a word's permutation position to obtain the permutation index.
+    offset: usize,
+    /// Number of `u64` words per permutation.
+    size: usize,
+}
+
+impl ResultMessage {
+    /// Bundles result words with their decoding layout.
+    fn new(data: Vec<u64>, offset: usize, size: usize) -> Self {
+        Self { data, offset, size }
+    }
+
+    /// Decodes the set bits into one list of permutation indices per instruction.
+    ///
+    /// The returned vector has `wg_size` entries. A set bit `b` in word `j` appends
+    /// `j / size + offset` to entry `(j % size) * 64 + b`.
+    ///
+    /// The lists are returned by value: they are built inside this call, so a borrowed
+    /// slice could not outlive it.
+    ///
+    /// # Panics
+    /// Panics if a set bit maps to an instruction index `>= wg_size`, or if `size` is 0
+    /// while any word is non-zero.
+    fn valid_walls(&self, wg_size: usize) -> Vec<Vec<u32>> {
+        let mut result = vec![Vec::new(); wg_size];
+        for (j, word) in self.data.iter().enumerate() {
+            // Nothing to decode in an empty word.
+            if *word == 0 {
+                continue;
+            }
+            let permutation = (j / self.size + self.offset) as u32;
+            let first_instruction = (j % self.size) * 64;
+            for b in 0..64usize {
+                if *word & (1u64 << b) != 0 {
+                    result[first_instruction + b].push(permutation);
+                }
+            }
+        }
+        result
+    }
+}
+
+/// A single request handed to the GPU manager.
+pub struct CheckRequest {
+    /// Rows belonging to this request.
+    rows: Vec<u32>,
+    /// Bitmask that the manager copies into the batch for `queue`.
+    bitmask: u64,
+    /// Index of the manager's request buffer this request goes to.
+    queue: u32,
+}
+
+impl CheckRequest {
+    /// Builds a request from its three parts.
+    pub fn new(rows: Vec<u32>, bitmask: u64, queue: u32) -> Self {
+        CheckRequest { rows, bitmask, queue }
+    }
+}
diff --git a/src/solvers/gpu/output.rs b/src/solvers/gpu/output.rs
new file mode 100644
index 0000000..58a4aa5
--- /dev/null
+++ b/src/solvers/gpu/output.rs
@@ -0,0 +1,96 @@
+use super::Message;
+use std::collections::{HashSet, HashMap};
+use std::sync::mpsc::{channel, Receiver, Sender};
+use std::thread::JoinHandle;
+
+/// Receiving side of the output service, with two maps keyed by `u64`.
+/// NOTE(review): the keys look like job ids and the maps like stores for rows and results
+/// that are still waiting for their counterpart — confirm once `read` is finished.
+struct InBuffer {
+ /// Channel the output service receives its messages on.
+ receiver: Receiver<Message>,
+ /// Rows stored per key.
+ row_requests: HashMap<u64, Vec<u32>>,
+ /// Result words stored per key.
+ results_requests: HashMap<u64, Vec<u64>>,
+
+}
+
+impl InBuffer {
+ /// Wraps `receiver` with empty row and result maps.
+ fn new(receiver: Receiver<Message>) -> Self {
+ Self {
+ receiver,
+ row_requests: HashMap::new(),
+ results_requests: HashMap::new(),
+ }
+ }
+ // NOTE(review): `read` is unfinished and does not parse as written: the `if` is missing
+ // `let`, `RowResult::new(` is never closed, `walls` and `result` are not defined, the
+ // `recv()` result is matched without unwrapping, and the `impl` block is never closed.
+ // The apparent intent is to pair messages by id through the two maps — confirm before
+ // completing it.
+ fn read(&mut self) -> Option<Result> {
+ loop {
+ match self.receiver.recv() {
+ Message::OutputMessage((id, ResultMessage)) => {
+ if Some(result) = self.results_requests.get(id) {
+ Some(RowResult::new()
+ }
+ else {
+ self.row_requests.insert(id, walls);}
+ }
+ }
+ }
+
+}
+
+/// A result expressed as a list of rows; comparable and hashable so it can be kept in a set.
+#[derive(PartialEq, Eq, Hash)]
+struct RowResult {
+    /// The rows passed to `new`, followed by a trailing `0`.
+    rows: Vec<u32>,
+}
+
+impl RowResult {
+    /// Takes ownership of `rows` and appends a trailing `0`.
+    ///
+    /// The parameter is bound mutably because the vector is extended in place.
+    /// NOTE(review): the meaning of the appended `0` is not visible here — confirm.
+    fn new(mut rows: Vec<u32>) -> Self {
+        rows.push(0);
+        Self { rows }
+    }
+
+    /// Prints the rows to standard output in debug format.
+    fn output(&self) {
+        println!("{:?}", self.rows);
+    }
+}
+
+/// State of the output service thread.
+pub struct Output {
+ /// Buffered view of the incoming channel.
+ input: InBuffer,
+ /// NOTE(review): `launch_sevice` moves the only receiver into `input`, so nothing is left
+ /// to initialise this field with — confirm whether it is still needed.
+ receiver: Receiver<Message>,
+ /// Owned copy of the permutations handed to `launch_sevice`.
+ permutations: Vec<Vec<u32>>,
+ /// Owned copy of the permutation masks. NOTE(review): `launch_sevice` initialises a field
+ /// spelled `permutation_masks` — the two names need to agree.
+ permutations_mask: Vec<u64>,
+ /// Set of distinct results.
+ results: HashSet<RowResult>,
+}
+
+impl Output {
+ /// Creates the output service's channel and state and spawns its thread.
+ ///
+ /// Returns the sender for the service together with the thread's join handle.
+ ///
+ /// NOTE(review): this does not compile as written — the struct literal uses
+ /// `permutation_masks` (the field is `permutations_mask`), passes `HashSet::new()` without a
+ /// field name, omits `receiver`, and lacks a terminating `;`. The closure returns `()` while
+ /// the handle type is `JoinHandle<String>`. The function is private although the manager
+ /// calls it from another module with five arguments, and the thread name duplicates the
+ /// manager's.
+ fn launch_sevice(
+ permutations: &[Vec<u32>],
+ permutation_masks: &[u64],
+ ) -> (Sender<Message>, JoinHandle<String>) {
+ let (sender, receiver) = channel();
+ let input = InBuffer::new(receiver);
+
+ let output = Self {
+ input,
+ permutations: permutations.into(),
+ permutation_masks: permutation_masks.into(),
+ HashSet::new(),
+ }
+ (
+ sender,
+ std::thread::Builder::new()
+ .name("GPU Manager Deamon".into())
+ .spawn(move || {
+ output.run();
+ })
+ .unwrap(),
+ )
+ }
+
+ // NOTE(review): `run` is an unfinished copy of `InBuffer::read` with the same problems:
+ // missing `let`, an unclosed `RowResult::new(`, undefined `walls`/`result`, fields that
+ // live on `InBuffer` rather than `Output`, and an `impl` block that is never closed.
+ fn run(mut self) {
+ loop {
+ match self.receiver.recv() {
+ Message::OutputMessage((id, ResultMessage)) => {
+ if Some(result) = self.results_requests.get(id) {
+ Some(RowResult::new()
+ }
+ else {
+ self.row_requests.insert(id, walls);}
+ }
+ }
+ }
+}
diff --git a/src/solvers/gpusolver.rs b/src/solvers/gpusolver.rs
index 3a96568..41de7e7 100644
--- a/src/solvers/gpusolver.rs
+++ b/src/solvers/gpusolver.rs
@@ -1,6 +1,5 @@
use crate::permutations::PermutationGenerator;
-use crate::solver::{wall_stats, IteratorSolver, Solver};
-use crate::solvers::opencl;
+use crate::solvers::{opencl, wall_stats, IteratorSolver, Solver};
use crate::structs::StoneWall;
#[derive(Debug)]
diff --git a/src/solvers/mod.rs b/src/solvers/mod.rs
index 1bdc228..189deca 100755..100644
--- a/src/solvers/mod.rs
+++ b/src/solvers/mod.rs
@@ -1,5 +1,29 @@
//pub mod incremental_block;
-pub mod intuitive;
-//#[cfg(feature = "gpu")]
+pub mod gpu;
pub mod gpusolver;
-pub mod opencl;
+pub mod single;
+
+use crate::structs::StoneWall;
+pub use gpu::*;
+
+/// Derives `(h, w)` from `n`: `h = n / 2 + 1` and `w = (n - 1) * h`.
+pub fn wall_stats(n: u32) -> (u32, u32) {
+    let height = n / 2 + 1;
+    let width = (n - 1) * height;
+    (height, width)
+}
+
+/// Interface shared by all solvers: construction from `n` plus access to three dimensions.
+pub trait Solver {
+ /// Creates a solver for problem size `n`.
+ fn new(n: u32) -> Self;
+ /// Returns `n`. NOTE(review): presumably the value given to `new` — confirm in implementors.
+ fn n(&self) -> u32;
+ /// Returns `h`. NOTE(review): presumably the first value of `wall_stats(n)` — confirm.
+ fn h(&self) -> u32;
+ /// Returns `w`. NOTE(review): presumably the second value of `wall_stats(n)` — confirm.
+ fn w(&self) -> u32;
+}
+
+/// A solver that consumes itself and yields a single `StoneWall`.
+pub trait FirstSolver {
+ /// Runs the solver and returns one wall.
+ fn solve(self) -> StoneWall;
+}
+
+/// A `Solver` that consumes itself and yields its walls through an iterator.
+pub trait IteratorSolver: Solver {
+ /// Iterator type returned by `solve`; its items are `StoneWall`s.
+ type IntoIter: Iterator<Item = StoneWall>;
+ /// Runs the solver and returns the iterator over its walls.
+ fn solve(self) -> Self::IntoIter;
+}
diff --git a/src/solvers/intuitive.rs b/src/solvers/single.rs
index ad3e5b7..ad3e5b7 100755..100644
--- a/src/solvers/intuitive.rs
+++ b/src/solvers/single.rs
diff --git a/src/structs.rs b/src/structs.rs
index c61bb14..c61bb14 100755..100644
--- a/src/structs.rs
+++ b/src/structs.rs