summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Dennis Kobert <dennis@kobert.dev> 2020-01-12 06:36:06 +0100
committer: Dennis Kobert <dennis@kobert.dev> 2020-01-12 06:36:06 +0100
commit: cf6c285d0960526843f849a9033ff1895fc8cab7 (patch)
tree: caa08ac9f44284852e395c4d9e41bdf8ed16e763
parent: 29bffc6f6c794fee886904ad3960c4cb770deb11 (diff)
Fix borrow mistakes (refactoring)
-rw-r--r-- src/main.rs 2
-rw-r--r-- src/solvers/gpu/host.rs 172
-rw-r--r-- src/solvers/gpu/manager.rs 22
-rw-r--r-- src/solvers/gpu/mod.rs 6
-rw-r--r-- src/solvers/gpu/output.rs 17
-rw-r--r-- src/solvers/gpusolver.rs 15
-rw-r--r-- src/solvers/single.rs 43
7 files changed, 132 insertions, 145 deletions
diff --git a/src/main.rs b/src/main.rs
index c2643c3..f18f2f8 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -3,7 +3,7 @@ mod solvers;
mod structs;
use crate::solvers::{IteratorSolver, Solver};
-pub static N: u32 = 8;
+pub static N: u32 = 4;
fn main() {
let mut solver = solvers::single::NormalSolver::new(N);
solver.solve();
diff --git a/src/solvers/gpu/host.rs b/src/solvers/gpu/host.rs
index c53c42c..39843c4 100644
--- a/src/solvers/gpu/host.rs
+++ b/src/solvers/gpu/host.rs
@@ -1,6 +1,6 @@
-use super::{HostMessage, Message, ResultMessage};
+use super::{Message, ResultMessage};
use ocl::{flags, Buffer, Context, Device, Kernel, Platform, Program, Queue};
-use std::sync::mpsc::{Receiver, Sender};
+use std::sync::mpsc::{channel, Receiver, Sender};
#[derive(Debug)]
pub struct Host {
@@ -19,7 +19,7 @@ pub struct Host {
wg_size: usize,
permutations: Buffer<u64>,
receiver: Receiver<Message>,
- walls: Vec<Vec<u32>>,
+ output_sender: Sender<Message>,
}
impl Host {
@@ -30,6 +30,7 @@ impl Host {
w: u32,
mut wg_size: usize,
src: &str,
+ output_sender: Sender<Message>,
) -> ocl::Result<(Sender<Message>, std::thread::JoinHandle<()>)> {
let platform = ocl::Platform::default();
let device = ocl::Device::first(platform)?;
@@ -50,6 +51,7 @@ impl Host {
.len(permutation_masks.len())
.build()?;
+ let (sender, receiver) = channel();
let max_wg_size = device.max_wg_size()?;
if wg_size == 0 {
wg_size = max_wg_size;
@@ -68,17 +70,17 @@ impl Host {
w,
wg_size,
permutations: buffer,
- rec_queues: receivers,
- walls: Vec::new(),
+ receiver,
+ output_sender,
};
- std::thread::Builder::new()
+ let handle = std::thread::Builder::new()
.name("GPU Deamon".into())
.spawn(move || {
solver.run();
})
.unwrap();
println!("started gpu thread");
- Ok(senders)
+ Ok((sender, handle))
}
fn get_dim(&self, queue: usize) -> usize {
@@ -86,13 +88,13 @@ impl Host {
let dim = (queue + 1) * chunk;
(dim + self.wg_size - 1) / self.wg_size * self.wg_size
}
- fn get_off(&self, queue: usize) -> u64 {
+ fn get_off(&self, queue: usize) -> usize {
let chunk = self.permutations.len() / self.n as usize;
let off = self.permutations.len() - chunk - self.get_dim(queue);
if off > isize::max_value() as usize {
panic!("workgroup size to big, offset underflow")
}
- off as u64
+ off
}
fn get_res(&self, queue: usize) -> usize {
let dim = self.get_dim(queue);
@@ -102,8 +104,8 @@ impl Host {
(self.wg_size + 63) / 64
}
- fn run(mut self) -> ! {
- let queues = self.rec_queues.len();
+ fn run(self) {
+ let queues = (self.n - self.h + 1) as usize;
let mut instruction_buffer = Vec::with_capacity((self.n - self.h) as usize);
let mut result_buffer = Vec::with_capacity((self.n - self.h) as usize);
@@ -125,105 +127,71 @@ impl Host {
result_buffer.push(results);
}
println!("finished gpu setup");
- for i in (0..self.rec_queues.len()).cycle() {
- if let Some(buffer) = self.rec_queues[i].read() {
- instruction_buffer[i].write(buffer).enq().unwrap();
- let dim = self.get_dim(i);
+ loop {
+ match self.receiver.recv().expect("Channel to Host broke") {
+ Message::Terminate => {
+ self.output_sender.send(Message::Terminate);
+ return;
+ }
+ Message::HostMessage((id, i, buffer)) => {
+ let i = i as usize;
+ let off = self.get_off(i);
+ let dim = self.get_dim(i);
+ let res = self.get_res(i);
+ let res_size = self.get_res_save_dim();
+
+ instruction_buffer[i].write(&buffer).enq().unwrap();
+
+ //println!("dim: {}", dim);
+ //println!("off: {}", self.get_off(i));
+ //println!("result size: {}", self.get_res_save_dim());
+ let kernel = Kernel::builder()
+ .program(&self.program)
+ .name("check")
+ .queue(self.queue.clone())
+ .global_work_size(dim)
+ .arg(&self.permutations)
+ .arg(&result_buffer[i])
+ .arg(&instruction_buffer[i])
+ .arg_local::<u64>(self.wg_size)
+ .arg(self.n)
+ .arg(self.w)
+ .arg(off)
+ .build()
+ .unwrap();
- //println!("dim: {}", dim);
- //println!("off: {}", self.get_off(i));
- //println!("result size: {}", self.get_res_save_dim());
- let kernel = Kernel::builder()
- .program(&self.program)
- .name("check")
- .queue(self.queue.clone())
- .global_work_size(dim)
- .arg(&self.permutations)
- .arg(&result_buffer[i])
- .arg(&instruction_buffer[i])
- .arg_local::<u64>(self.wg_size)
- .arg(self.n)
- .arg(self.w)
- .arg(self.get_off(i))
- .build()
- .unwrap();
+ unsafe {
+ kernel
+ .cmd()
+ .queue(&self.queue)
+ .global_work_offset(kernel.default_global_work_offset())
+ .global_work_size(dim)
+ .local_work_size(self.wg_size)
+ .enq()
+ .unwrap();
+ }
- unsafe {
- kernel
+ // (5) Read results from the device into a vector (`::block` not shown):
+ let mut data = vec![0u64; res];
+ result_buffer[i]
.cmd()
.queue(&self.queue)
- .global_work_offset(kernel.default_global_work_offset())
- .global_work_size(dim)
- .local_work_size(self.wg_size)
+ .offset(0)
+ .read(&mut data)
.enq()
.unwrap();
+ self.output_sender
+ .send(Message::ResultMessage(ResultMessage::new(
+ data,
+ off,
+ res_size,
+ self.wg_size,
+ id,
+ )))
+ .unwrap();
}
-
- // (5) Read results from the device into a vector (`::block` not shown):
- let mut result = vec![0u64; self.get_res(i)];
- result_buffer[i]
- .cmd()
- .queue(&self.queue)
- .offset(0)
- .read(&mut result)
- .enq()
- .unwrap();
- for (j, r) in result.iter().enumerate() {
- if j == 0 {
- continue;
- }
- for b in 0..64 {
- if r & (1 << b) != 0 {
- let permutation =
- j / self.get_res_save_dim() + self.get_off(i) as usize;
- let instruction = (j % self.get_res_save_dim()) * 64 + b;
- let mut wall = self.rec_queues[i].get_rows()[instruction].clone();
- wall.push(permutation as u32);
- println!("{:?}", wall);
- self.walls.push(wall);
- }
- }
- }
+ _ => println!("Invalid MessageType"),
}
}
- panic!();
}
}
-/*
-pub fn check(permutations: &[u64], w: u32, n: u32, mask: u64, offset: usize) -> ocl::Result<()> {
- //println!("read src!");
- let src = std::fs::read_to_string("src/solvers/check.cl").expect("failed to open kernel file");
-
- //println!("created queue!");
- println!("offset: {}", offset);
- println!("length: {}", permutations.len() - offset);
- let pro_que = ocl::ProQue::builder()
- .src(src)
- .dims(permutations.len() - offset)
- .build()?;
-
- let results = pro_que.create_buffer::<i32>()?;
- let kernel = pro_que
- .kernel_builder("check")
- .arg(get_buffer())
- .arg(&results)
- .arg(mask)
- .arg(n)
- .arg(w)
- .arg(offset as u64)
- //.global_work_offset(offset)
- .build()?;
-
- //println!("starting calculation");
- unsafe {
- kernel.enq()?;
- }
-
- let mut vec = vec![0; results.len()];
- results.read(&mut vec).enq()?;
-
- if vec.iter().any(|x| *x != 0) {
- println!("The resuts are now '{:?}'!", vec);
- }
- Ok(())
-}*/
diff --git a/src/solvers/gpu/manager.rs b/src/solvers/gpu/manager.rs
index b3d88b8..e210af2 100644
--- a/src/solvers/gpu/manager.rs
+++ b/src/solvers/gpu/manager.rs
@@ -51,8 +51,16 @@ impl OclManager {
let src = include_str!("check.cl");
let (output_sender, output_handle) =
super::output::Output::launch_sevice(permutations, permutations_mask);
- let (host_sender, host_handle) =
- super::host::Host::launch_sevice(permutations_mask, n, h, w, wg_size as usize, src);
+ let (host_sender, host_handle) = super::host::Host::launch_sevice(
+ permutations_mask,
+ n,
+ h,
+ w,
+ wg_size as usize,
+ src,
+ output_sender.clone(),
+ )
+ .unwrap();
let (sender, receiver) = channel();
@@ -85,11 +93,15 @@ impl OclManager {
loop {
match self.receiver.recv().expect("Channel to GPU Manager broke") {
Message::CheckRequest(request) => {
- if let Some(buffer) = self.buffers[request.queue as usize].read(request) {
+ let queue = request.queue;
+ println!("{}", queue);
+ if let Some(buffer) = self.buffers[queue as usize].read(request) {
self.host_sender
- .send(Message::HostMessage((self.job_id, buffer.0.into())));
+ .send(Message::HostMessage((self.job_id, queue, buffer.0.into())))
+ .unwrap();
self.output_sender
- .send(Message::OutputMessage((self.job_id, buffer.1.into())));
+ .send(Message::OutputMessage((self.job_id, buffer.1.into())))
+ .unwrap();
self.job_id += 1;
}
}
diff --git a/src/solvers/gpu/mod.rs b/src/solvers/gpu/mod.rs
index f147f31..2c7f69d 100644
--- a/src/solvers/gpu/mod.rs
+++ b/src/solvers/gpu/mod.rs
@@ -4,7 +4,7 @@ pub mod output;
pub use manager::*;
-type MaskMessage = (u64, Vec<u64>);
+type MaskMessage = (u64, u32, Vec<u64>);
type RowMessage = (u64, Vec<Vec<u32>>);
pub enum Message {
@@ -33,7 +33,7 @@ impl ResultMessage {
id,
}
}
- fn valid_walls(&self) -> &[Vec<u32>] {
+ fn valid_walls(&self) -> Vec<Vec<u32>> {
let mut result = vec![Vec::new(); self.wg_size];
for (j, r) in self.data.iter().enumerate() {
for b in 0..64 {
@@ -44,7 +44,7 @@ impl ResultMessage {
}
}
}
- result.as_ref()
+ result
}
}
diff --git a/src/solvers/gpu/output.rs b/src/solvers/gpu/output.rs
index a716340..b35d072 100644
--- a/src/solvers/gpu/output.rs
+++ b/src/solvers/gpu/output.rs
@@ -26,14 +26,20 @@ impl InBuffer {
{
Message::ResultMessage(results) => {
if let Some(result_walls) = self.row_requests.get(&results.id) {
- return Some(Self::calc_results(results.valid_walls(), result_walls));
+ return Some(Self::calc_results(
+ results.valid_walls().as_ref(),
+ result_walls,
+ ));
} else {
self.results_requests.insert(results.id, results);
}
}
Message::OutputMessage((id, output)) => {
if let Some(results) = self.results_requests.get(&id) {
- return Some(Self::calc_results(results.valid_walls(), output.as_ref()));
+ return Some(Self::calc_results(
+ results.valid_walls().as_ref(),
+ output.as_ref(),
+ ));
} else {
self.row_requests.insert(id, output);
}
@@ -48,10 +54,10 @@ impl InBuffer {
}
}
fn calc_results(res_req: &[Vec<u32>], row_req: &[Vec<u32>]) -> Vec<RowResult> {
- let out = Vec::new();
+ let mut out = Vec::new();
for (rows, perms) in row_req.iter().zip(res_req.iter()) {
for p in perms {
- let new = rows.clone();
+ let mut new = rows.clone();
new.push(*p);
out.push(RowResult::new(new));
}
@@ -66,7 +72,7 @@ pub struct RowResult {
}
impl RowResult {
- fn new(rows: Vec<u32>) -> Self {
+ fn new(mut rows: Vec<u32>) -> Self {
rows.push(0);
Self { rows }
}
@@ -117,6 +123,7 @@ impl Output {
for wall in self.results {
wall.output()
}
+ return;
}
}
}
diff --git a/src/solvers/gpusolver.rs b/src/solvers/gpusolver.rs
index 41de7e7..3bf8429 100644
--- a/src/solvers/gpusolver.rs
+++ b/src/solvers/gpusolver.rs
@@ -1,5 +1,5 @@
use crate::permutations::PermutationGenerator;
-use crate::solvers::{opencl, wall_stats, IteratorSolver, Solver};
+use crate::solvers::{gpu, wall_stats, IteratorSolver, Solver};
use crate::structs::StoneWall;
#[derive(Debug)]
@@ -16,12 +16,15 @@ impl GpuSolver {
let src =
std::fs::read_to_string("src/solvers/check.cl").expect("failed to open kernel file");
- let senders =
- opencl::GpuSolver::launch_sevice(&self.masks, self.n, self.h, self.w, 4, src.as_ref())
- .unwrap();
+ let (sender, handle) =
+ gpu::OclManager::launch_sevice(&self.permutations, &self.masks, self.n, 4);
for i in 0..12 {
- senders[1 - i / 6]
- .send(opencl::Job::new(vec![i as u32], self.masks[i]))
+ sender
+ .send(gpu::Message::CheckRequest(gpu::CheckRequest::new(
+ vec![i as u32],
+ self.masks[i],
+ 1 - i as u32 / 6,
+ )))
.unwrap();
}
loop {
diff --git a/src/solvers/single.rs b/src/solvers/single.rs
index ad3e5b7..6bfdd5f 100644
--- a/src/solvers/single.rs
+++ b/src/solvers/single.rs
@@ -1,7 +1,6 @@
-use rayon::prelude::*;
+use super::gpu::*;
/// Solve for a given N and return the resulting wall
-#[derive(Clone)]
pub struct NormalSolver {
pub n: u32,
/// calculated height [might not be correct!]
@@ -13,12 +12,10 @@ pub struct NormalSolver {
/// Use to store already used blocks as a bitmask
permutations: Vec<Vec<u32>>,
masks: Vec<u64>,
- senders: Vec<std::sync::mpsc::Sender<super::opencl::Job>>,
+ gpu_sender: std::sync::mpsc::Sender<super::gpu::Message>,
+ gpu_handle: Option<std::thread::JoinHandle<()>>,
}
-static mut TRIES: u32 = 0;
-static mut SOLUTIONS: u32 = 0;
-
impl NormalSolver {
pub fn new(n: u32) -> Self {
let h = n / 2 + 1;
@@ -40,11 +37,8 @@ impl NormalSolver {
}
}
- let src =
- std::fs::read_to_string("src/solvers/check.cl").expect("failed to open kernel file");
-
- let senders =
- super::opencl::GpuSolver::launch_sevice(&masks, n, h, w, 0, src.as_ref()).unwrap();
+ let (gpu_sender, gpu_handle) =
+ super::gpu::OclManager::launch_sevice(&permutations, &masks, n, 0);
Self {
n,
h,
@@ -53,16 +47,17 @@ impl NormalSolver {
mask: (1 << w) - 2,
permutations,
masks,
- senders,
+ gpu_sender,
+ gpu_handle: Some(gpu_handle),
}
}
pub fn solve(&mut self) {
- for (n, i) in self.permutations.iter().enumerate() {
- let tmp: Vec<u32> = i.iter().map(|x| *x).collect();
- //println!("perm {}: {:?}", n, tmp);
- //println!("perm {}: {:b}", n, self.masks[n]);
- }
+ //for (n, i) in self.permutations.iter().enumerate() {
+ //let tmp: Vec<u32> = i.clone();
+ //println!("perm {}: {:?}", n, tmp);
+ //println!("perm {}: {:b}", n, self.masks[n]);
+ //}
println!("calculate results");
self.permute(
permutohedron::factorial(self.n as usize),
@@ -73,10 +68,10 @@ impl NormalSolver {
.collect::<Vec<u32>>())
.as_ref(),
);
- unsafe { println!("tries: {}\nsolutions: {}", TRIES, SOLUTIONS) }
- loop {
- std::thread::sleep(std::time::Duration::from_secs(5));
- }
+ self.gpu_sender
+ .send(super::gpu::Message::Terminate)
+ .unwrap();
+ self.gpu_handle.take().unwrap().join().unwrap();
}
fn permute(&self, up: usize, index: usize, curr_mask: u64, numbers: &[u32]) {
@@ -95,8 +90,10 @@ impl NormalSolver {
println!("mem wait {:?}", info);
}
let i = self.n - 2 - numbers[index] / self.chunk;
- self.senders[i as usize]
- .send(super::opencl::Job::new(new_num, curr_mask))
+ self.gpu_sender
+ .send(Message::CheckRequest(CheckRequest::new(
+ new_num, curr_mask, i,
+ )))
.unwrap();
return;
//}