summary | refs | log | tree | commit | diff
path: root/src/solvers/gpusolver.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/solvers/gpusolver.rs')
-rw-r--r-- src/solvers/gpusolver.rs | 95
1 file changed, 79 insertions, 16 deletions
diff --git a/src/solvers/gpusolver.rs b/src/solvers/gpusolver.rs
index 3bf8429..371f116 100644
--- a/src/solvers/gpusolver.rs
+++ b/src/solvers/gpusolver.rs
@@ -1,36 +1,97 @@
use crate::permutations::PermutationGenerator;
-use crate::solvers::{gpu, wall_stats, IteratorSolver, Solver};
-use crate::structs::StoneWall;
+use crate::solvers::gpu::*;
+use crate::solvers::{wall_stats, IteratorSolver, Solver};
+use rayon::prelude::*;
+use std::sync::mpsc::Sender;
#[derive(Debug)]
pub struct GpuSolver {
n: u32,
h: u32,
w: u32,
+ chunk: u32,
permutations: Vec<Vec<u32>>,
masks: Vec<u64>,
}
impl GpuSolver {
- fn solve_to_vec(&mut self) -> Vec<StoneWall> {
- let src =
- std::fs::read_to_string("src/solvers/check.cl").expect("failed to open kernel file");
-
+ fn solve_to_vec(&mut self) -> Vec<RowResult> {
+ let (sender, receiver) = std::sync::mpsc::channel();
let (sender, handle) =
- gpu::OclManager::launch_sevice(&self.permutations, &self.masks, self.n, 4);
- for i in 0..12 {
+ OclManager::launch_sevice(&self.permutations, &self.masks, self.n, 0, sender);
+ let chunk = permutohedron::factorial(self.n as usize - 1) as u32;
+ self.permute(
+ 0,
+ 0,
+ ((0..(self.h - 1)).map(|x| x * chunk).collect::<Vec<u32>>()).as_ref(),
+ sender.clone(),
+ );
+ sender.send(Message::CpuDone).unwrap();
+ let mut walls = Vec::new();
+ while let Ok(Message::RowResult(wall)) = receiver.recv() {
+ walls.push(wall);
+ }
+ println!("{:?}", walls);
+ handle.join().unwrap();
+ walls
+ }
+
+ fn permute(&self, index: usize, curr_mask: u64, numbers: &[u32], sender: Sender<Message>) {
+ if curr_mask.count_ones() < index as u32 * (self.n - 1) {
+ return;
+ }
+ let mut new_num = Vec::from(numbers);
+ let start = numbers[index as usize] / self.chunk;
+ if index as usize == numbers.len() - 1 {
+ let mut info = sys_info::mem_info().unwrap();
+ while info.avail < info.total / 8 {
+ std::thread::sleep(std::time::Duration::from_millis(5));
+ info = sys_info::mem_info().unwrap();
+ println!("mem wait {:?}", info);
+ }
+ let i = self.n - 2 - numbers[index] / self.chunk;
sender
- .send(gpu::Message::CheckRequest(gpu::CheckRequest::new(
- vec![i as u32],
- self.masks[i],
- 1 - i as u32 / 6,
+ .send(Message::CheckRequest(CheckRequest::new(
+ new_num, curr_mask, i,
)))
.unwrap();
+ return;
}
- loop {
- std::thread::sleep(std::time::Duration::from_secs(5));
+ for i in start..self.n - (self.h - 1 - index as u32) {
+ for n in 1..(numbers.len() - index) {
+ new_num[n + index] = (n as u32 + i) * self.chunk;
+ }
+ if index == 0 {
+ let senders: Vec<_> = (0..self.chunk).map(|_| sender.clone()).collect();
+ (0..self.chunk)
+ .into_par_iter()
+ .zip(senders)
+ .for_each(|(j, sender)| {
+ let mut new_num = new_num.clone();
+ let tmp = i * self.chunk + j;
+ new_num[index] = tmp;
+ self.permute(
+ index + 1,
+ curr_mask | self.masks[tmp as usize],
+ &new_num,
+ sender,
+ );
+ });
+ } else {
+ for j in 0..self.chunk {
+ new_num[index] = i * self.chunk + j;
+ if index == 0 {
+ println!("progress: {}%", j as f64 / self.chunk as f64);
+ }
+ self.permute(
+ index + 1,
+ curr_mask | self.masks[new_num[index] as usize],
+ &new_num,
+ sender.clone(),
+ );
+ }
+ }
}
- vec![]
}
}
@@ -59,10 +120,12 @@ impl Solver for GpuSolver {
let (h, w) = wall_stats(n);
let permutations = generate_permutations(n);
let masks = generate_masks(&permutations);
+ let chunk = permutohedron::factorial(n as usize - 1) as u32;
Self {
n,
h,
w,
+ chunk,
permutations,
masks,
}
@@ -79,7 +142,7 @@ impl Solver for GpuSolver {
}
impl IteratorSolver for GpuSolver {
- type IntoIter = std::vec::IntoIter<StoneWall>;
+ type IntoIter = std::vec::IntoIter<RowResult>;
fn solve(mut self) -> Self::IntoIter {
self.solve_to_vec().into_iter()
}