From 1650906f010574e8810c8b0b98334e22fac5894d Mon Sep 17 00:00:00 2001 From: Dennis Kobert Date: Sun, 12 Jan 2020 03:47:14 +0100 Subject: Restructuring --- src/solvers/check.cl | 43 --------- src/solvers/gpu/check.cl | 43 +++++++++ src/solvers/gpu/host.rs | 235 +++++++++++++++++++++++++++++++++++++++++++++ src/solvers/gpu/manager.rs | 104 ++++++++++++++++++++ src/solvers/gpu/mod.rs | 56 +++++++++++ src/solvers/gpu/output.rs | 96 ++++++++++++++++++ src/solvers/gpusolver.rs | 3 +- src/solvers/intuitive.rs | 136 -------------------------- src/solvers/mod.rs | 30 +++++- src/solvers/single.rs | 136 ++++++++++++++++++++++++++ 10 files changed, 698 insertions(+), 184 deletions(-) delete mode 100644 src/solvers/check.cl create mode 100644 src/solvers/gpu/check.cl create mode 100644 src/solvers/gpu/host.rs create mode 100644 src/solvers/gpu/manager.rs create mode 100644 src/solvers/gpu/mod.rs create mode 100644 src/solvers/gpu/output.rs delete mode 100755 src/solvers/intuitive.rs mode change 100755 => 100644 src/solvers/mod.rs create mode 100644 src/solvers/single.rs (limited to 'src/solvers') diff --git a/src/solvers/check.cl b/src/solvers/check.cl deleted file mode 100644 index af9bc51..0000000 --- a/src/solvers/check.cl +++ /dev/null @@ -1,43 +0,0 @@ -//#pragma OPENCL EXTENSION cl_intel_printf : enable -typedef ulong u64; -typedef uint u32; - -__kernel void check(__global u64* permutations, __global u64* results, - __global u64* instructions, __local u64* i_buffer, u32 n, u32 w, u64 offset) { - int wid = get_local_id(0); - i_buffer[wid] = instructions[wid]; - barrier(CLK_LOCAL_MEM_FENCE); - - int gid = get_global_id(0); - int gsize = get_global_size(0); - int wsize = get_local_size(0); - - u32 w64 = wsize / 64 + !!(wsize & 63); - u64 result = 0; - u64 own = permutations[gid + offset]; - u64 correct_mask = (1 << (n + 1)) - 2; - - - int j; - for (j = 0; j < wsize; j++) { - unsigned long curr_mask = i_buffer[j] | own; - unsigned long tmask, sum, stones; - stones = tmask = sum = 0; - for (int i = 1; i <= w + 1; i++) { - if (!(curr_mask & (1 << i))) { - stones++; - tmask |= 1 << (i - sum); - sum = i; - } - } - if (tmask == correct_mask && stones == n) { - result |= 1 << (j & 63); - } - if (j % 64 == 0 && j) { - results[gid * w64 + j / 64] = result; - result = 0; - } - } - results[gid * w64 + j / 64] = result; -} - diff --git a/src/solvers/gpu/check.cl b/src/solvers/gpu/check.cl new file mode 100644 index 0000000..af9bc51 --- /dev/null +++ b/src/solvers/gpu/check.cl @@ -0,0 +1,43 @@ +//#pragma OPENCL EXTENSION cl_intel_printf : enable +typedef ulong u64; +typedef uint u32; + +__kernel void check(__global u64* permutations, __global u64* results, + __global u64* instructions, __local u64* i_buffer, u32 n, u32 w, u64 offset) { + int wid = get_local_id(0); + i_buffer[wid] = instructions[wid]; + barrier(CLK_LOCAL_MEM_FENCE); + + int gid = get_global_id(0); + int gsize = get_global_size(0); + int wsize = get_local_size(0); + + u32 w64 = wsize / 64 + !!(wsize & 63); + u64 result = 0; + u64 own = permutations[gid + offset]; + u64 correct_mask = (1 << (n + 1)) - 2; + + + int j; + for (j = 0; j < wsize; j++) { + unsigned long curr_mask = i_buffer[j] | own; + unsigned long tmask, sum, stones; + stones = tmask = sum = 0; + for (int i = 1; i <= w + 1; i++) { + if (!(curr_mask & (1 << i))) { + stones++; + tmask |= 1 << (i - sum); + sum = i; + } + } + if (tmask == correct_mask && stones == n) { + result |= 1 << (j & 63); + } + if (j % 64 == 0 && j) { + results[gid * w64 + j / 64] = result; + result = 0; + } + } + results[gid * w64 + j / 64] = result; +} + diff --git a/src/solvers/gpu/host.rs b/src/solvers/gpu/host.rs new file mode 100644 index 0000000..6b79078 --- /dev/null +++ b/src/solvers/gpu/host.rs @@ -0,0 +1,235 @@ +use ocl::{flags, Buffer, Context, Device, Kernel, Platform, Program, Queue}; +use std::sync::mpsc::{Receiver, Sender}; + +#[derive(Debug)] +pub struct Host { + #[allow(unused)] + platform: Platform, + #[allow(unused)] + device: Device, + #[allow(unused)] + context: Context, + program: Program, + queue: Queue, + n: u32, + h: u32, + w: u32, + /// Workgroup size, set to 0 for max + wg_size: usize, + permutations: Buffer, + rec_queues: Vec, + walls: Vec>, +} + +impl Host { + pub fn launch_sevice( + permutation_masks: &[u64], + n: u32, + h: u32, + w: u32, + mut wg_size: usize, + src: &str, + ) -> ocl::Result>> { + let platform = ocl::Platform::default(); + let device = ocl::Device::first(platform)?; + let context = ocl::Context::builder() + .platform(platform) + .devices(device.clone()) + .build()?; + let queue = ocl::Queue::new(&context, device, None)?; + + let program = Program::builder() + .devices(device) + .src(src) + .build(&context)?; + let buffer = ocl::Buffer::builder() + .queue(queue.clone()) + .flags(flags::MEM_READ_WRITE) + .copy_host_slice(permutation_masks) + .len(permutation_masks.len()) + .build()?; + + let mut senders = Vec::with_capacity((n - h + 1) as usize); + let mut receivers = Vec::with_capacity((n - h + 1) as usize); + let max_wg_size = device.max_wg_size()?; + if wg_size == 0 { + wg_size = max_wg_size; + } else if wg_size > max_wg_size { + return Err(ocl::Error::from("invalid workgroup size")); + } + for _ in 0..=(n - h) { + let (sx, rx) = std::sync::mpsc::channel(); + senders.push(sx); + receivers.push(RequestBuffer::new(wg_size, rx)); + } + + let solver = Self { + platform, + device, + context, + program, + queue, + n, + h, + w, + wg_size, + permutations: buffer, + rec_queues: receivers, + walls: Vec::new(), + }; + std::thread::Builder::new() + .name("GPU Deamon".into()) + .spawn(move || { + solver.run(); + }) + .unwrap(); + println!("started gpu thread"); + Ok(senders) + } + + fn get_dim(&self, queue: usize) -> usize { + let chunk = self.permutations.len() / self.n as usize; + let dim = (queue + 1) * chunk; + (dim + self.wg_size - 1) / self.wg_size * self.wg_size + } + fn get_off(&self, queue: usize) -> u64 { + let chunk = self.permutations.len() / self.n as usize; + let off = self.permutations.len() - chunk - self.get_dim(queue); + if off > isize::max_value() as usize { + panic!("workgroup size to big, offset underflow") + } + off as u64 + } + fn get_res(&self, queue: usize) -> usize { + let dim = self.get_dim(queue); + dim * self.get_res_save_dim() + } + fn get_res_save_dim(&self) -> usize { + (self.wg_size + 63) / 64 + } + + fn run(mut self) -> ! { + let queues = self.rec_queues.len(); + let mut instruction_buffer = Vec::with_capacity((self.n - self.h) as usize); + let mut result_buffer = Vec::with_capacity((self.n - self.h) as usize); + + for i in 0..queues { + let buffer: Buffer = Buffer::builder() + .queue(self.queue.clone()) + .len(self.wg_size) + .flags(flags::MEM_READ_WRITE) + .build() + .unwrap(); + + instruction_buffer.push(buffer); + let results: Buffer = Buffer::builder() + .queue(self.queue.clone()) + .len(self.get_res(i)) + .flags(flags::MEM_READ_WRITE) + .build() + .unwrap(); + result_buffer.push(results); + } + println!("finished gpu setup"); + for i in (0..self.rec_queues.len()).cycle() { + if let Some(buffer) = self.rec_queues[i].read() { + instruction_buffer[i].write(buffer).enq().unwrap(); + let dim = self.get_dim(i); + + //println!("dim: {}", dim); + //println!("off: {}", self.get_off(i)); + //println!("result size: {}", self.get_res_save_dim()); + let kernel = Kernel::builder() + .program(&self.program) + .name("check") + .queue(self.queue.clone()) + .global_work_size(dim) + .arg(&self.permutations) + .arg(&result_buffer[i]) + .arg(&instruction_buffer[i]) + .arg_local::(self.wg_size) + .arg(self.n) + .arg(self.w) + .arg(self.get_off(i)) + .build() + .unwrap(); + + unsafe { + kernel + .cmd() + .queue(&self.queue) + .global_work_offset(kernel.default_global_work_offset()) + .global_work_size(dim) + .local_work_size(self.wg_size) + .enq() + .unwrap(); + } + + // (5) Read results from the device into a vector (`::block` not shown): + let mut result = vec![0u64; self.get_res(i)]; + result_buffer[i] + .cmd() + .queue(&self.queue) + .offset(0) + .read(&mut result) + .enq() + .unwrap(); + for (j, r) in result.iter().enumerate() { + if j == 0 { + continue; + } + for b in 0..64 { + if r & (1 << b) != 0 { + let permutation = + j / self.get_res_save_dim() + self.get_off(i) as usize; + let instruction = (j % self.get_res_save_dim()) * 64 + b; + let mut wall = self.rec_queues[i].get_rows()[instruction].clone(); + wall.push(permutation as u32); + println!("{:?}", wall); + self.walls.push(wall); + } + } + } + } + } + panic!(); + } +} +/* +pub fn check(permutations: &[u64], w: u32, n: u32, mask: u64, offset: usize) -> ocl::Result<()> { + //println!("read src!"); + let src = std::fs::read_to_string("src/solvers/check.cl").expect("failed to open kernel file"); + + //println!("created queue!"); + println!("offset: {}", offset); + println!("length: {}", permutations.len() - offset); + let pro_que = ocl::ProQue::builder() + .src(src) + .dims(permutations.len() - offset) + .build()?; + + let results = pro_que.create_buffer::()?; + let kernel = pro_que + .kernel_builder("check") + .arg(get_buffer()) + .arg(&results) + .arg(mask) + .arg(n) + .arg(w) + .arg(offset as u64) + //.global_work_offset(offset) + .build()?; + + //println!("starting calculation"); + unsafe { + kernel.enq()?; + } + + let mut vec = vec![0; results.len()]; + results.read(&mut vec).enq()?; + + if vec.iter().any(|x| *x != 0) { + println!("The resuts are now '{:?}'!", vec); + } + Ok(()) +}*/ diff --git a/src/solvers/gpu/manager.rs b/src/solvers/gpu/manager.rs new file mode 100644 index 0000000..1dd6a4d --- /dev/null +++ b/src/solvers/gpu/manager.rs @@ -0,0 +1,104 @@ +use std::sync::mpsc::{Receiver, Sender, channel}; +use std::thread::JoinHandle; +use super::*; + +#[derive(Debug)] +struct RequestBuffer { + mask_buff: Vec, + pointer: usize, +} + +impl RequestBuffer { + pub fn new(size: usize) -> Self { + RequestBuffer { + mask_buff: vec![0; size], + pointer: 0, + } + } + pub fn read(&mut self, request: CheckRequest) -> Option<&[u64]> { + self.mask_buff[self.pointer] = request.bitmask; + self.pointer += 1; + if self.pointer == self.mask_buff.len() { + self.pointer = 0; + return Some(self.mask_buff.as_ref()); + } + None + } +} + +pub struct OclManager { + job_id: u64, + host_sender: Sender, + output_sender: Sender, + reciever: Receiver, + buffers: Vec, + output_handle: JoinHandle, + host_handle: JoinHandle, +} + +impl OclManager { + pub fn launch_sevice( + permutations: &[&[u32]], + permutations_mask: &[u64], + n: u32, + // Workgroup size, set to 0 for max + wg_size: u32, + ) -> (Sender, JoinHandle) { + let (h, w) = crate::solvers::wall_stats(n); + let src = include_str!("check.cl"); + let (output_sender, output_handle) = + super::output::Output::launch_sevice(permutations, permutations_mask, n, h, w); + let (host_sender, host_handle) = + super::host::Host::launch_sevice(permutations_mask, n, h, w, wg_size as usize, src); + + let (receiver, sender) = channel(); + + let mut buffers = Vec::with_capacity((n - h + 1) as usize); + for _ in 0..=(n - h) { + buffers.push(RequestBuffer::new(wg_size as usize)); + } + + let manager = Self { + 0, + host_sender, + output_sender, + receiver, + buffers, + output_handle, + host_handle, + } + (sender, + std::thread::Builder::new() + .name("GPU Manager Deamon".into()) + .spawn(move || { + manager.run(); + }) + .unwrap()) + + } + + fn run(mut self) { + loop { + match self.reciever.recv().expect("Channel to GPU Manager broke") { + Message::CheckRequest(request) => { + if let Some(buffer) = self.buffers[request.queue as usize].read(request) { + self.host_sender + .send(Message::HostMessage((self.job_id, buffer.0.into()))); + self.output_sender + .send(Message::OutputMessage((self.job_id, buffer.1.into()))); + self.job_id += 1; + } + } + Message::Terminate => { + panic!("flush buffers"); + self.host_sender.send(Message::Terminate); + self.host_handle.join(); + self.output_sender.send(Message::Terminate); + self.output_handle.join(); + return; + } + _ => println!("Invalid MessageType"), + } + } + } +} diff --git a/src/solvers/gpu/mod.rs b/src/solvers/gpu/mod.rs new file mode 100644 index 0000000..f9ab711 --- /dev/null +++ b/src/solvers/gpu/mod.rs @@ -0,0 +1,56 @@ +pub mod host; +pub mod manager; +pub mod output; + +pub use manager::*; + +type MaskMessage = (u64, Vec); +type RowMessage = (u64, Vec>); + +pub enum Message { + CheckRequest(CheckRequest), + HostMessage(MaskMessage), + OutputMessage(RowMessage), + Terminate, +} + +pub struct ResultMessage { + data: Vec, + offset: usize, + size: usize, +} + +impl ResultMessage { + fn new(data: Vec, offset: usize, size: usize) -> Self { + Self { data, offset, size } + } + fn valid_walls(&self, wg_size: usize) -> &[Vec] { + let mut result = vec![Vec::new(); wg_size]; + for (j, r) in self.data.iter().enumerate() { + for b in 0..64 { + if r & (1 << b) != 0 { + let permutation = j / self.size + self.offset; + let instruction = (j % self.size) * 64 + b; + result[instruction].push(permutation as u32); + } + } + } + result.as_ref() + } +} + +pub struct CheckRequest { + rows: Vec, + bitmask: u64, + queue: u32, +} + +impl CheckRequest { + pub fn new(rows: Vec, bitmask: u64, queue: u32) -> Self { + Self { + rows, + bitmask, + queue, + } + } +} diff --git a/src/solvers/gpu/output.rs b/src/solvers/gpu/output.rs new file mode 100644 index 0000000..58a4aa5 --- /dev/null +++ b/src/solvers/gpu/output.rs @@ -0,0 +1,96 @@ +use super::Message; +use std::collections::{HashSet, HashMap}; +use std::sync::mpsc::{channel, Receiver, Sender}; +use std::thread::JoinHandle; + +struct InBuffer { + receiver: Receiver, + row_requests: HashMap>, + results_requests: HashMap>, + +} + +impl InBuffer { + fn new(receiver: Receiver) -> Self { + Self { + receiver, + row_requests: HashMap::new(), + results_requests: HashMap::new(), + } + } + fn read(&mut self) -> Option { + loop { + match self.receiver.recv() { + Message::OutputMessage((id, ResultMessage)) => { + if Some(result) = self.results_requests.get(id) { + Some(RowResult::new() + } + else { + self.row_requests.insert(id, walls);} + } + } + } + +} + +#[derive(PartialEq, Eq, Hash)] +struct RowResult { + rows: Vec, +} + +impl RowResult { + fn new(rows: Vec) -> Self { + rows.push(0); + Self { rows } + } + fn output(&self) { + println!("{:?}", self.rows); + } +} + +pub struct Output { + input: InBuffer, + receiver: Receiver, + permutations: Vec>, + permutations_mask: Vec, + results: HashSet, +} + +impl Output { + fn launch_sevice( + permutations: &[Vec], + permutation_masks: &[u64], + ) -> (Sender, JoinHandle) { + let (sender, receiver) = channel(); + let input = InBuffer::new(receiver); + + let output = Self { + input, + permutations: permutations.into(), + permutation_masks: permutation_masks.into(), + HashSet::new(), + } + ( + sender, + std::thread::Builder::new() + .name("GPU Manager Deamon".into()) + .spawn(move || { + output.run(); + }) + .unwrap(), + ) + } + + fn run(mut self) { + loop { + match self.receiver.recv() { + Message::OutputMessage((id, ResultMessage)) => { + if Some(result) = self.results_requests.get(id) { + Some(RowResult::new() + } + else { + self.row_requests.insert(id, walls);} + } + } + } +} diff --git a/src/solvers/gpusolver.rs b/src/solvers/gpusolver.rs index 3a96568..41de7e7 100644 --- a/src/solvers/gpusolver.rs +++ b/src/solvers/gpusolver.rs @@ -1,6 +1,5 @@ use crate::permutations::PermutationGenerator; -use crate::solver::{wall_stats, IteratorSolver, Solver}; -use crate::solvers::opencl; +use crate::solvers::{opencl, wall_stats, IteratorSolver, Solver}; use crate::structs::StoneWall; #[derive(Debug)] diff --git a/src/solvers/intuitive.rs b/src/solvers/intuitive.rs deleted file mode 100755 index ad3e5b7..0000000 --- a/src/solvers/intuitive.rs +++ /dev/null @@ -1,136 +0,0 @@ -use rayon::prelude::*; - -/// Solve for a given N and return the resulting wall -#[derive(Clone)] -pub struct NormalSolver { - pub n: u32, - /// calculated height [might not be correct!] - pub h: u32, - /// width - pub w: u32, - pub chunk: u32, - pub mask: u64, - /// Use to store already used blocks as a bitmask - permutations: Vec>, - masks: Vec, - senders: Vec>, -} - -static mut TRIES: u32 = 0; -static mut SOLUTIONS: u32 = 0; - -impl NormalSolver { - pub fn new(n: u32) -> Self { - let h = n / 2 + 1; - let w = h * (n - 1); - let mut heap: Vec<_> = (1..=n).collect(); - let heap = permutohedron::Heap::new(&mut heap); - let n_f = permutohedron::factorial(n as usize); - let chunk = n_f as u32 / n; - let mut permutations = Vec::with_capacity(n_f); - let mut masks: Vec = vec![0; n_f]; - println!("Generating permutations"); - for (j, data) in heap.enumerate() { - permutations.push(data.clone()); - let mut sum = 0; - for stone in permutations[j].iter().take(n as usize - 1) { - //.take(n as usize - 1) { - sum += stone; - masks[j] |= 1 << sum; - } - } - - let src = - std::fs::read_to_string("src/solvers/check.cl").expect("failed to open kernel file"); - - let senders = - super::opencl::GpuSolver::launch_sevice(&masks, n, h, w, 0, src.as_ref()).unwrap(); - Self { - n, - h, - w, - chunk, - mask: (1 << w) - 2, - permutations, - masks, - senders, - } - } - - pub fn solve(&mut self) { - for (n, i) in self.permutations.iter().enumerate() { - let tmp: Vec = i.iter().map(|x| *x).collect(); - //println!("perm {}: {:?}", n, tmp); - //println!("perm {}: {:b}", n, self.masks[n]); - } - println!("calculate results"); - self.permute( - permutohedron::factorial(self.n as usize), - 0, - 0, - ((0..(self.h - 1)) - .map(|x| x * self.chunk) - .collect::>()) - .as_ref(), - ); - unsafe { println!("tries: {}\nsolutions: {}", TRIES, SOLUTIONS) } - loop { - std::thread::sleep(std::time::Duration::from_secs(5)); - } - } - - fn permute(&self, up: usize, index: usize, curr_mask: u64, numbers: &[u32]) { - if curr_mask.count_ones() < index as u32 * (self.n - 1) { - return; - } - let mut new_num = Vec::from(numbers); - let start = numbers[index as usize] / self.chunk; - if index as usize == numbers.len() - 1 { - //#[cfg(feature = "gpu")] - //{ - let mut info = sys_info::mem_info().unwrap(); - while info.avail < info.total / 8 { - std::thread::sleep_ms(50); - info = sys_info::mem_info().unwrap(); - println!("mem wait {:?}", info); - } - let i = self.n - 2 - numbers[index] / self.chunk; - self.senders[i as usize] - .send(super::opencl::Job::new(new_num, curr_mask)) - .unwrap(); - return; - //} - } - for i in start..self.n - (self.h - 1 - index as u32) { - for n in 1..(numbers.len() - index) { - new_num[n + index] = (n as u32 + i) * self.chunk; - } - /*if index == 0 { - (0..self.chunk).into_par_iter().for_each(|j| { - let mut new_num = new_num.clone(); - let tmp = i * self.chunk + j; - new_num[index] = tmp; - self.permute( - up, - index + 1, - curr_mask | self.masks[tmp as usize], - &new_num, - ); - }); - } else {*/ - for j in 0..self.chunk { - new_num[index] = i * self.chunk + j; - if index == 0 { - println!("progress: {}%", j as f64 / self.chunk as f64); - } - self.permute( - up, - index + 1, - curr_mask | self.masks[new_num[index] as usize], - &new_num, - ); - } - //} - } - } -} diff --git a/src/solvers/mod.rs b/src/solvers/mod.rs old mode 100755 new mode 100644 index 1bdc228..189deca --- a/src/solvers/mod.rs +++ b/src/solvers/mod.rs @@ -1,5 +1,29 @@ //pub mod incremental_block; -pub mod intuitive; -//#[cfg(feature = "gpu")] +pub mod gpu; pub mod gpusolver; -pub mod opencl; +pub mod single; + +use crate::structs::StoneWall; +pub use gpu::*; + +/// calculate h and w +pub fn wall_stats(n: u32) -> (u32, u32) { + let h = (n >> 1) + 1; + (h, (n - 1) * h) +} + +pub trait Solver { + fn new(n: u32) -> Self; + fn n(&self) -> u32; + fn h(&self) -> u32; + fn w(&self) -> u32; +} + +pub trait FirstSolver { + fn solve(self) -> StoneWall; +} + +pub trait IteratorSolver: Solver { + type IntoIter: Iterator; + fn solve(self) -> Self::IntoIter; +} diff --git a/src/solvers/single.rs b/src/solvers/single.rs new file mode 100644 index 0000000..ad3e5b7 --- /dev/null +++ b/src/solvers/single.rs @@ -0,0 +1,136 @@ +use rayon::prelude::*; + +/// Solve for a given N and return the resulting wall +#[derive(Clone)] +pub struct NormalSolver { + pub n: u32, + /// calculated height [might not be correct!] + pub h: u32, + /// width + pub w: u32, + pub chunk: u32, + pub mask: u64, + /// Use to store already used blocks as a bitmask + permutations: Vec>, + masks: Vec, + senders: Vec>, +} + +static mut TRIES: u32 = 0; +static mut SOLUTIONS: u32 = 0; + +impl NormalSolver { + pub fn new(n: u32) -> Self { + let h = n / 2 + 1; + let w = h * (n - 1); + let mut heap: Vec<_> = (1..=n).collect(); + let heap = permutohedron::Heap::new(&mut heap); + let n_f = permutohedron::factorial(n as usize); + let chunk = n_f as u32 / n; + let mut permutations = Vec::with_capacity(n_f); + let mut masks: Vec = vec![0; n_f]; + println!("Generating permutations"); + for (j, data) in heap.enumerate() { + permutations.push(data.clone()); + let mut sum = 0; + for stone in permutations[j].iter().take(n as usize - 1) { + //.take(n as usize - 1) { + sum += stone; + masks[j] |= 1 << sum; + } + } + + let src = + std::fs::read_to_string("src/solvers/check.cl").expect("failed to open kernel file"); + + let senders = + super::opencl::GpuSolver::launch_sevice(&masks, n, h, w, 0, src.as_ref()).unwrap(); + Self { + n, + h, + w, + chunk, + mask: (1 << w) - 2, + permutations, + masks, + senders, + } + } + + pub fn solve(&mut self) { + for (n, i) in self.permutations.iter().enumerate() { + let tmp: Vec = i.iter().map(|x| *x).collect(); + //println!("perm {}: {:?}", n, tmp); + //println!("perm {}: {:b}", n, self.masks[n]); + } + println!("calculate results"); + self.permute( + permutohedron::factorial(self.n as usize), + 0, + 0, + ((0..(self.h - 1)) + .map(|x| x * self.chunk) + .collect::>()) + .as_ref(), + ); + unsafe { println!("tries: {}\nsolutions: {}", TRIES, SOLUTIONS) } + loop { + std::thread::sleep(std::time::Duration::from_secs(5)); + } + } + + fn permute(&self, up: usize, index: usize, curr_mask: u64, numbers: &[u32]) { + if curr_mask.count_ones() < index as u32 * (self.n - 1) { + return; + } + let mut new_num = Vec::from(numbers); + let start = numbers[index as usize] / self.chunk; + if index as usize == numbers.len() - 1 { + //#[cfg(feature = "gpu")] + //{ + let mut info = sys_info::mem_info().unwrap(); + while info.avail < info.total / 8 { + std::thread::sleep_ms(50); + info = sys_info::mem_info().unwrap(); + println!("mem wait {:?}", info); + } + let i = self.n - 2 - numbers[index] / self.chunk; + self.senders[i as usize] + .send(super::opencl::Job::new(new_num, curr_mask)) + .unwrap(); + return; + //} + } + for i in start..self.n - (self.h - 1 - index as u32) { + for n in 1..(numbers.len() - index) { + new_num[n + index] = (n as u32 + i) * self.chunk; + } + /*if index == 0 { + (0..self.chunk).into_par_iter().for_each(|j| { + let mut new_num = new_num.clone(); + let tmp = i * self.chunk + j; + new_num[index] = tmp; + self.permute( + up, + index + 1, + curr_mask | self.masks[tmp as usize], + &new_num, + ); + }); + } else {*/ + for j in 0..self.chunk { + new_num[index] = i * self.chunk + j; + if index == 0 { + println!("progress: {}%", j as f64 / self.chunk as f64); + } + self.permute( + up, + index + 1, + curr_mask | self.masks[new_num[index] as usize], + &new_num, + ); + } + //} + } + } +} -- cgit v1.2.3-54-g00ecf