src/solvers/single.rs


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132

use super::gpu::*;

/// Solve for a given N and return the resulting wall
pub struct NormalSolver {
    pub n: u32,
    /// calculated height [might not be correct!]
    pub h: u32,
    /// width
    pub w: u32,
    pub chunk: u32,
    pub mask: u64,
    /// Use to store already used blocks as a bitmask
    permutations: Vec<Vec<u32>>,
    masks: Vec<u64>,
    gpu_sender: std::sync::mpsc::Sender<super::gpu::Message>,
    gpu_handle: Option<std::thread::JoinHandle<()>>,
}

impl NormalSolver {
    pub fn new(n: u32) -> Self {
        let h = n / 2 + 1;
        let w = h * (n - 1);
        let mut heap: Vec<_> = (1..=n).collect();
        let heap = permutohedron::Heap::new(&mut heap);
        let n_f = permutohedron::factorial(n as usize);
        let chunk = n_f as u32 / n;
        let mut permutations = Vec::with_capacity(n_f);
        let mut masks: Vec<u64> = vec![0; n_f];
        println!("Generating permutations");
        for (j, data) in heap.enumerate() {
            permutations.push(data.clone());
            let mut sum = 0;
            for stone in permutations[j].iter().take(n as usize - 1) {
                //.take(n as usize - 1) {
                sum += stone;
                masks[j] |= 1 << sum;
            }
        }

        let (sender, receiver) = std::sync::mpsc::channel();
        let (gpu_sender, gpu_handle) =
            super::gpu::OclManager::launch_sevice(&permutations, &masks, n, 0, sender);
        Self {
            n,
            h,
            w,
            chunk,
            mask: (1 << w) - 2,
            permutations,
            masks,
            gpu_sender,
            gpu_handle: Some(gpu_handle),
        }
    }

    pub fn solve(&mut self) {
        //for (n, i) in self.permutations.iter().enumerate() {
        //let tmp: Vec<u32> = i.clone();
        //println!("perm {}: {:?}", n, tmp);
        //println!("perm {}: {:b}", n, self.masks[n]);
        //}
        println!("calculate results");
        self.permute(
            permutohedron::factorial(self.n as usize),
            0,
            0,
            ((0..(self.h - 1))
                .map(|x| x * self.chunk)
                .collect::<Vec<u32>>())
            .as_ref(),
        );
        self.gpu_sender.send(super::gpu::Message::CpuDone).unwrap();
        self.gpu_handle.take().unwrap().join().unwrap();
    }

    fn permute(&self, up: usize, index: usize, curr_mask: u64, numbers: &[u32]) {
        if curr_mask.count_ones() < index as u32 * (self.n - 1) {
            return;
        }
        let mut new_num = Vec::from(numbers);
        let start = numbers[index as usize] / self.chunk;
        if index as usize == numbers.len() - 1 {
            //#[cfg(feature = "gpu")]
            //{
            let mut info = sys_info::mem_info().unwrap();
            while info.avail < info.total / 8 {
                std::thread::sleep_ms(50);
                info = sys_info::mem_info().unwrap();
                println!("mem wait {:?}", info);
            }
            let i = self.n - 2 - numbers[index] / self.chunk;
            self.gpu_sender
                .send(Message::CheckRequest(CheckRequest::new(
                    new_num, curr_mask, i,
                )))
                .unwrap();
            return;
            //}
        }
        for i in start..self.n - (self.h - 1 - index as u32) {
            for n in 1..(numbers.len() - index) {
                new_num[n + index] = (n as u32 + i) * self.chunk;
            }
            /*if index == 0 {
                (0..self.chunk).into_par_iter().for_each(|j| {
                    let mut new_num = new_num.clone();
                    let tmp = i * self.chunk + j;
                    new_num[index] = tmp;
                    self.permute(
                        up,
                        index + 1,
                        curr_mask | self.masks[tmp as usize],
                        &new_num,
                    );
                });
            } else {*/
            for j in 0..self.chunk {
                new_num[index] = i * self.chunk + j;
                if index == 0 {
                    println!("progress: {}%", j as f64 / self.chunk as f64);
                }
                self.permute(
                    up,
                    index + 1,
                    curr_mask | self.masks[new_num[index] as usize],
                    &new_num,
                );
            }
            //}
        }
    }
}