1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
use std::mem;

use crate::stats::float::Float;
use crate::stats::rand_util::{new_rng, Rng};
use crate::stats::univariate::Sample;

pub struct Resamples<'a, A>
where
    A: Float,
{
    rng: Rng,
    sample: &'a [A],
    stage: Option<Vec<A>>,
}

#[cfg_attr(feature = "cargo-clippy", allow(clippy::should_implement_trait))]
impl<'a, A> Resamples<'a, A>
where
    A: 'a + Float,
{
    pub fn new(sample: &'a Sample<A>) -> Resamples<'a, A> {
        let slice = sample;

        Resamples {
            rng: new_rng(),
            sample: slice,
            stage: None,
        }
    }

    pub fn next(&mut self) -> &Sample<A> {
        let n = self.sample.len();
        let rng = &mut self.rng;

        match self.stage {
            None => {
                let mut stage = Vec::with_capacity(n);

                for _ in 0..n {
                    let idx = rng.rand_range(0u64..(self.sample.len() as u64));
                    stage.push(self.sample[idx as usize])
                }

                self.stage = Some(stage);
            }
            Some(ref mut stage) => {
                for elem in stage.iter_mut() {
                    let idx = rng.rand_range(0u64..(self.sample.len() as u64));
                    *elem = self.sample[idx as usize]
                }
            }
        }

        if let Some(ref v) = self.stage {
            unsafe { mem::transmute::<&[_], _>(v) }
        } else {
            unreachable!();
        }
    }
}

#[cfg(test)]
mod test {
    use quickcheck::quickcheck;
    use quickcheck::TestResult;
    use std::collections::HashSet;

    use crate::stats::univariate::resamples::Resamples;
    use crate::stats::univariate::Sample;

    // Check that the resample is a subset of the sample
    quickcheck! {
        fn subset(size: usize, nresamples: usize) -> TestResult {
            if size > 1 {
                let v: Vec<_> = (0..size).map(|i| i as f32).collect();
                let sample = Sample::new(&v);
                let mut resamples = Resamples::new(sample);
                let sample = v.iter().map(|&x| x as i64).collect::<HashSet<_>>();

                TestResult::from_bool((0..nresamples).all(|_| {
                    let resample = resamples.next()

                        .iter()
                        .map(|&x| x as i64)
                        .collect::<HashSet<_>>();

                    resample.is_subset(&sample)
                }))
            } else {
                TestResult::discard()
            }
        }
    }

    #[test]
    fn different_subsets() {
        let size = 1000;
        let v: Vec<_> = (0..size).map(|i| i as f32).collect();
        let sample = Sample::new(&v);
        let mut resamples = Resamples::new(sample);

        // Hypothetically, we might see one duplicate, but more than one is likely to be a bug.
        let mut num_duplicated = 0;
        for _ in 0..1000 {
            let sample_1 = resamples.next().iter().cloned().collect::<Vec<_>>();
            let sample_2 = resamples.next().iter().cloned().collect::<Vec<_>>();

            if sample_1 == sample_2 {
                num_duplicated += 1;
            }
        }

        if num_duplicated > 1 {
            panic!("Found {} duplicate samples", num_duplicated);
        }
    }
}