commit 949a196c329608e07518fa618935c2bad3c6f9d2
Author: Sarah Jamie Lewis <sarah@openprivacy.ca>
Date:   Sat Aug 14 10:57:08 2021 -0700

    Initial Commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2e04901
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+/target
+Cargo.lock
+.idea/
\ No newline at end of file
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..754e310
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,10 @@
+[package]
+name = "fuzzyhash"
+version = "0.1.0"
+edition = "2018"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+rand_core = "0.6.0"
+rand ="0.8.3"
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..8079c3a
--- /dev/null
+++ b/README.md
@@ -0,0 +1,88 @@
+## fuzzyhash - A toy (s-t)Detectable Hash Function
+
+This package contains a toy implementation of an (s-t)Detectable Hash Function as described in [The Apple PSI System](https://www.apple.com/child-safety/pdf/Apple_PSI_System_Security_Protocol_and_Analysis.pdf) by Abhishek Bhowmick, Dan Boneh, Steve Myers, Kunal Talwar, and Karl Tarbe.
+
+**WARNING: This should go without saying, but do not use this package for anything. It is based on
+an unreviewed construct, and it hasn't been reviewed for security issues.**
+
+
+### How it Works
+
+A client creates a new `Generator`, which instantiates the secret key behind the scenes.
+
+A client can then use this `Generator` to produce two kinds of `Hash`. 
+
+The first kind of hash (`Hash(x)`), "a true hash", is computed by evaluating the secret key at the given `x`, treating the secret key as an `s x t` matrix whose rows hold the coefficients of the polynomials `p_1..p_s`: `DHF(k,x):=(x,p_1(x),...,p_s(x))`
+
+The second kind of hash is a random hash of the same size as a true hash, but all the elements are selected randomly
+from the domain.
+
+#### The Detection Algorithm
+
+Given a set of hashes, a `Solver` is able to distinguish true hashes from random hashes as long as there are
+at least `t+1` true hashes and a maximum of `s` random hashes.
+
+The solver does this by computing the kernel basis vectors of a matrix built from an **extended** version of the
+hashes: the upper portion of the matrix is a Vandermonde matrix (the powers of each hash's `x`), and the lower
+portion contains the evaluated polynomials from the hashes themselves.
+
+Our solver does this by first appending the identity matrix to the lower portion of the matrix, then transposing,
+converting the matrix to row-echelon form, and transposing again - at which point the kernel basis vectors, if there
+are any, are found in the lower portion of the matrix.
+
+(If there are no basis vectors then the algorithm returns an error as there are not enough true hashes
+in the set to compute a solution)
+
+We then check the kernel basis for rows where all values are zero, and eliminate these as random rows. What is left
+is the solution to the detection algorithm - the indexes of the original hashes which are actually true hashes.
+
+### Sample Run
+
+
+    Hash 0: Hash([PrimeOrderDomain { val: 387 }, PrimeOrderDomain { val: 62 }, PrimeOrderDomain { val: 515 }, PrimeOrderDomain { val: 10 }, PrimeOrderDomain { val: 338 }])
+    Hash 1: Hash([PrimeOrderDomain { val: 297 }, PrimeOrderDomain { val: 662 }, PrimeOrderDomain { val: 463 }, PrimeOrderDomain { val: 164 }, PrimeOrderDomain { val: 576 }])
+    Hash 2: Hash([PrimeOrderDomain { val: 330 }, PrimeOrderDomain { val: 364 }, PrimeOrderDomain { val: 765 }, PrimeOrderDomain { val: 139 }, PrimeOrderDomain { val: 96 }])
+    Hash 3: Hash([PrimeOrderDomain { val: 192 }, PrimeOrderDomain { val: 491 }, PrimeOrderDomain { val: 602 }, PrimeOrderDomain { val: 537 }, PrimeOrderDomain { val: 588 }])
+    Hash 4: Hash([PrimeOrderDomain { val: 535 }, PrimeOrderDomain { val: 873 }, PrimeOrderDomain { val: 84 }, PrimeOrderDomain { val: 570 }, PrimeOrderDomain { val: 639 }])
+    Hash 5: Hash([PrimeOrderDomain { val: 731 }, PrimeOrderDomain { val: 440 }, PrimeOrderDomain { val: 694 }, PrimeOrderDomain { val: 235 }, PrimeOrderDomain { val: 753 }])
+    Hash 6: Hash([PrimeOrderDomain { val: 484 }, PrimeOrderDomain { val: 186 }, PrimeOrderDomain { val: 658 }, PrimeOrderDomain { val: 30 }, PrimeOrderDomain { val: 127 }])
+    Hash 7: Hash([PrimeOrderDomain { val: 545 }, PrimeOrderDomain { val: 881 }, PrimeOrderDomain { val: 36 }, PrimeOrderDomain { val: 371 }, PrimeOrderDomain { val: 654 }])
+    Hash 8: Hash([PrimeOrderDomain { val: 370 }, PrimeOrderDomain { val: 883 }, PrimeOrderDomain { val: 24 }, PrimeOrderDomain { val: 543 }, PrimeOrderDomain { val: 436 }])
+    Hash 9: Hash([PrimeOrderDomain { val: 405 }, PrimeOrderDomain { val: 145 }, PrimeOrderDomain { val: 345 }, PrimeOrderDomain { val: 879 }, PrimeOrderDomain { val: 428 }])
+    
+    
+        1   387   753   475   216    62   515    10   338     1     0     0     0     0     0     0     0     0     0 ;
+        1   297   396   528   704   662   463   164   576     0     1     0     0     0     0     0     0     0     0 ;
+        1   330   686   195   486   364   765   139    96     0     0     1     0     0     0     0     0     0     0 ;
+        1   192   497   515   423   491   602   537   588     0     0     0     1     0     0     0     0     0     0 ;
+        1   535   611   469   781   873    84   570   639     0     0     0     0     1     0     0     0     0     0 ;
+        1   731   387   831   753   440   694   235   753     0     0     0     0     0     1     0     0     0     0 ;
+        1   484    88    16   648   186   658    30   127     0     0     0     0     0     0     1     0     0     0 ;
+        1   545   767   238   208   881    36   371   654     0     0     0     0     0     0     0     1     0     0 ;
+        1   370   302   865   730   883    24   543   436     0     0     0     0     0     0     0     0     1     0 ;
+        1   405   817    34   465   145   345   879   428     0     0     0     0     0     0     0     0     0     1 ;
+    
+    
+    
+        1     0     0     0     0     0     0     0     0     0 ;
+    387   870     0     0     0     0     0     0     0     0 ;
+    753   436   630     0     0     0     0     0     0     0 ;
+    475   390   285   802     0     0     0     0     0     0 ;
+    216   514   428   220   710     0     0     0     0     0 ;
+    62   821   274   518    43   643     0     0     0     0 ;
+    515   396   458   151   823   777   532     0     0     0 ;
+    10   533   442   716   677   721   673   760     0     0 ;
+    338    98   559    41   463   754   718   832     0     0 ;
+    ----------------------------------------
+        1   886   311   350   312   413   649   120   867     0 ;
+        0     0     0     0     0     0     0     0     1   303 ;
+        0     0     0     0     0     0     1   851     0     0 ;
+        0     0     0     1   510   173   749   875   641   860 ;
+        0     0     0     0     0     0     0     0     0     1 ;
+        0     0     0     0     0     1   304   795     0     0 ;
+        0     0     0     0     1     7    97   332    71    51 ;
+        0     0     0     0     0     0     0     1   527   156 ;
+        0     1   575    42    58   838   716   213   554   403 ;
+        0     0     1   494     6   342   145   361     0     0 ;
+    Found 2 Kernel Basis Vectors...
+    Solution: [0, 1, 3, 4, 6, 7, 8]
diff --git a/src/domain.rs b/src/domain.rs
new file mode 100644
index 0000000..0ca042a
--- /dev/null
+++ b/src/domain.rs
@@ -0,0 +1,90 @@
+use std::fmt::{Display, Formatter, Result};
+use std::ops::{Add, Mul, Sub};
+
+/// A deliberately hacky element of the integers modulo `ORDER`, stored as a raw `u64`.
+/// Nothing here verifies that `ORDER` is prime; `inverse` panics when no inverse exists.
+#[derive(Clone, Copy, Debug)]
+pub struct PrimeOrderDomain<const ORDER: u64> {
+    val: u64, // the representative; `add`, `sub` and `mul` all reduce their result modulo `ORDER`
+}
+
+impl<const ORDER: u64> Display for PrimeOrderDomain<ORDER> {
+    fn fmt(&self, out: &mut Formatter<'_>) -> Result {
+        out.write_fmt(format_args!("{:5}", self.val)) // stored value, padded to width five
+    }
+}
+
+impl<const ORDER: u64> PrimeOrderDomain<{ ORDER }> {
+    /// Builds the element for `val`, reduced modulo `ORDER` so that `new(ORDER)` is zero.
+    pub fn new(val: u64) -> PrimeOrderDomain<{ ORDER }> {
+        PrimeOrderDomain { val: val % ORDER }
+    }
+
+    /// Returns `true` when this element is the additive identity.
+    pub fn is_zero(&self) -> bool {
+        self.val == 0
+    }
+
+    /// Returns `true` when the stored value is strictly greater than the one in `rhs`.
+    pub fn gt(&self, rhs: &Self) -> bool {
+        self.val > rhs.val
+    }
+
+    /// Returns the multiplicative inverse, found by trying every candidate in `0..ORDER`.
+    /// Panics when no candidate works, which is always the case for zero.
+    pub fn inverse(&self) -> Self {
+        (0..ORDER)
+            .find(|&b| (self.val as u128 * b as u128) % ORDER as u128 == 1)
+            .map(Self::new)
+            .unwrap_or_else(|| panic!("no inverse found {}!", self.val))
+    }
+
+    /// Raises this element to `exp` by square-and-multiply; `pow(0)` is one.
+    pub fn pow(&self, mut exp: u64) -> Self {
+        let (mut acc, mut base) = (Self::new(1), *self);
+        while exp > 0 {
+            if exp & 1 == 1 {
+                acc = acc * base;
+            }
+            base = base * base;
+            exp >>= 1;
+        }
+        acc
+    }
+}
+
+/// Modular addition: the sum is formed in `u128`, so it cannot overflow before it is reduced.
+impl<const ORDER: u64> Add for PrimeOrderDomain<ORDER> {
+    type Output = Self;
+    fn add(self, other: Self) -> Self {
+        let wide_sum = u128::from(self.val) + u128::from(other.val);
+        let val = (wide_sum % u128::from(ORDER)) as u64; // the remainder is below `ORDER`
+        PrimeOrderDomain { val }
+    }
+}
+
+/// Modular subtraction, carried out in `u128` like `Add` and `Mul` so that it can neither
+/// underflow nor overflow, even for an operand whose stored value is not below `ORDER`.
+impl<const ORDER: u64> Sub for PrimeOrderDomain<{ ORDER }> {
+    type Output = Self;
+
+    fn sub(self, rhs: Self) -> Self::Output {
+        let order = ORDER as u128;
+        // Reduce the operands first: the old `ORDER - rhs.val` underflowed when `rhs.val > ORDER`.
+        let lhs = self.val as u128 % order;
+        let rhs = rhs.val as u128 % order;
+        // `lhs + order - rhs` lies in `1..2 * order`, so a single reduction is enough.
+        let val = ((lhs + order - rhs) % order) as u64;
+        PrimeOrderDomain { val }
+    }
+}
+
+/// Modular multiplication: the product is formed in `u128`, where two `u64` factors always fit.
+impl<const ORDER: u64> Mul for PrimeOrderDomain<ORDER> {
+    type Output = Self;
+    fn mul(self, other: Self) -> Self {
+        let wide_product = u128::from(self.val) * u128::from(other.val);
+        let val = (wide_product % u128::from(ORDER)) as u64; // the remainder is below `ORDER`
+        PrimeOrderDomain { val }
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..1a12bab
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,245 @@
+use crate::domain::PrimeOrderDomain;
+use crate::matrix::Matrix;
+use rand::Rng;
+use rand_core::{CryptoRng, RngCore};
+
+/// The (tiny) field order: one of the many reasons not to use this package for anything serious.
+pub const PRIME_ORDER: u64 = 887;
+pub mod domain;
+pub mod matrix;
+
+struct Generator { // creates true hashes (from the secret key) and random hashes
+    synthetic_max: u64, // `s`: number of key rows, and of key-derived elements in each hash
+    threshold: u64, // `t`: number of coefficients in each key row
+    key: Vec<Vec<PrimeOrderDomain<PRIME_ORDER>>>, // secret key, `synthetic_max` rows by `threshold` columns
+}
+
+#[derive(Debug)]
+struct Hash(Vec<PrimeOrderDomain<PRIME_ORDER>>); // field elements; for `Generator::hash` output, element 0 is the input value
+
+impl Generator {
+    /// Creates a generator whose secret key is `synthetic_max` rows of `threshold`
+    /// coefficients, each drawn uniformly from `0..PRIME_ORDER`.
+    pub fn generate<R: RngCore + CryptoRng>(
+        rng: &mut R,
+        synthetic_max: u64,
+        threshold: u64,
+    ) -> Generator {
+        let key = (0..synthetic_max)
+            .map(|_| {
+                (0..threshold)
+                    .map(|_| PrimeOrderDomain::<PRIME_ORDER>::new(rng.gen_range(0..PRIME_ORDER)))
+                    .collect::<Vec<_>>()
+            })
+            .collect();
+        Generator {
+            synthetic_max,
+            threshold,
+            key,
+        }
+    }
+
+    /// Returns the true hash `(value, p_1(value), ..., p_s(value))`, where the coefficients of
+    /// `p_i` (lowest degree first) are row `i` of the key.
+    pub fn hash(&self, value: PrimeOrderDomain<PRIME_ORDER>) -> Hash {
+        let mut hash = vec![value];
+        for coefficients in &self.key {
+            // Horner's rule sums every term; the old loop overwrote `result` on each pass, so
+            // only the highest-degree term ever reached the hash.
+            let evaluated = coefficients
+                .iter()
+                .rev()
+                .fold(PrimeOrderDomain::<PRIME_ORDER>::new(0), |acc, &c| acc * value + c);
+            hash.push(evaluated);
+        }
+        Hash(hash)
+    }
+
+    /// Returns a decoy of the same length as a true hash with every element drawn uniformly.
+    pub fn random<R: RngCore + CryptoRng>(&self, rng: &mut R) -> Hash {
+        // One draw per element: the old inner loop drew `threshold` values and kept only the last.
+        let len = 1 + self.synthetic_max as usize;
+        let draws = (0..len).map(|_| PrimeOrderDomain::new(rng.gen_range(0..PRIME_ORDER)));
+        Hash(draws.collect())
+    }
+}
+
+struct Solver { // gathers hashes and works out which of them are true hashes
+    synthetic_max: u64, // `s`, as passed to `Solver::new`
+    threshold: u64, // `t`: how many powers of a hash's first element `add_hash` stores
+    expanded_hashes: Vec<Hash>, // hashes in the order they were added, in expanded form
+}
+
+impl Solver {
+    /// Creates an empty solver for hashes generated with the same `synthetic_max` (`s`) and
+    /// `threshold` (`t`).
+    pub fn new(synthetic_max: u64, threshold: u64) -> Solver {
+        Solver {
+            synthetic_max,
+            threshold,
+            expanded_hashes: vec![],
+        }
+    }
+
+    /// Add a new hash to be evaluated by the solver...
+    ///
+    /// The hash is printed to stdout and then stored in expanded form: `(x0, p1..ps)` becomes
+    /// `(1, x0, x0^2, .., x0^(t-1), p1..ps)`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `hash` is empty while `threshold` is non-zero.
+    pub fn add_hash(&mut self, hash: Hash) {
+        println!("Hash {}: {:?}", self.expanded_hashes.len(), hash);
+
+        let powers = (0..self.threshold).map(|i| hash.0[0].pow(i));
+        let expanded_hash = powers.chain(hash.0.iter().skip(1).copied()).collect();
+        self.expanded_hashes.push(Hash(expanded_hash))
+    }
+
+    /// Solve the system and return the indexes of the true hashes (or error if the system is unsolvable)
+    ///
+    /// The expanded hashes become the columns of a matrix with an identity block appended
+    /// below them. The matrix is transposed, reduced to row-echelon form and transposed back;
+    /// every trailing column whose upper `synthetic_max + threshold` rows are then zero is a
+    /// kernel basis vector, with its coefficients sitting in the identity block. A hash is
+    /// reported when at least one basis vector has a non-zero coefficient for it.
+    ///
+    /// Both matrices, the number of basis vectors and the solution are printed to stdout.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Err(())` when no kernel basis vector is found.
+    ///
+    /// # Panics
+    ///
+    /// Panics if a stored hash has fewer than `synthetic_max + threshold` elements.
+    pub fn attempt_solve(&self) -> Result<Vec<usize>, ()> {
+        let m = (self.synthetic_max + self.threshold) as usize;
+        let n = self.expanded_hashes.len();
+
+        // Column `j` holds expanded hash `j` in its first `m` rows and a single one in row
+        // `m + j`. (A second, never-read copy of the hashes used to be built here as well.)
+        let mut augmented_matrix = Matrix::new(m + n, n);
+        for (j, hash) in self.expanded_hashes.iter().enumerate() {
+            for i in 0..m {
+                augmented_matrix.update(i, j, hash.0[i]);
+            }
+            augmented_matrix.update(m + j, j, PrimeOrderDomain::new(1));
+        }
+
+        // Work on the transpose so that the column operations become row operations.
+        augmented_matrix = augmented_matrix.transpose();
+        Self::print_matrix(&augmented_matrix, None);
+        Self::reduce_to_row_echelon(&mut augmented_matrix);
+
+        println!();
+        println!();
+        println!();
+        augmented_matrix = augmented_matrix.transpose();
+        Self::print_matrix(&augmented_matrix, Some(m));
+
+        // Count the trailing columns whose first `m` rows are all zero.
+        let count = (0..n)
+            .rev()
+            .take_while(|&col| (0..m).all(|row| augmented_matrix.at(row, col).is_zero()))
+            .count();
+        if count == 0 {
+            return Err(());
+        }
+        println!("Found {} Kernel Basis Vectors...", count);
+
+        // Keep every hash for which some basis vector has a non-zero coefficient.
+        let basis_state = n - count;
+        let solution: Vec<usize> = (0..n)
+            .filter(|&i| {
+                (0..count).any(|b| !augmented_matrix.at(i + m, basis_state + b).is_zero())
+            })
+            .collect();
+
+        println!("Solution: {:?}", solution);
+        Ok(solution)
+    }
+
+    /// Prints `matrix` to stdout one row per line, adding a dashed rule below the
+    /// `rule_after`-th row (counting from one) when `rule_after` is given.
+    fn print_matrix(matrix: &Matrix, rule_after: Option<usize>) {
+        for i in 0..matrix.rows() {
+            for j in 0..matrix.cols() {
+                print!("{} ", matrix.at(i, j));
+            }
+            println!(";");
+            if rule_after == Some(i + 1) {
+                println!("----------------------------------------");
+            }
+        }
+    }
+
+    /// Brings `matrix` to row-echelon form in place by Gaussian elimination.
+    fn reduce_to_row_echelon(matrix: &mut Matrix) {
+        let cols = matrix.cols();
+        let rows = matrix.rows();
+
+        // `h` is the current pivot row and `k` the current pivot column.
+        let mut h = 0;
+        let mut k = 0;
+        while h < rows && k < cols {
+            // Pick the row at or below `h` with the largest stored value in column `k` (the
+            // first such row on ties); the column is skipped when that value is zero.
+            let mut i_max = h;
+            for x in h + 1..rows {
+                if matrix.at(x, k).gt(&matrix.at(i_max, k)) {
+                    i_max = x;
+                }
+            }
+            if matrix.at(i_max, k).is_zero() {
+                k += 1;
+                continue;
+            }
+            matrix.swap_rows(h, i_max);
+            // Row `h` is not touched while the rows below it are cleared, so the pivot is
+            // inverted once here rather than once per row as before.
+            let pivot_inverse = matrix.at(h, k).inverse();
+            for i in h + 1..rows {
+                let f = matrix.at(i, k) * pivot_inverse;
+                matrix.update(i, k, PrimeOrderDomain::new(0));
+                for j in k + 1..cols {
+                    let val = matrix.at(i, j) - matrix.at(h, j) * f;
+                    matrix.update(i, j, val);
+                }
+            }
+            h += 1;
+            k += 1;
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::domain::PrimeOrderDomain;
+    use crate::{Generator, Solver, PRIME_ORDER};
+    use rand::Rng;
+    use rand_core::OsRng;
+
+    /// Mixes seven true hashes with three random ones and expects the true positions back.
+    #[test]
+    fn it_works() {
+        // These are the indexes which are to be random...you can try swapping them around..
+        const RANDOM_INDEXES: [usize; 3] = [5, 2, 9];
+        let mut rng = OsRng;
+        let (s, t) = (4u64, 5u64);
+        let dhf = Generator::generate(&mut rng, s, t);
+
+        let mut solver = Solver::new(s, t);
+        for i in 0..10 {
+            let hash = if RANDOM_INDEXES.contains(&i) {
+                dhf.random(&mut rng)
+            } else {
+                dhf.hash(PrimeOrderDomain::new(rng.gen_range(0..PRIME_ORDER)))
+            };
+            solver.add_hash(hash);
+        }
+
+        assert_eq!(solver.attempt_solve().unwrap(), vec![0, 1, 3, 4, 6, 7, 8]);
+    }
+}
diff --git a/src/matrix.rs b/src/matrix.rs
new file mode 100644
index 0000000..e187732
--- /dev/null
+++ b/src/matrix.rs
@@ -0,0 +1,56 @@
+use crate::domain::PrimeOrderDomain;
+use crate::PRIME_ORDER;
+
+/// A dense, row-major matrix of `PRIME_ORDER` field elements. Apple have said t and s are going to
+/// be on the order of 30, so you can probably always get away with a dense representation.
+pub struct Matrix {
+    rows: usize, // number of rows
+    cols: usize, // number of columns
+    vals: Vec<Vec<PrimeOrderDomain<PRIME_ORDER>>>, // entries, addressed as `vals[row][col]`
+}
+
+impl Matrix {
+    /// Creates a `rows` x `cols` matrix with every entry set to zero.
+    pub fn new(rows: usize, cols: usize) -> Matrix {
+        let vals = vec![vec![PrimeOrderDomain::new(0); cols]; rows];
+        Matrix { rows, cols, vals }
+    }
+
+    /// Returns the number of rows.
+    pub fn rows(&self) -> usize {
+        self.rows
+    }
+
+    /// Returns the number of columns.
+    pub fn cols(&self) -> usize {
+        self.cols
+    }
+
+    /// Overwrites the entry at (`row`, `col`) with `val`; panics if an index is out of bounds.
+    pub fn update(&mut self, row: usize, col: usize, val: PrimeOrderDomain<PRIME_ORDER>) {
+        self.vals[row][col] = val
+    }
+
+    /// Returns a copy of the entry at (`row`, `col`); panics if an index is out of bounds.
+    pub fn at(&self, row: usize, col: usize) -> PrimeOrderDomain<PRIME_ORDER> {
+        self.vals[row][col]
+    }
+
+    /// Returns a new matrix whose entry (`i`, `j`) is this matrix's entry (`j`, `i`).
+    pub fn transpose(&self) -> Matrix {
+        let mut m = Matrix::new(self.cols, self.rows);
+        for (j, row) in self.vals.iter().enumerate() {
+            for (i, &val) in row.iter().enumerate() {
+                m.vals[i][j] = val;
+            }
+        }
+        m
+    }
+
+    /// Exchanges rows `a` and `b` in place; panics if either index is out of bounds.
+    ///
+    /// The rows are swapped inside the outer vector, so neither row is cloned.
+    pub fn swap_rows(&mut self, a: usize, b: usize) {
+        self.vals.swap(a, b);
+    }
+}