commit 949a196c329608e07518fa618935c2bad3c6f9d2 Author: Sarah Jamie Lewis Date: Sat Aug 14 10:57:08 2021 -0700 Initial Commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2e04901 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/target +Cargo.lock +.idea/ \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..754e310 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "fuzzyhash" +version = "0.1.0" +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +rand_core = "0.6.0" +rand ="0.8.3" \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..8079c3a --- /dev/null +++ b/README.md @@ -0,0 +1,88 @@ +## fuzzyhash - A toy (s-t)Detectable Hash Function + +This package contains a toy implementation of an (s-t)Detectable Hash Function as described in [The Apple PSI System](https://www.apple.com/child-safety/pdf/Apple_PSI_System_Security_Protocol_and_Analysis.pdf) by Abhishek Bhowmick, Dan Boneh, Steve Myers, Kunal Talwar, and Karl Tarbe. + +**WARNING: This should go without saying but do not use this package for anything. It is based on +an unreviewed construct, and it hasn't been reviewed for security issues.** + + +### How it Works + +A client creates a new `Generator` which instantiates the secret key behind the scenes. + +A client can then use this `Generator` to produce two kinds of `Hash`. + +The first kind of hash (`Hash(x)`), "a true hash", multiplies the given `x` by the secret key (treating the secret key as a `s x t` matrix of polynomials (`p_1..p_s`)). `DHF(k,x):=(x,p_1(x),...,p_s(x))` + +The second kind of hash is a random hash of the same size as a true hash, but all the elements are selected randomly +from the domain. 
+ +#### The Detection Algorithm + +Given a set of hashes, a `Solver` is able to distinguish true hashes from random hashes as long as there are +at least `t+1` true hashes and a maximum of `s` random hashes. + +The solver does this by computing the kernel basis vectors of a matrix containing an **extended** version of the +hashes which transforms the upper portion of the matrix into a Vandermonde matrix, and the lower portion contains +the evaluated polynomials from the hashes themselves. + +Our solver does this by first appending the identity matrix to the lower portion of the matrix, then transposing, +converting the matrix to row-echelon form, and transposing again - at which point the kernel basis vectors, if there +are any, are found in the lower portion of the matrix. + +(If there are no basis vectors then the algorithm returns an error as there are not enough true hashes +in the set to compute a solution.) + +We then check the kernel basis for rows where all values are zero, and eliminate these as random rows. What is left +is the solution to the detection algorithm - the indexes of the original hashes which are actually true hashes. 
+ +### Sample Run + + + Hash 0: Hash([PrimeOrderDomain { val: 387 }, PrimeOrderDomain { val: 62 }, PrimeOrderDomain { val: 515 }, PrimeOrderDomain { val: 10 }, PrimeOrderDomain { val: 338 }]) + Hash 1: Hash([PrimeOrderDomain { val: 297 }, PrimeOrderDomain { val: 662 }, PrimeOrderDomain { val: 463 }, PrimeOrderDomain { val: 164 }, PrimeOrderDomain { val: 576 }]) + Hash 2: Hash([PrimeOrderDomain { val: 330 }, PrimeOrderDomain { val: 364 }, PrimeOrderDomain { val: 765 }, PrimeOrderDomain { val: 139 }, PrimeOrderDomain { val: 96 }]) + Hash 3: Hash([PrimeOrderDomain { val: 192 }, PrimeOrderDomain { val: 491 }, PrimeOrderDomain { val: 602 }, PrimeOrderDomain { val: 537 }, PrimeOrderDomain { val: 588 }]) + Hash 4: Hash([PrimeOrderDomain { val: 535 }, PrimeOrderDomain { val: 873 }, PrimeOrderDomain { val: 84 }, PrimeOrderDomain { val: 570 }, PrimeOrderDomain { val: 639 }]) + Hash 5: Hash([PrimeOrderDomain { val: 731 }, PrimeOrderDomain { val: 440 }, PrimeOrderDomain { val: 694 }, PrimeOrderDomain { val: 235 }, PrimeOrderDomain { val: 753 }]) + Hash 6: Hash([PrimeOrderDomain { val: 484 }, PrimeOrderDomain { val: 186 }, PrimeOrderDomain { val: 658 }, PrimeOrderDomain { val: 30 }, PrimeOrderDomain { val: 127 }]) + Hash 7: Hash([PrimeOrderDomain { val: 545 }, PrimeOrderDomain { val: 881 }, PrimeOrderDomain { val: 36 }, PrimeOrderDomain { val: 371 }, PrimeOrderDomain { val: 654 }]) + Hash 8: Hash([PrimeOrderDomain { val: 370 }, PrimeOrderDomain { val: 883 }, PrimeOrderDomain { val: 24 }, PrimeOrderDomain { val: 543 }, PrimeOrderDomain { val: 436 }]) + Hash 9: Hash([PrimeOrderDomain { val: 405 }, PrimeOrderDomain { val: 145 }, PrimeOrderDomain { val: 345 }, PrimeOrderDomain { val: 879 }, PrimeOrderDomain { val: 428 }]) + + + 1 387 753 475 216 62 515 10 338 1 0 0 0 0 0 0 0 0 0 ; + 1 297 396 528 704 662 463 164 576 0 1 0 0 0 0 0 0 0 0 ; + 1 330 686 195 486 364 765 139 96 0 0 1 0 0 0 0 0 0 0 ; + 1 192 497 515 423 491 602 537 588 0 0 0 1 0 0 0 0 0 0 ; + 1 535 611 469 781 873 84 570 
639 0 0 0 0 1 0 0 0 0 0 ; + 1 731 387 831 753 440 694 235 753 0 0 0 0 0 1 0 0 0 0 ; + 1 484 88 16 648 186 658 30 127 0 0 0 0 0 0 1 0 0 0 ; + 1 545 767 238 208 881 36 371 654 0 0 0 0 0 0 0 1 0 0 ; + 1 370 302 865 730 883 24 543 436 0 0 0 0 0 0 0 0 1 0 ; + 1 405 817 34 465 145 345 879 428 0 0 0 0 0 0 0 0 0 1 ; + + + + 1 0 0 0 0 0 0 0 0 0 ; + 387 870 0 0 0 0 0 0 0 0 ; + 753 436 630 0 0 0 0 0 0 0 ; + 475 390 285 802 0 0 0 0 0 0 ; + 216 514 428 220 710 0 0 0 0 0 ; + 62 821 274 518 43 643 0 0 0 0 ; + 515 396 458 151 823 777 532 0 0 0 ; + 10 533 442 716 677 721 673 760 0 0 ; + 338 98 559 41 463 754 718 832 0 0 ; + ---------------------------------------- + 1 886 311 350 312 413 649 120 867 0 ; + 0 0 0 0 0 0 0 0 1 303 ; + 0 0 0 0 0 0 1 851 0 0 ; + 0 0 0 1 510 173 749 875 641 860 ; + 0 0 0 0 0 0 0 0 0 1 ; + 0 0 0 0 0 1 304 795 0 0 ; + 0 0 0 0 1 7 97 332 71 51 ; + 0 0 0 0 0 0 0 1 527 156 ; + 0 1 575 42 58 838 716 213 554 403 ; + 0 0 1 494 6 342 145 361 0 0 ; + Found 2 Kernel Basis Vectors... + Solution: [0, 1, 3, 4, 6, 7, 8] diff --git a/src/domain.rs b/src/domain.rs new file mode 100644 index 0000000..0ca042a --- /dev/null +++ b/src/domain.rs @@ -0,0 +1,90 @@ +use std::fmt::{Display, Formatter, Result}; +use std::ops::{Add, Mul, Sub}; + +/// Did I really implement an awfully hacky prime order field to play with this +/// Yes...yes I did... 
+#[derive(Clone, Copy, Debug)] +pub struct PrimeOrderDomain { + val: u64, +} + +impl Display for PrimeOrderDomain<{ ORDER }> { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{:5}", self.val) + } +} + +impl PrimeOrderDomain<{ ORDER }> { + pub fn new(val: u64) -> PrimeOrderDomain<{ ORDER }> { + PrimeOrderDomain { val } + } + + pub fn is_zero(&self) -> bool { + self.val == 0 + } + + pub fn gt(&self, rhs: &Self) -> bool { + self.val > rhs.val + } + + pub fn inverse(&self) -> Self { + for b in 0..=ORDER - 1 { + if ((self.val as u128 * b as u128) % ORDER as u128) == 1 { + return PrimeOrderDomain::new(b); + } + } + panic!("no inverse found {}!", self.val) + } + + pub fn pow(&self, exp: u64) -> Self { + let mut ret = self.clone(); + + if exp == 0 { + return PrimeOrderDomain::new(1); + } + if exp == 1 { + return self.clone(); + } + + for _i in 1..exp { + ret = self.clone() * ret; + } + ret + } +} + +impl Add for PrimeOrderDomain<{ ORDER }> { + type Output = Self; + + fn add(self, rhs: Self) -> Self::Output { + PrimeOrderDomain { + val: ((self.val as u128 + rhs.val as u128) % ORDER as u128) as u64, + } + } +} + +impl Sub for PrimeOrderDomain<{ ORDER }> { + type Output = Self; + + fn sub(self, rhs: Self) -> Self::Output { + if rhs.val > self.val { + return PrimeOrderDomain { + val: ((ORDER - rhs.val) + self.val) % ORDER, + }; + } + + PrimeOrderDomain { + val: (self.val - rhs.val) % ORDER, + } + } +} + +impl Mul for PrimeOrderDomain<{ ORDER }> { + type Output = Self; + + fn mul(self, rhs: Self) -> Self::Output { + PrimeOrderDomain { + val: ((self.val as u128 * rhs.val as u128) % ORDER as u128) as u64, + } + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..1a12bab --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,245 @@ +use crate::domain::PrimeOrderDomain; +use crate::matrix::Matrix; +use rand::Rng; +use rand_core::{CryptoRng, RngCore}; + +// One of the many reasons you should not be using the package for anything serious... 
+pub const PRIME_ORDER: u64 = 887; +pub mod domain; +pub mod matrix; + +struct Generator { + synthetic_max: u64, + threshold: u64, + key: Vec>>, +} + +#[derive(Debug)] +struct Hash(Vec>); + +impl Generator { + pub fn generate( + rng: &mut R, + synthetic_max: u64, + threshold: u64, + ) -> Generator { + let mut k = vec![]; + for _i in 0..synthetic_max { + let mut p_i = vec![]; + for _j in 0..threshold { + let p = rng.gen_range(0..PRIME_ORDER); + p_i.push(PrimeOrderDomain::::new(p)); + } + k.push(p_i); + } + Generator { + synthetic_max, + threshold, + key: k, + } + } + + pub fn hash(&self, value: PrimeOrderDomain) -> Hash { + let mut hash = vec![value]; + for i in 0..self.synthetic_max as usize { + let mut result = PrimeOrderDomain::new(0); + for j in 0..self.threshold as usize { + result = self.key[i][j].clone() * value.pow(j as u64); + } + hash.push(result); + } + Hash(hash) + } + + pub fn random(&self, rng: &mut R) -> Hash { + let mut hash = vec![PrimeOrderDomain::new(rng.gen_range(0..PRIME_ORDER))]; + for _i in 0..self.synthetic_max as usize { + let mut result = PrimeOrderDomain::new(0); + for _j in 0..self.threshold as usize { + result = PrimeOrderDomain::new(rng.gen_range(0..PRIME_ORDER)) + } + hash.push(result); + } + Hash(hash) + } +} + +struct Solver { + synthetic_max: u64, + threshold: u64, + expanded_hashes: Vec, +} + +impl Solver { + pub fn new(synthetic_max: u64, threshold: u64) -> Solver { + Solver { + synthetic_max, + threshold, + expanded_hashes: vec![], + } + } + + /// Add a new hash to be evaluated by the solver... 
+ pub fn add_hash(&mut self, hash: Hash) { + println!("Hash {}: {:?}", self.expanded_hashes.len(), hash); + + // To expand a Hash we take DHF(x0,p1..ps) and produce DHF(1,x0,x0^1..x0^t-1,p1..ps) + let mut expanded_hash = vec![]; + for i in 0..self.threshold { + expanded_hash.push(hash.0[0].pow(i)) + } + for element in hash.0.iter().skip(1) { + expanded_hash.push(element.clone()) + } + self.expanded_hashes.push(Hash(expanded_hash)) + } + + /// Solve the system and return the indexes of the true hashes (or error if the system is unsolvable) + pub fn attempt_solve(&self) -> Result, ()> { + // Arrange the hashes into an augmented for matrix... + let mut orginal_matrix = Matrix::new( + (self.synthetic_max + self.threshold) as usize, + self.expanded_hashes.len(), + ); + let mut augmented_matrix = Matrix::new( + (self.synthetic_max + self.threshold) as usize + self.expanded_hashes.len(), + self.expanded_hashes.len(), + ); + + let m = (self.synthetic_max + self.threshold) as usize; + + for j in 0..self.expanded_hashes.len() { + for i in 0..m { + augmented_matrix.update(i, j, self.expanded_hashes[j as usize].0[i as usize]); + orginal_matrix.update(i, j, self.expanded_hashes[j as usize].0[i as usize]) + } + } + + for (i, r) in (m..m + self.expanded_hashes.len()).enumerate() { + augmented_matrix.update(r as usize, i, PrimeOrderDomain::new(1)); + } + augmented_matrix = augmented_matrix.transpose(); + + for i in 0..augmented_matrix.rows() { + for j in 0..augmented_matrix.cols() { + print!("{} ", augmented_matrix.at(i, j as usize)); + } + println!(";") + } + + let cols = augmented_matrix.cols(); + let rows = augmented_matrix.rows(); + + let mut h = 0; + let mut k = 0; + + while h < rows && k < cols { + let mut i_max = h; + let mut i_max_v = augmented_matrix.at(h, k); + for x in h + 1..rows { + if augmented_matrix.at(x, k).gt(&i_max_v) { + i_max = x; + i_max_v = augmented_matrix.at(x, k) + } + } + if augmented_matrix.at(i_max, k).is_zero() { + k += 1; + } else { + 
augmented_matrix.swap_rows(h, i_max); + for i in h + 1..rows { + let f = augmented_matrix.at(i, k) * augmented_matrix.at(h, k).inverse(); + augmented_matrix.update(i, k, PrimeOrderDomain::new(0)); + for j in k + 1..cols { + let val = augmented_matrix.at(i, j) - augmented_matrix.at(h, j) * f; + augmented_matrix.update(i, j, val); + } + } + h += 1; + k += 1; + } + } + + println!(""); + println!(""); + println!(""); + augmented_matrix = augmented_matrix.transpose(); + + for i in 0..augmented_matrix.rows() { + for j in 0..augmented_matrix.cols() { + print!("{} ", augmented_matrix.at(i, j as usize)); + } + println!(";"); + if i + 1 == m { + println!("----------------------------------------"); + } + } + + // Calculate the Nulls... + let mut count = 0; + for col in (0..self.expanded_hashes.len()).rev() { + let mut is_null = true; + for row in 0..m { + if !augmented_matrix.at(row, col).is_zero() { + is_null = false; + break; + } + } + if is_null == false { + break; + } else { + count += 1; + } + } + + if count == 0 { + return Err(()); + } + + println!("Found {} Kernel Basis Vectors...", count); + let mut solution = vec![]; + + let basis_state = self.expanded_hashes.len() - count; + for i in 0..self.expanded_hashes.len() { + let mut is_zero = true; + for b in 0..count { + is_zero = is_zero & augmented_matrix.at(i + m, basis_state + b).is_zero() + } + if !is_zero { + solution.push(i) + } + } + + println!("Solution: {:?}", solution); + Ok(solution) + } +} + +#[cfg(test)] +mod tests { + use crate::domain::PrimeOrderDomain; + use crate::{Generator, Solver, PRIME_ORDER}; + use rand::Rng; + use rand_core::OsRng; + + #[test] + fn it_works() { + let mut rng = OsRng; + let s = 4u64; + let t = 5u64; + let dhf = Generator::generate(&mut rng, s, t); + + let mut solver = Solver::new(s, t); + for i in 0..10 { + // These are the indexes which are to be random...you can try swapping them around.. 
+ if i != 5 && i != 2 && i != 9 { + let x0: u64 = rng.gen_range(0..PRIME_ORDER); + let hash = dhf.hash(PrimeOrderDomain::new(x0)); + solver.add_hash(hash); + } else { + solver.add_hash(dhf.random(&mut rng)); + } + } + + assert_eq!(solver.attempt_solve().unwrap(), vec![0, 1, 3, 4, 6, 7, 8]); + } +} diff --git a/src/matrix.rs b/src/matrix.rs new file mode 100644 index 0000000..e187732 --- /dev/null +++ b/src/matrix.rs @@ -0,0 +1,56 @@ +use crate::domain::PrimeOrderDomain; +use crate::PRIME_ORDER; + +/// A dense matrix. Apple have said t and s are going to be on the order of 30 so I feel +/// you can probably always get away with a dense representation +pub struct Matrix { + rows: usize, + cols: usize, + vals: Vec>>, +} + +impl Matrix { + pub fn new(rows: usize, cols: usize) -> Matrix { + let mut vals = vec![]; + for _i in 0..rows { + let mut row = vec![]; + for _j in 0..cols { + row.push(PrimeOrderDomain::new(0)) + } + vals.push(row) + } + Matrix { rows, cols, vals } + } + + pub fn rows(&self) -> usize { + self.rows + } + + pub fn cols(&self) -> usize { + self.cols + } + + pub fn update(&mut self, row: usize, col: usize, val: PrimeOrderDomain) { + self.vals[row][col] = val + } + + pub fn at(&self, row: usize, col: usize) -> PrimeOrderDomain { + self.vals[row][col] + } + + pub fn transpose(&self) -> Matrix { + let mut m = Matrix::new(self.cols, self.rows); + for i in 0..self.cols { + for j in 0..self.rows { + m.update(i, j, self.vals[j][i]); + } + } + m + } + + pub fn swap_rows(&mut self, a: usize, b: usize) { + let tmp = self.vals[a].clone(); + self.vals[a] = self.vals[b].clone(); + self.vals[b] = tmp; + } +}