From d0c95cbc669589d3e8768962f1113d7ff235e3c3 Mon Sep 17 00:00:00 2001 From: Sarah Jamie Lewis Date: Sun, 31 Jan 2021 13:21:44 -0800 Subject: [PATCH] New Integration Notes --- ANONYMITY.md | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 2 +- README.md | 2 ++ src/lib.rs | 9 ++++++- 4 files changed, 83 insertions(+), 2 deletions(-) create mode 100644 ANONYMITY.md diff --git a/ANONYMITY.md b/ANONYMITY.md new file mode 100644 index 0000000..e61e90c --- /dev/null +++ b/ANONYMITY.md @@ -0,0 +1,72 @@ +## Integrating FuzzyTags + +The properties provided by this system are highly dependent on selecting a false positive rate _p_. In the following +sections we will cover a number of considerations you should take into account when integrating fuzzytags into a larger +privacy-preserving application. + +### How bad is it to let people select their own false-positive rates? + +The short answer is "it depends". + +The longer answer: + +When different parties have different false positive rates, the server can calculate the skew between a party's ideal +false positive rate and observed false positive rate. + +That skew leaks information, especially given certain message distributions. Specifically, it leaks the parties + who receive a larger proportion of system messages than their ideal false positive rate would predict. + +i.e. for low false positive rates and high message volume for a specific receiver, the adversarial server + can calculate a skew that leaks the recipient of individual messages - breaking privacy for that receiver. + +It *also* removes those messages from the pool of messages that an adversarial server needs to consider for other receivers, +effectively reducing the anonymity set for everyone else. + +Which brings us onto: + +### Differential Attacks + +Any kind of differential attack breaks this scheme, even for a small number of messages i.e.
if you learn (through +any means) that a specific set of messages are all likely for 1 party, you can diff them against all other parties' keys and +very quickly isolate the intended recipient - in simulations of 100-1000 parties it can take as few as 3 messages - even +with everyone selecting fairly high false positive rates. + +The corollary of the above is that in differential attacks your anonymity set is basically the number of users +who download all messages - since you can't diff them. This has the interesting side effect: the more parties who +download everything, the more the system can safely tolerate parties with small false-positive rates. + +To what extent you can actually account for this in your application is an open question. + +### Should Senders use an anonymous communication network? + +If differential attacks are likely (e.g. few parties download everything and +multiple messages are expected to originate from a sender to a receiver, or there +is other information that might otherwise link a set of messages to a receiver) then you may want to consider how +to remove that context. + +One potential way of removing context is by having senders send their message to the server through some kind of anonymous +communication network e.g. a mixnet or Tor. + +Be warned: This may not eliminate all the context! + +### How bad is it to select a poor choice of _p_? + +Considering a _Pareto distribution_, where most users only receive a few messages and a small subset of users +receive a large number of messages, it seems that increasing the number of parties is + generally more important to the overall anonymity of the system than any individual selection of _p_. + +Under a certain threshold of parties, trivial breaks (i.e. tags that only match to a single party) are a bigger concern.
+ +Assuming we have a large number of parties (_N_), the following heuristic emerges: + +* Parties who only expect to receive a small number of messages can safely choose smaller false positive rates, down +to a threshold _θ_, where _θ_ > 2^-N. The lower the value of _θ_, the greater the possibility of random trivial breaks for +the party. +* Parties who expect a large number of messages should choose to receive **all** messages for 2 reasons: + 1) Even high false positive rates for power users result in information leaks to the server (due to the large + skew) i.e. a server can trivially learn which users are power users. + 2) By choosing to receive all messages, power users don't sacrifice much in terms of bandwidth, but will provide + cover for parties who receive a small number of messages and who want a lower false-positive rate. + +(We consider a Pareto distribution here because we expect many applications to have parties that can be +modelled as such - especially over short time horizons) \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index a728055..6cea079 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "fuzzytags" description = "a probabilistic cryptographic structure for metadata resistant tagging" -version = "0.1.1" +version = "0.1.2" repository = "https://git.openprivacy.ca/openprivacy/fuzzytags" authors = ["Sarah Jamie Lewis "] edition = "2018" diff --git a/README.md b/README.md index 8901621..06f7460 100644 --- a/README.md +++ b/README.md @@ -101,3 +101,5 @@ This extracted key can then be given to an adversarial server.
The server can th } + + diff --git a/src/lib.rs b/src/lib.rs index 226ac95..07bf270 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,7 @@ #![deny(missing_docs)] #![feature(external_doc)] #![doc(include = "../README.md")] +#![doc(include = "../ANONYMITY.md")] use bit_vec::BitVec; use curve25519_dalek::constants::RISTRETTO_BASEPOINT_POINT; use curve25519_dalek::digest::Digest; @@ -46,6 +47,12 @@ impl FuzzySecretKey { pub struct FuzzyDetectionKey(Vec); impl FuzzyDetectionKey { + + /// calculate the ideal false positive rate of this detection key: 2^-n, where n is the number of entries held by the key + pub fn false_positive_probability(&self) -> f64 { + (2.0_f64).powi(0 - (self.0.len() as i32)) + } + /// returns true if the tag was intended for this key pub fn test_tag(&self, tag: &FuzzyTag) -> bool { let m = FuzzyTagKeyPair::g(tag.u, &tag.ciphertexts); @@ -241,7 +248,7 @@ mod tests { } println!( "Expected False Positive Rate: {}\nActual False Positive Rate: {}", - (2.0_f64).powi(-3), + key.secret_key.extract(3).false_positive_probability(), (false_positives as f64 / number_of_messages as f64) ); }