From d0c95cbc669589d3e8768962f1113d7ff235e3c3 Mon Sep 17 00:00:00 2001
From: Sarah Jamie Lewis <sarah@openprivacy.ca>
Date: Sun, 31 Jan 2021 13:21:44 -0800
Subject: [PATCH] New Integration Notes

---
 ANONYMITY.md | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 Cargo.toml   |  2 +-
 README.md    |  2 ++
 src/lib.rs   |  9 ++++++-
 4 files changed, 83 insertions(+), 2 deletions(-)
 create mode 100644 ANONYMITY.md

diff --git a/ANONYMITY.md b/ANONYMITY.md
new file mode 100644
index 0000000..e61e90c
--- /dev/null
+++ b/ANONYMITY.md
@@ -0,0 +1,72 @@
+## Integrating FuzzyTags
+
+The properties provided by this system are highly dependent on selecting a false positive rate _p_. In the following
+sections we will cover a number of considerations you should take into account when integrating fuzzytags into a larger
+privacy preserving application.
+
+### How bad is it to let people select their own false-positive rates?
+
+The short answer is "it depends". 
+
+The longer answer:
+
+When different parties have different false positive rates, the server can calculate the skew between a party's ideal
+false positive rate and its observed false positive rate.
+
+That skew leaks information, especially given certain message distributions. Specifically it leaks parties
+ who receive a larger proportion of system messages than their ideal false positive rate.
+
+i.e. for low false positive rates and high message volume for a specific receiver, the adversarial server
+ can calculate a skew that leaks the recipient of individual messages - breaking privacy for that receiver.
+
+It *also* removes those messages from the pool of messages that an adversarial server needs to consider for other receivers,
+effectively reducing the anonymity set for everyone else.
+
+Which brings us onto:
+
+### Differential Attacks
+
+Any kind of differential attack breaks this scheme, even for a small number of messages, i.e. if you learn (through
+any means) that a specific set of messages are all likely for 1 party, you can diff them against all other parties' keys and
+very quickly isolate the intended recipient - in simulations of 100-1000 parties it can take as few as 3 messages - even
+with everyone selecting fairly high false positive rates.
+
+The corollary of the above being that in differential attacks your anonymity set is basically the number of users 
+who download all messages - since you can't diff them. This has the interesting side effect: the more parties who 
+download everything, the more the system can safely tolerate parties with small false-positive rates.
+
+To what extent you can actually account for this in your application is an open question.
+
+### Should Senders use an anonymous communication network?
+
+If differential attacks are likely, e.g. few parties download everything and
+multiple messages are expected to originate from a sender to a receiver, or there
+is other information that might otherwise link a set of messages to a receiver, then you may want to consider how
+to remove that context.
+
+One potential way of removing context is by having senders send their message to the server through some kind of anonymous
+communication network e.g. a mixnet or Tor.
+
+Be warned: This may not eliminate all the context! 
+
+### How bad is it to select a poor choice of _p_?
+
+Consider a _Pareto distribution_ where most users only receive a few messages, and a small subset of users
+receive a large number of messages. In that setting, it seems that increasing the number of parties is
+ generally more important to overall anonymity of the system than any individual selection of _p_.
+ 
+Under a certain threshold  of parties, trivial breaks (i.e. tags that only match to a single party) are a bigger concern. 
+
+Assuming we have a large number of parties (_N_), the following heuristic emerges:
+
+* Parties who only expect to receive a small number of messages can safely choose smaller false positive rates, up
+to a threshold _θ_, where θ > 2^-N. The lower the value of _θ_ the greater the possibility of random trivial breaks for
+the party.
+* Parties who expect a large number of messages should choose to receive **all** messages for 2 reasons:
+    1) Even high false positive rates for power users result in information leaks to the server (due to the large
+    skew) i.e. a server can trivially learn which users are power users.
+    2) By choosing to receive all messages, power users don't sacrifice much in terms of bandwidth, but will provide
+    cover for parties who receive a small number of messages and who want a lower false-positive rate.
+
+(We consider a Pareto distribution here because we expect many applications to have parties that can be
+modelled as such - especially over short time horizons)
\ No newline at end of file
diff --git a/Cargo.toml b/Cargo.toml
index a728055..6cea079 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "fuzzytags"
 description = "a probabilistic cryptographic structure for metadata resistant tagging"
-version = "0.1.1"
+version = "0.1.2"
 repository = "https://git.openprivacy.ca/openprivacy/fuzzytags"
 authors = ["Sarah Jamie Lewis <sarah@openprivacy.ca>"]
 edition = "2018"
diff --git a/README.md b/README.md
index 8901621..06f7460 100644
--- a/README.md
+++ b/README.md
@@ -101,3 +101,5 @@ This extracted key can then be given to an adversarial server. The server can th
     }
 
 
+
+
diff --git a/src/lib.rs b/src/lib.rs
index 226ac95..07bf270 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,6 +1,7 @@
 #![deny(missing_docs)]
 #![feature(external_doc)]
 #![doc(include = "../README.md")]
+#![doc(include = "../ANONYMITY.md")]
 use bit_vec::BitVec;
 use curve25519_dalek::constants::RISTRETTO_BASEPOINT_POINT;
 use curve25519_dalek::digest::Digest;
@@ -46,6 +47,12 @@ impl FuzzySecretKey {
 pub struct FuzzyDetectionKey(Vec<Scalar>);
 
 impl FuzzyDetectionKey {
+
+    /// Calculates the ideal false positive rate of this detection key: 2^-n, where n is the number of scalars in the key.
+    pub fn false_positive_probability(&self) -> f64 {
+        (2.0_f64).powi(0 - (self.0.len() as i32))
+    }
+
     /// returns true if the tag was intended for this key
     pub fn test_tag(&self, tag: &FuzzyTag) -> bool {
         let m = FuzzyTagKeyPair::g(tag.u, &tag.ciphertexts);
@@ -241,7 +248,7 @@ mod tests {
         }
         println!(
             "Expected False Positive Rate: {}\nActual False Positive Rate: {}",
-            (2.0_f64).powi(-3),
+            key.secret_key.extract(3).false_positive_probability(),
             (false_positives as f64 / number_of_messages as f64)
         );
     }