Datasets, Server upgrades + Nicer Graphs

This commit is contained in:
Sarah Jamie Lewis 2021-02-12 23:36:04 -08:00
parent 3ebbaf640b
commit e4d3b2809f
12 changed files with 392551 additions and 72 deletions

View File

@ -15,4 +15,9 @@ termcolor = "1.1.2"
clap = "3.0.0-beta.2"
tracing = "0.1.22"
tracing-subscriber = "0.2.15"
hex = "0.4.2"
hex = "0.4.2"
csv = "1.1"
serde = { version = "1", features = ["derive"] }
rayon = "1.5.0"
itertools = "0.10.0"
rug = "1.11.0"

59836
datasets/CollegeMsg.txt Normal file

File diff suppressed because it is too large Load Diff

13
datasets/README.md Normal file
View File

@ -0,0 +1,13 @@
## Email EU Core Temporal
Nodes: 986
Temporal Edges: 332334
Edges in static graph: 24929
Time span: 803 days
Ashwin Paranjape, Austin R. Benson, and Jure Leskovec. "Motifs in Temporal Networks." In Proceedings of the Tenth ACM International Conference on Web Search and Data Mining, 2017.
Source, Destination, Time (Seconds)
## College
Pietro Panzarasa, Tore Opsahl, and Kathleen M. Carley. "Patterns and dynamics of users' behavior and interaction: Network analysis of an online community." Journal of the American Society for Information Science and Technology 60.5 (2009): 911-932.

File diff suppressed because it is too large Load Diff

126
src/datasets.rs Normal file
View File

@ -0,0 +1,126 @@
use serde::Deserialize;
use crate::server::SimulatedServer;
use fuzzytags::{RootSecret, TaggingKey};
use rand::Rng;
use tracing::event;
use tracing::span;
use tracing::Level;
use crate::oracle::Oracle;
/// A source of recorded sender/recipient events that can be replayed against a
/// [`SimulatedServer`] while mirroring the ground truth into an [`Oracle`].
pub trait TemporalDataset {
    /// Registers every party of the dataset with `server` and `oracle`.
    ///
    /// `rng`, `min_p` and `max_p` are handed to the implementation so it can pick a
    /// per-party parameter when extracting detection keys.
    fn register_with_server<R: Rng>(
        &self,
        server: &mut SimulatedServer,
        rng: &mut R,
        min_p: usize,
        max_p: usize,
        oracle: &mut Oracle,
    );

    /// Replays the dataset's recorded events into `server` and `oracle`.
    fn playthough_traffic(&self, server: &mut SimulatedServer, oracle: &mut Oracle);
}
/// One row of a temporal edge list, deserialized from the dataset file via `serde`.
#[derive(Debug, Clone, Deserialize)]
struct TemporalSocialNetworkRecord {
    /// Index of the node that sent the message.
    src_node: usize,
    /// Index of the node the message was addressed to.
    dst_node: usize,
    /// Time of the event; `playthough_traffic` interprets it as seconds when it
    /// reports elapsed days.
    timestamp: u64,
}
/// A temporal dataset loaded from a delimited text file, together with freshly
/// generated key material for every node that appears in it.
///
/// `root_secrets` and `tagging_keys` are parallel vectors indexed by node id:
/// `tagging_keys[i]` is the tagging key derived from `root_secrets[i]`.
pub struct CsvDataset {
    /// One root secret per node id.
    root_secrets: Vec<RootSecret<24>>,
    /// The tagging key derived from the root secret at the same index.
    tagging_keys: Vec<TaggingKey<24>>,
    /// The events of the dataset, in file order.
    records: Vec<TemporalSocialNetworkRecord>,
}
impl CsvDataset {
    /// Loads a temporal edge list from `filename` and generates key material for
    /// every node it references.
    ///
    /// Node ids are used directly as indices, so one root secret / tagging key pair
    /// is generated for each id in `0..=max_id`, where `max_id` is the largest
    /// source or destination id found in the file. A file without any records
    /// yields a dataset with no parties.
    ///
    /// # Panics
    ///
    /// Panics if the file cannot be opened or if any record fails to deserialize.
    /// The underlying `csv` error is included in the panic message.
    pub fn load_dataset(filename: &str) -> CsvDataset {
        let mut rdr = csv::Reader::from_path(filename)
            .unwrap_or_else(|err| panic!("unable to open dataset {}: {}", filename, err));

        let mut records: Vec<TemporalSocialNetworkRecord> = Vec::new();
        // Largest node id seen so far; `None` until the first record is read.
        let mut max_node: Option<usize> = None;
        for result in rdr.deserialize() {
            let record: TemporalSocialNetworkRecord = match result {
                Ok(record) => record,
                Err(err) => panic!("invalid data record found in {}: {}", filename, err),
            };
            let highest = record.src_node.max(record.dst_node);
            max_node = Some(max_node.map_or(highest, |seen| seen.max(highest)));
            records.push(record);
        }

        // Generate a root secret (and its tagging key) for each member of the network.
        let num_parties = max_node.map_or(0, |max| max + 1);
        let mut root_secrets = Vec::with_capacity(num_parties);
        let mut tagging_keys = Vec::with_capacity(num_parties);
        for _ in 0..num_parties {
            let secret = RootSecret::<24>::generate();
            tagging_keys.push(secret.tagging_key());
            root_secrets.push(secret);
        }

        CsvDataset {
            root_secrets,
            tagging_keys,
            records,
        }
    }

    /// Number of parties (node ids) for which key material was generated.
    pub fn num_parties(&self) -> usize {
        self.root_secrets.len()
    }

    /// Number of events loaded from the dataset file.
    pub fn num_records(&self) -> usize {
        self.records.len()
    }
}
impl TemporalDataset for CsvDataset {
fn register_with_server<R>(&self, server: &mut SimulatedServer, rng: &mut R, min_p: usize, max_p: usize, oracle: &mut Oracle) where
R: Rng {
for secret in self.root_secrets.iter() {
let n = rng.gen_range(min_p..max_p);
let span = span!(Level::INFO, "register", party = secret.tagging_key().id().as_str());
let _enter = span.enter();
let detection_key = secret.extract_detection_key(n);
event!(Level::TRACE, "create detection key {detection_key}", detection_key = detection_key.id().as_str());
event!(Level::TRACE, "register with server");
server.register_key(&detection_key, &secret.tagging_key());
oracle.register_party(secret.tagging_key().id());
}
}
fn playthough_traffic(&self, server: &mut SimulatedServer,oracle: &mut Oracle) {
/// TODO timestamps?
for (i,record) in self.records.iter().enumerate() {
if i %1000 == 0 {
let progress = i as f64 /(self.records.len() as f64);
let days = (((record.timestamp as f64 / 60.0) / 60.0) / 24.0);
event!(Level::INFO, "progress..{:.2} ({} days)", progress*100.0, days);
}
// We pretend that the server will always have access to the sender, even though
// in practical deployments we could mitigate this somewhat using Tor / mixnet.
let tagging_key_src = &self.tagging_keys[record.src_node];
let tagging_key_dst = &self.tagging_keys[record.dst_node];
event!(Level::TRACE, "regular send {party}", party = tagging_key_dst.id().as_str());
let tag = tagging_key_dst.generate_tag();
event!(Level::TRACE, "message sent server {tag}", tag = tag.to_string());
server.add_message(tag, tagging_key_src);
oracle.add_event(tagging_key_src.id(), tagging_key_dst.id(), None, 0.0);
}
}
}
#[cfg(test)]
mod tests {
    use crate::datasets::CsvDataset;

    /// Loading the Email-EU-core temporal dataset yields one record per temporal edge.
    #[test]
    fn it_works() {
        let loaded = CsvDataset::load_dataset("datasets/email-Eu-core-temporal.txt");
        let record_count = loaded.num_records();
        assert_eq!(332334, record_count);
    }
}

View File

@ -6,7 +6,8 @@ use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
mod oracle;
mod parties;
mod server;
mod datasets;
mod probability;
use clap::Clap;
use tracing::event;
@ -14,6 +15,7 @@ use crate::oracle::Oracle;
use tracing::Level;
use tracing_subscriber;
use tracing_subscriber::FmtSubscriber;
use crate::datasets::{CsvDataset, TemporalDataset};
#[derive(Clap)]
#[clap(version = "1.0", author = "Sarah Jamie Lewis <sarah@openprivacy.ca>")]
@ -23,6 +25,12 @@ struct Opts {
#[clap(short, long, default_value = "10")]
num_parties: usize,
/// dataset
#[clap(short, long, default_value = "")]
dataset: String,
/// samples per round
#[clap(short, long, default_value = "10")]
samples_per_round: usize,
@ -56,22 +64,26 @@ fn main() {
let mut rng = rand::thread_rng();
let mut server = SimulatedServer::new();
let simulated_parties = SimulatedParties::new_simulation(opts.num_parties);
{
event!(Level::INFO, "Generating {} Parties and registering them with the server", opts.num_parties);
simulated_parties.register_with_server(&mut server, &mut rng, opts.min_p, opts.max_p, &mut oracle);
}
if opts.dataset == "" {
let simulated_parties = SimulatedParties::new_simulation(opts.num_parties);
{
event!(Level::INFO, "Generating {} Parties and registering them with the server", opts.num_parties);
simulated_parties.register_with_server(&mut server, &mut rng, opts.min_p, opts.max_p, &mut oracle);
}
let pareto = Pareto::new(1.0, 1.0).unwrap();
let pareto = Pareto::new(1.0, 1.0).unwrap();
{
event!(Level::INFO, "Simulating message sends using {} samples from a pareto distribution...", opts.samples_per_round);
(0..opts.samples_per_round).for_each(|_i| simulated_parties.sample_traffic(&mut server, &mut rng, pareto, opts.prob_entangled, &mut oracle));
}
{
event!(Level::INFO, "Simulating message sends using {} samples from a pareto distribution...", opts.samples_per_round);
(0..opts.samples_per_round).for_each(|_i| simulated_parties.sample_traffic(&mut server, &mut rng, pareto, opts.prob_entangled, &mut oracle));
}
} else {
let dataset = CsvDataset::load_dataset(opts.dataset.as_str());
event!(Level::INFO, "Registering parties from {} which containts {}", opts.dataset, dataset.num_parties());
dataset.register_with_server(&mut server, &mut rng, opts.min_p, opts.max_p, &mut oracle);
{
event!(Level::INFO, "Simulating Adversarial Server Processing Messages..");
server.test_messages();
event!(Level::INFO, "Playing back {} events from {}", dataset.num_records(), opts.dataset);
dataset.playthough_traffic(&mut server, &mut oracle);
}
{
@ -85,15 +97,16 @@ fn main() {
if_uniform
);
let mut stdout = StandardStream::stdout(ColorChoice::Always);
for (party, stats) in party_stats.iter() {
if stats.trivial_breaks > 0 || (stats.observed_messages > 2 && stats.observed_skew > (if_uniform * stats.ideal_rate)) {
for (index, (party, stats)) in party_stats.iter().enumerate() {
if stats.trivial_breaks > 0 || (stats.observed_messages > 2 && stats.observed_skew > 2.0) {
stdout.set_color(ColorSpec::new().set_fg(Some(Color::Red))).unwrap();
} else {
stdout.set_color(ColorSpec::new().set_fg(Some(Color::Green))).unwrap();
}
writeln!(
&mut stdout,
"Party {} | Ideal: {:>8.2} {:>8.2} | Observed: {:>8.2} ({:>6.2}) | Skew: {:>8.2} ({:>6.2}) | Trivial Attributions this Round: {}",
"({}) Party {} | Ideal: {:>8.2} {:>8.2} | Observed: {:>8.2} ({:>6.2}) | Skew: {:>8.2} ({:>6.2}) | Trivial Attributions this Round: {}",
index,
party,
stats.ideal_rate,
stats.ideal_rate * (round_stats.num_messages as f64),
@ -105,11 +118,11 @@ fn main() {
)
.unwrap();
}
let max = server_oracle.compile_to_dot("server_event.dot", true, false, 1);
let max = server_oracle.compile_to_dot("server_event.dot", true, false, 1.0);
println!("Max from server: {}", max);
oracle.compile_to_dot("actual_events.dot", false, false, max);
oracle.compile_to_dot("actual_events.dot", false, false, 1.0);
server_oracle.compile_to_dot("server_event_inverse.dot", true, true,max);
//server_oracle.compile_to_dot("server_event_inverse.dot", true, true,max);
}
});

View File

@ -12,6 +12,7 @@ pub struct Event {
sender: String,
intended_receiver: String,
entangled_receiver: Option<String>,
confidence: f64
}
#[derive(Clone)]
@ -32,29 +33,26 @@ impl Oracle {
self.parties.push(party);
}
pub fn add_event(&mut self, sender: String, intended_receiver: String, entangled_receiver: Option<String>) {
pub fn add_event(&mut self, sender: String, intended_receiver: String, entangled_receiver: Option<String>, confidence: f64) {
self.actual_events.push(Event {
sender,
intended_receiver,
entangled_receiver,
confidence,
});
}
pub fn compile_to_dot(&self, filename: &str, strict: bool, inverse: bool, max: u64) -> u64 {
pub fn compile_to_dot(&self, filename: &str, strict: bool, inverse: bool, max: f64) -> f64 {
let mut output = File::create(filename).unwrap();
if strict {
write!(output, "strict ");
}
write!(output, "digraph {{\n");
write!(output, "strict digraph {{\n");
write!(
output,
r#" dim=3;
overlap=true;
splines = true;
dpi=400;
penwidth = 1;
bgcolor = "black";
"#
r##"
K=1.3;
repulsiveforce=0.00001;
dpi=2400;
bgcolor = "#111111ff";
"##
);
@ -66,32 +64,33 @@ impl Oracle {
let r = hex::decode(party).unwrap()[0];
let g = hex::decode(party).unwrap()[1];
let b = hex::decode(party).unwrap()[2];
writeln!(output, "\"{}\" [shape=point, color=\"#{:x}{:x}{:x}\",pos=\"{},{}!\"]", party, r, g, b,x,y);
writeln!(output, "\"{}\" [shape=point, penwidth=0, fixedsize=true, width=0.005,height=0.005,peripheries=0,style=\"filled,setlinewidth(0)\", color=\"#{:02x}{:02x}{:02x}\"]", party, r, g, b);
//writeln!(output, "\"{}\" [shape=point, color=\"#{:x}{:x}{:x}\",pos=\"{},{}!\"]", party, r, g, b,x,y);
}
let mut real_connection_map: HashMap<(String, String), u64> = HashMap::new();
let mut entangled_connection_map: HashMap<(String, String), u64> = HashMap::new();
let mut max_conn = max;
let mut real_connection_map: HashMap<(String, String), f64> = HashMap::new();
let mut entangled_connection_map: HashMap<(String, String), f64> = HashMap::new();
let mut max_conn = 1.0;
for event in self.actual_events.iter() {
let key = (event.sender.clone(), event.intended_receiver.clone());
if real_connection_map.contains_key(&key) {
*real_connection_map.get_mut(&key).unwrap() += 1;
*real_connection_map.get_mut(&key).unwrap() += (1.0 * (1.0-event.confidence));
if real_connection_map[&key] > max_conn {
max_conn = real_connection_map[&key];
}
} else {
real_connection_map.insert(key, 1);
real_connection_map.insert(key, 1.0 * (1.0-event.confidence));
}
match &event.entangled_receiver {
Some(entangled_receiver) => {
let key = (event.sender.clone(), entangled_receiver.clone());
if entangled_connection_map.contains_key(&key) {
*entangled_connection_map.get_mut(&key).unwrap() += 1;
*entangled_connection_map.get_mut(&key).unwrap() += 1.0;
if entangled_connection_map[&key] > max_conn {
max_conn = entangled_connection_map[&key];
}
} else {
entangled_connection_map.insert(key, 1);
entangled_connection_map.insert(key, 1.0);
}
}
_ => {}
@ -100,27 +99,22 @@ impl Oracle {
for ((sender, receiver), size) in real_connection_map.iter() {
let normalized = (*size as f64 / max_conn as f64);
let mut transparency = (normalized * 172.0) as u8 + 64;
let mut penwidth = ((normalized * 2048.0) as f64).log2();
if inverse {
penwidth = 4.0 - penwidth;
transparency = 255 - transparency;
}
let mut transparency = (normalized * 64.0) as u8 + 172;
let mut penwidth = (normalized * 0.01) as f64;
writeln!(
output,
"\"{}\" -> \"{}\" [arrowhead=none, penwidth={}, color=\"#ffffff{:x}\"]",
sender, receiver, penwidth, transparency
"\"{}\" -> \"{}\" [arrowhead=none, penwidth={}, color=\"#ffffff{:02x}\", weight={}]",
sender, receiver, f64::max(0.005, penwidth), transparency, penwidth
);
}
for ((sender, receiver), size) in entangled_connection_map.iter() {
let normalized = (*size as f64 / max_conn as f64);
let transparency = (normalized * 172.0) as u8 + 64;
let penwidth = ((normalized * 2048.0) as f64).log2();
let penwidth = ((normalized * 32.0) as f64).log2();
writeln!(
output,
"\"{}\" -> \"{}\" [arrowhead=none, style=dashed, penwidth={}, color=\"#ffffff{:x}\"]",
"\"{}\" -> \"{}\" [arrowhead=none, style=dashed, penwidth={}, color=\"#ffffff{:02x}\"]",
sender, receiver, penwidth, transparency
);
}

View File

@ -71,7 +71,7 @@ impl SimulatedParties {
server.add_message(tag, &sender_public_key);
}
oracle.add_event(sender_public_key.id().clone(), receiver_public_key.id(), Some(receiver_public_key_2.id()));
oracle.add_event(sender_public_key.id().clone(), receiver_public_key.id(), Some(receiver_public_key_2.id()),1.0);
} else {
event!(Level::INFO, "regular send {party}", party = receiver_public_key.id().as_str());
for _i in 0..v {
@ -79,7 +79,7 @@ impl SimulatedParties {
event!(Level::INFO, "message sent server {tag}", tag = tag.to_string());
server.add_message(tag, &sender_public_key);
}
oracle.add_event(sender_public_key.id().clone(), receiver_public_key.id(), None);
oracle.add_event(sender_public_key.id().clone(), receiver_public_key.id(), None,1.0);
}
}
}

View File

@ -0,0 +1,33 @@
/// Calculates the binomial coefficient "`n` choose `k`" as an `f64`.
///
/// Returns `0.0` when `k > n`. The coefficient is computed exactly as an
/// arbitrary-precision integer and only converted to floating point at the very
/// end via `rug::Integer::to_f64`.
///
/// # Panics
///
/// Panics if `n` does not fit in a `u32` (the widest argument the underlying
/// arbitrary-precision routine accepts), instead of silently truncating it.
pub fn nchoosek(n: u64, k: u64) -> f64 {
    use std::convert::TryFrom;

    if k > n {
        return 0.0;
    }
    let n = u32::try_from(n).expect("nchoosek: n does not fit in a u32");
    // k <= n holds here, so this conversion cannot fail once n has been converted.
    let k = u32::try_from(k).expect("nchoosek: k does not fit in a u32");
    rug::Integer::from(rug::Integer::binomial_u(n, k)).to_f64()
}
/// The probability that a given outcome (with probability `p`) will occur at least
/// `k` times in `n` independent trials (aka Bernoulli trials).
///
/// Computed as the sum over `x` in `k..=n` of
/// `nchoosek(n, x) * p^x * (1 - p)^(n - x)`, accumulated in a 64-bit precision
/// `rug::Float`. The powers themselves are evaluated in `f64`.
///
/// Returns zero when `k > n`, since the summation range is then empty.
pub fn at_least_with_replacement(k: u64, n: u64, p: f64) -> rug::Float {
    let mut prob_at_least = rug::Float::with_val(64, 0.0);
    // The upper bound is inclusive: "at least k" must also count the case where
    // all n trials succeed.
    for x in k..=n {
        prob_at_least += nchoosek(n, x) * rug::Float::with_val(64, p.powi(x as i32)) * rug::Float::with_val(64, (1.0 - p).powi((n - x) as i32));
    }
    prob_at_least
}
#[cfg(test)]
mod tests {
    use crate::probability::binomial::{at_least_with_replacement, nchoosek};

    #[test]
    fn test_nchoosek() {
        // `nchoosek` returns an f64; 17310309456440 is below 2^53 and therefore
        // exactly representable, so an exact comparison is valid here.
        assert_eq!(17310309456440.0, nchoosek(100, 10));
    }

    #[test]
    fn test_at_least() {
        // probability of at least 48 heads in 100 coin flips
        let prob = at_least_with_replacement(48, 100, 0.5).to_f64();
        // Compare with a tolerance rather than bit-for-bit: the result is a sum of
        // rounded floating point terms.
        assert!(
            (prob - 0.691350293205374).abs() < 1e-12,
            "unexpected probability: {}",
            prob
        );
    }
}

View File

@ -0,0 +1,62 @@
use crate::probability::binomial;
use std::cmp::min;
/// Probability of drawing exactly `x` desired items (out of `m` desired items in
/// the population) in `k` draws from a population of `n` items, for a process that
/// can be characterized by the `hypergeometric distribution`:
/// That is:
/// * The result of each draw (the elements of the population being sampled) can be classified into one of two mutually exclusive categories (e.g. Red/Blue or Not Red/Red).
/// * The probability of a success changes on each draw, as each draw decreases the population (sampling without replacement from a finite population).
///
/// The value is `C(m, x) * C(n - m, k - x) / C(n, k)`.
///
/// Returns zero for impossible or inconsistent arguments
/// (`x > k`, `k > n`, `m > n` or `x > m`).
pub fn exactly_without_replacement(x: u64, m: u64, k: u64, n: u64) -> rug::Float {
    // println!("Selecting exactly {} (out of {}) from {} samples of {}",x,m,k,n);
    if x > k || k > n || m > n || m < x {
        return rug::Float::with_val(64, 0.0);
    }

    // If the whole population is desired then every one of the k draws is a
    // success, so "exactly x" is certain when x == k and impossible otherwise.
    if m == n {
        let certain = if x == k { 1.0 } else { 0.0 };
        return rug::Float::with_val(64, certain);
    }

    // The guards above ensure that `n - m` and `k - x` cannot underflow.
    let mchoosex = rug::Float::with_val(64, binomial::nchoosek(m, x));
    let nsubmchooseksubx = rug::Float::with_val(64, binomial::nchoosek(n - m, k - x));
    let nchoosek = rug::Float::with_val(64, binomial::nchoosek(n, k));
    (mchoosex * nsubmchooseksubx) / nchoosek
}
/// Probability of drawing at least `w` desired items (out of `m` desired items in
/// the population) in `k` draws from a population of `n` items, for a process that
/// can be characterized by the `hypergeometric distribution`:
/// That is:
/// * The result of each draw (the elements of the population being sampled) can be classified into one of two mutually exclusive categories (e.g. Red/Blue or Not Red/Red).
/// * The probability of a success changes on each draw, as each draw decreases the population (sampling without replacement from a finite population).
/// This implementation is based on summing over `exactly_without_replacement`.
///
/// Returns zero for inconsistent arguments (`k > n` or `m > n`), matching
/// `exactly_without_replacement`, and whenever `w` exceeds `min(m, k)`.
pub fn at_least_without_replacement(w: u64, m: u64, k: u64, n: u64) -> rug::Float {
    // println!("Selecting at least {} (out of {}) from {} samples of {}",w,m,k,n);
    if k > n || m > n {
        return rug::Float::with_val(64, 0.0);
    }

    // If the whole population is desired then exactly k desired items are drawn,
    // so the outcome is certain when w <= k and impossible otherwise.
    if m == n {
        let certain = if w <= k { 1.0 } else { 0.0 };
        return rug::Float::with_val(64, certain);
    }

    // Asking for all m desired items leaves a single term in the sum.
    if w == m {
        return exactly_without_replacement(w, m, k, n);
    }

    // No more than min(m, k) desired items can ever be drawn.
    let mut prob_at_least = rug::Float::with_val(64, 0.0);
    for x in w..=min(m, k) {
        prob_at_least += exactly_without_replacement(x, m, k, n);
    }
    prob_at_least
}
#[cfg(test)]
mod tests {
    use crate::probability::hypergeometric::at_least_without_replacement;

    /// Absolute tolerance used when comparing computed probabilities.
    const TOLERANCE: f64 = 1e-12;

    #[test]
    fn test_at_least_without_replacement() {
        // 3 desired items in a population of 4, 3 draws: at most one draw can miss,
        // so at least 2 desired items are always drawn (3/4 + 1/4 = 1).
        let prob = at_least_without_replacement(2, 3, 3, 4).to_f64();
        assert!((prob - 1.0).abs() < TOLERANCE, "unexpected probability: {:.12}", prob);
    }

    #[test]
    fn test_at_least_without_replacement_2() {
        // 1 desired item in a population of 9, 3 draws:
        // C(1,1) * C(8,2) / C(9,3) = 28 / 84 = 1/3.
        let prob = at_least_without_replacement(1, 1, 3, 9).to_f64();
        assert!((prob - 1.0 / 3.0).abs() < TOLERANCE, "unexpected probability: {:.12}", prob);
    }
}

11
src/probability/mod.rs Normal file
View File

@ -0,0 +1,11 @@
//! Welcome to the probability theory module!
//! In this module we encapsulate a bunch of different theory useful for probabilistic modelling.

/// Calculating properties of binomial distributions,
/// e.g. the probability of flipping heads at least 48 times out of 100 flips.
pub mod binomial;
/// Calculating properties of hypergeometric distributions,
/// e.g. the chance of drawing at least 2 red balls from a bucket containing 4 red balls and 6 non
/// red balls.
pub mod hypergeometric;

View File

@ -1,18 +1,30 @@
use crate::oracle::Oracle;
use fuzzytags::{DetectionKey, TaggingKey, Tag};
use hashbrown::HashMap;
use hashbrown::{HashMap, HashSet};
use tracing::event;
use tracing::span;
use tracing::Level;
use rayon::iter::IntoParallelRefIterator;
use rayon::iter::ParallelIterator;
use std::sync::mpsc::channel;
use std::time::Duration;
use std::sync::Arc;
use itertools::Itertools;
use crate::probability::binomial::{nchoosek, at_least_with_replacement};
pub struct SimulatedServer {
keybase: Vec<(DetectionKey<24>, TaggingKey<24>)>,
messages: Vec<(Tag<24>, TaggingKey<24>)>,
tags_to_keys_cache: HashMap<String, Vec<TaggingKey<24>>>,
keys_to_tags_cache: HashMap<String, Vec<Tag<24>>>,
sender_tags: HashMap<String, String>,
sender_count: HashMap<String, f64>,
tags_to_keys_cache: HashMap<String, HashSet<String>>,
keys_to_tags_cache: HashMap<String, HashSet<String>>,
oracle: Oracle,
}
struct Event(String,String,String,f64);
pub struct RoundStatistics {
pub num_registered_parties: usize,
pub num_messages: usize,
@ -34,6 +46,8 @@ impl SimulatedServer {
SimulatedServer {
keybase: vec![],
messages: vec![],
sender_tags: HashMap::new(),
sender_count: HashMap::new(),
tags_to_keys_cache: HashMap::new(),
keys_to_tags_cache: HashMap::new(),
oracle: Oracle::new(),
@ -42,37 +56,52 @@ impl SimulatedServer {
pub fn register_key(&mut self, detection_key: &DetectionKey<24>, tagging_key: &TaggingKey<24>) {
self.keybase.push((detection_key.clone(), tagging_key.clone()));
self.keys_to_tags_cache.insert(tagging_key.id(), vec![]);
self.keys_to_tags_cache.insert(tagging_key.id(), HashSet::new());
self.oracle.register_party(tagging_key.id());
}
pub fn add_message(&mut self, tag: Tag<24>, sender_tagging_key: &TaggingKey<24>) {
self.messages.push((tag.clone(), sender_tagging_key.clone()));
self.tags_to_keys_cache.insert(tag.to_string(), vec![]);
}
self.tags_to_keys_cache.insert(tag.to_string(), HashSet::new());
self.sender_tags.insert(tag.to_string(), sender_tagging_key.id());
pub fn test_messages(&mut self) {
for (message, sender) in self.messages.iter() {
for (detection_key, public_key) in self.keybase.iter() {
let span = span!(Level::TRACE, "{detection}", party = public_key.id().as_str());
let _enter = span.enter();
if detection_key.test_tag(message) {
event!(Level::TRACE, "Matched detection key for {key} to tag {tag} ", key = public_key.id(), tag = message.to_string());
self.tags_to_keys_cache.get_mut(message.to_string().as_str()).unwrap().push((*public_key).clone());
self.keys_to_tags_cache.get_mut(public_key.id().as_str()).unwrap().push((*message).clone());
self.oracle.add_event(sender.id(), public_key.id(), None);
let count = match self.sender_count.get(sender_tagging_key.id().as_str()) {
Some(count) => *count + 1.0,
_ => 1.0,
};
self.sender_count.insert(sender_tagging_key.id(), count);
let (tx, rx) = channel();
self.keybase.par_iter().for_each_with(tx.clone(), |tx,(detection_key, receiver_tagging_key)| {
if detection_key.test_tag(&tag) {
let tag_str = tag.to_string();
tx.send(Event(tag_str.clone(), sender_tagging_key.id(), receiver_tagging_key.id(), detection_key.false_positive_probability()));
}
});
std::mem::drop(tx);
loop {
let event = rx.recv();
match event {
Ok(event) => {
event!(Level::TRACE, "Matched detection key for {key} to tag {tag} ", key = event.2, tag = event.0);
self.tags_to_keys_cache.get_mut(event.0.as_str()).unwrap().insert(event.2.clone());
self.keys_to_tags_cache.get_mut(event.2.as_str()).unwrap().insert(event.0.clone());
self.oracle.add_event(event.1.to_string(), event.2.to_string(), None, event.3);
}
_ => {break;}
}
}
}
pub fn statistics(&self) -> (Oracle, RoundStatistics, HashMap<String, PartyStatistics>) {
let mut party_stats = HashMap::new();
let round_stats = RoundStatistics {
num_messages: self.messages.len(),
num_registered_parties: self.keybase.len(),
};
for (party, pub_key) in self.keybase.iter() {
for (recipient_index,(party, pub_key)) in self.keybase.iter().enumerate() {
let matched = self.keys_to_tags_cache[pub_key.id().as_str()].clone();
let observed_messages = matched.len();
let ideal_rate = party.false_positive_probability();
@ -83,12 +112,34 @@ impl SimulatedServer {
let observed_skew = (observed_messages as f64) / expected_messages;
let mut trivial_breaks = 0;
let mut num_times_matched_with = HashMap::new();
for tag in matched.iter() {
let sender = self.sender_tags[tag].clone();
let num = match num_times_matched_with.get(sender.as_str()) {
Some(num) => *num + 1.0,
_ => 1.0,
};
num_times_matched_with.insert(sender, num);
if self.tags_to_keys_cache[tag.to_string().as_str()].len() == 1 {
trivial_breaks += 1;
}
}
for (sender, count) in num_times_matched_with.iter() {
let expected_matched_count = (ideal_rate * self.sender_count[sender.as_str()]);
let actual_matched_count = (*count);
let prob = at_least_with_replacement( actual_matched_count as u64, self.sender_count[sender.as_str()] as u64,ideal_rate).to_f64();
let diff = f64::abs(actual_matched_count - expected_matched_count);
// these numbers are arbitrary, but the point is probability only works if your sample is big enough...
if diff > 4.0 && actual_matched_count > expected_matched_count && prob < 0.01 {
let sender_index = self.keybase.iter().find_position(|(d,t)|t.id() == *sender).unwrap().0;
event!(Level::INFO, "Found Anomalous Relationship Between {sender}({sender_index}) and {recipient}({recipient_index}) {falsepositiverate}, {detected} >> {expected} | Probability of Event: {prob:.9}", sender=sender, sender_index=sender_index, recipient=pub_key.id(), recipient_index=recipient_index, falsepositiverate=ideal_rate, detected=actual_matched_count, expected=expected_matched_count, prob=prob);
}
}
let p_stats = PartyStatistics {
ideal_rate,
expected_messages,