Datasets, Server upgrades + Nicer Graphs
This commit is contained in:
parent
3ebbaf640b
commit
e4d3b2809f
|
@ -15,4 +15,9 @@ termcolor = "1.1.2"
|
|||
clap = "3.0.0-beta.2"
|
||||
tracing = "0.1.22"
|
||||
tracing-subscriber = "0.2.15"
|
||||
hex = "0.4.2"
|
||||
hex = "0.4.2"
|
||||
csv = "1.1"
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
rayon = "1.5.0"
|
||||
itertools = "0.10.0"
|
||||
rug = "1.11.0"
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,13 @@
|
|||
## Email EU Core Temporal
|
||||
|
||||
Nodes: 986
|
||||
Temporal Edges: 332334
|
||||
Edges in static graph: 24929
|
||||
Time span: 803 days
|
||||
|
||||
Ashwin Paranjape, Austin R. Benson, and Jure Leskovec. "Motifs in Temporal Networks." In Proceedings of the Tenth ACM International Conference on Web Search and Data Mining, 2017.
|
||||
Source, Destination, Time (Seconds)
|
||||
|
||||
## College
|
||||
|
||||
Pietro Panzarasa, Tore Opsahl, and Kathleen M. Carley. "Patterns and dynamics of users' behavior and interaction: Network analysis of an online community." Journal of the American Society for Information Science and Technology 60.5 (2009): 911-932.
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,126 @@
|
|||
use serde::Deserialize;
|
||||
use crate::server::SimulatedServer;
|
||||
use fuzzytags::{RootSecret, TaggingKey};
|
||||
use rand::Rng;
|
||||
use tracing::event;
|
||||
use tracing::span;
|
||||
use tracing::Level;
|
||||
use crate::oracle::Oracle;
|
||||
|
||||
pub trait TemporalDataset {
|
||||
fn register_with_server<R>(&self, server: &mut SimulatedServer, rng: &mut R, min_p: usize, max_p: usize, oracle: &mut Oracle) where R: Rng;
|
||||
fn playthough_traffic(&self, server: &mut SimulatedServer, oracle: &mut Oracle);
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
struct TemporalSocialNetworkRecord {
|
||||
src_node: usize,
|
||||
dst_node: usize,
|
||||
timestamp: u64,
|
||||
}
|
||||
|
||||
|
||||
pub struct CsvDataset {
|
||||
root_secrets: Vec<RootSecret<24>>,
|
||||
tagging_keys: Vec<TaggingKey<24>>,
|
||||
records: Vec<TemporalSocialNetworkRecord>
|
||||
}
|
||||
|
||||
impl CsvDataset {
|
||||
pub fn load_dataset(filename: &str) -> CsvDataset {
|
||||
let mut rdr = csv::Reader::from_path(filename).unwrap();
|
||||
|
||||
let mut num_recipients = 0;
|
||||
let mut records : Vec<TemporalSocialNetworkRecord> = vec![];
|
||||
for result in rdr.deserialize() {
|
||||
// Notice that we need to provide a type hint for automatic
|
||||
// deserialization.
|
||||
let record: Result<TemporalSocialNetworkRecord, csv::Error> = result;
|
||||
match record {
|
||||
Ok(record) => {
|
||||
if record.dst_node > num_recipients {
|
||||
num_recipients = record.dst_node;
|
||||
}
|
||||
if record.src_node > num_recipients {
|
||||
num_recipients = record.src_node;
|
||||
}
|
||||
records.push(record.clone());
|
||||
},
|
||||
Err(err) => {
|
||||
panic!("invalid data record found in {}", filename)
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// generate a root secret for each member of the network
|
||||
let mut root_secrets = vec![];
|
||||
let mut tagging_keys = vec![];
|
||||
for i in 0..num_recipients+1 {
|
||||
let secret = RootSecret::<24>::generate();
|
||||
let tagging_key = secret.tagging_key();
|
||||
root_secrets.push(secret);
|
||||
tagging_keys.push(tagging_key)
|
||||
}
|
||||
|
||||
CsvDataset {
|
||||
root_secrets,
|
||||
tagging_keys,
|
||||
records
|
||||
}
|
||||
}
|
||||
|
||||
pub fn num_parties(&self) -> usize {
|
||||
self.root_secrets.len()
|
||||
}
|
||||
|
||||
pub fn num_records(&self) -> usize {
|
||||
self.records.len()
|
||||
}
|
||||
}
|
||||
|
||||
impl TemporalDataset for CsvDataset {
|
||||
fn register_with_server<R>(&self, server: &mut SimulatedServer, rng: &mut R, min_p: usize, max_p: usize, oracle: &mut Oracle) where
|
||||
R: Rng {
|
||||
for secret in self.root_secrets.iter() {
|
||||
let n = rng.gen_range(min_p..max_p);
|
||||
let span = span!(Level::INFO, "register", party = secret.tagging_key().id().as_str());
|
||||
let _enter = span.enter();
|
||||
let detection_key = secret.extract_detection_key(n);
|
||||
event!(Level::TRACE, "create detection key {detection_key}", detection_key = detection_key.id().as_str());
|
||||
event!(Level::TRACE, "register with server");
|
||||
server.register_key(&detection_key, &secret.tagging_key());
|
||||
oracle.register_party(secret.tagging_key().id());
|
||||
}
|
||||
}
|
||||
|
||||
fn playthough_traffic(&self, server: &mut SimulatedServer,oracle: &mut Oracle) {
|
||||
/// TODO timestamps?
|
||||
for (i,record) in self.records.iter().enumerate() {
|
||||
if i %1000 == 0 {
|
||||
let progress = i as f64 /(self.records.len() as f64);
|
||||
let days = (((record.timestamp as f64 / 60.0) / 60.0) / 24.0);
|
||||
event!(Level::INFO, "progress..{:.2} ({} days)", progress*100.0, days);
|
||||
}
|
||||
// We pretend that the server will always have access to the sender, even though
|
||||
// in practical deployments we could mitigate this somewhat using Tor / mixnet.
|
||||
let tagging_key_src = &self.tagging_keys[record.src_node];
|
||||
let tagging_key_dst = &self.tagging_keys[record.dst_node];
|
||||
event!(Level::TRACE, "regular send {party}", party = tagging_key_dst.id().as_str());
|
||||
let tag = tagging_key_dst.generate_tag();
|
||||
event!(Level::TRACE, "message sent server {tag}", tag = tag.to_string());
|
||||
server.add_message(tag, tagging_key_src);
|
||||
oracle.add_event(tagging_key_src.id(), tagging_key_dst.id(), None, 0.0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::datasets::CsvDataset;

    /// Loading the bundled Email EU Core dataset yields one record per
    /// temporal edge.
    #[test]
    fn it_works() {
        let expected_records = 332334;
        let dataset = CsvDataset::load_dataset("datasets/email-Eu-core-temporal.txt");
        assert_eq!(expected_records, dataset.num_records());
    }
}
|
53
src/main.rs
53
src/main.rs
|
@ -6,7 +6,8 @@ use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
|
|||
mod oracle;
|
||||
mod parties;
|
||||
mod server;
|
||||
|
||||
mod datasets;
|
||||
mod probability;
|
||||
use clap::Clap;
|
||||
use tracing::event;
|
||||
|
||||
|
@ -14,6 +15,7 @@ use crate::oracle::Oracle;
|
|||
use tracing::Level;
|
||||
use tracing_subscriber;
|
||||
use tracing_subscriber::FmtSubscriber;
|
||||
use crate::datasets::{CsvDataset, TemporalDataset};
|
||||
|
||||
#[derive(Clap)]
|
||||
#[clap(version = "1.0", author = "Sarah Jamie Lewis <sarah@openprivacy.ca>")]
|
||||
|
@ -23,6 +25,12 @@ struct Opts {
|
|||
#[clap(short, long, default_value = "10")]
|
||||
num_parties: usize,
|
||||
|
||||
|
||||
/// dataset
|
||||
#[clap(short, long, default_value = "")]
|
||||
dataset: String,
|
||||
|
||||
|
||||
/// samples per round
|
||||
#[clap(short, long, default_value = "10")]
|
||||
samples_per_round: usize,
|
||||
|
@ -56,22 +64,26 @@ fn main() {
|
|||
let mut rng = rand::thread_rng();
|
||||
let mut server = SimulatedServer::new();
|
||||
|
||||
let simulated_parties = SimulatedParties::new_simulation(opts.num_parties);
|
||||
{
|
||||
event!(Level::INFO, "Generating {} Parties and registering them with the server", opts.num_parties);
|
||||
simulated_parties.register_with_server(&mut server, &mut rng, opts.min_p, opts.max_p, &mut oracle);
|
||||
}
|
||||
if opts.dataset == "" {
|
||||
let simulated_parties = SimulatedParties::new_simulation(opts.num_parties);
|
||||
{
|
||||
event!(Level::INFO, "Generating {} Parties and registering them with the server", opts.num_parties);
|
||||
simulated_parties.register_with_server(&mut server, &mut rng, opts.min_p, opts.max_p, &mut oracle);
|
||||
}
|
||||
|
||||
let pareto = Pareto::new(1.0, 1.0).unwrap();
|
||||
let pareto = Pareto::new(1.0, 1.0).unwrap();
|
||||
|
||||
{
|
||||
event!(Level::INFO, "Simulating message sends using {} samples from a pareto distribution...", opts.samples_per_round);
|
||||
(0..opts.samples_per_round).for_each(|_i| simulated_parties.sample_traffic(&mut server, &mut rng, pareto, opts.prob_entangled, &mut oracle));
|
||||
}
|
||||
{
|
||||
event!(Level::INFO, "Simulating message sends using {} samples from a pareto distribution...", opts.samples_per_round);
|
||||
(0..opts.samples_per_round).for_each(|_i| simulated_parties.sample_traffic(&mut server, &mut rng, pareto, opts.prob_entangled, &mut oracle));
|
||||
}
|
||||
} else {
|
||||
let dataset = CsvDataset::load_dataset(opts.dataset.as_str());
|
||||
event!(Level::INFO, "Registering parties from {} which containts {}", opts.dataset, dataset.num_parties());
|
||||
dataset.register_with_server(&mut server, &mut rng, opts.min_p, opts.max_p, &mut oracle);
|
||||
|
||||
{
|
||||
event!(Level::INFO, "Simulating Adversarial Server Processing Messages..");
|
||||
server.test_messages();
|
||||
event!(Level::INFO, "Playing back {} events from {}", dataset.num_records(), opts.dataset);
|
||||
dataset.playthough_traffic(&mut server, &mut oracle);
|
||||
}
|
||||
|
||||
{
|
||||
|
@ -85,15 +97,16 @@ fn main() {
|
|||
if_uniform
|
||||
);
|
||||
let mut stdout = StandardStream::stdout(ColorChoice::Always);
|
||||
for (party, stats) in party_stats.iter() {
|
||||
if stats.trivial_breaks > 0 || (stats.observed_messages > 2 && stats.observed_skew > (if_uniform * stats.ideal_rate)) {
|
||||
for (index, (party, stats)) in party_stats.iter().enumerate() {
|
||||
if stats.trivial_breaks > 0 || (stats.observed_messages > 2 && stats.observed_skew > 2.0) {
|
||||
stdout.set_color(ColorSpec::new().set_fg(Some(Color::Red))).unwrap();
|
||||
} else {
|
||||
stdout.set_color(ColorSpec::new().set_fg(Some(Color::Green))).unwrap();
|
||||
}
|
||||
writeln!(
|
||||
&mut stdout,
|
||||
"Party {} | Ideal: {:>8.2} {:>8.2} | Observed: {:>8.2} ({:>6.2}) | Skew: {:>8.2} ({:>6.2}) | Trivial Attributions this Round: {}",
|
||||
"({}) Party {} | Ideal: {:>8.2} {:>8.2} | Observed: {:>8.2} ({:>6.2}) | Skew: {:>8.2} ({:>6.2}) | Trivial Attributions this Round: {}",
|
||||
index,
|
||||
party,
|
||||
stats.ideal_rate,
|
||||
stats.ideal_rate * (round_stats.num_messages as f64),
|
||||
|
@ -105,11 +118,11 @@ fn main() {
|
|||
)
|
||||
.unwrap();
|
||||
}
|
||||
let max = server_oracle.compile_to_dot("server_event.dot", true, false, 1);
|
||||
let max = server_oracle.compile_to_dot("server_event.dot", true, false, 1.0);
|
||||
println!("Max from server: {}", max);
|
||||
oracle.compile_to_dot("actual_events.dot", false, false, max);
|
||||
oracle.compile_to_dot("actual_events.dot", false, false, 1.0);
|
||||
|
||||
server_oracle.compile_to_dot("server_event_inverse.dot", true, true,max);
|
||||
//server_oracle.compile_to_dot("server_event_inverse.dot", true, true,max);
|
||||
}
|
||||
});
|
||||
|
||||
|
|
|
@ -12,6 +12,7 @@ pub struct Event {
|
|||
sender: String,
|
||||
intended_receiver: String,
|
||||
entangled_receiver: Option<String>,
|
||||
confidence: f64
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
|
@ -32,29 +33,26 @@ impl Oracle {
|
|||
self.parties.push(party);
|
||||
}
|
||||
|
||||
pub fn add_event(&mut self, sender: String, intended_receiver: String, entangled_receiver: Option<String>) {
|
||||
pub fn add_event(&mut self, sender: String, intended_receiver: String, entangled_receiver: Option<String>, confidence: f64) {
|
||||
self.actual_events.push(Event {
|
||||
sender,
|
||||
intended_receiver,
|
||||
entangled_receiver,
|
||||
confidence,
|
||||
});
|
||||
}
|
||||
|
||||
pub fn compile_to_dot(&self, filename: &str, strict: bool, inverse: bool, max: u64) -> u64 {
|
||||
pub fn compile_to_dot(&self, filename: &str, strict: bool, inverse: bool, max: f64) -> f64 {
|
||||
let mut output = File::create(filename).unwrap();
|
||||
if strict {
|
||||
write!(output, "strict ");
|
||||
}
|
||||
write!(output, "digraph {{\n");
|
||||
write!(output, "strict digraph {{\n");
|
||||
write!(
|
||||
output,
|
||||
r#" dim=3;
|
||||
overlap=true;
|
||||
splines = true;
|
||||
dpi=400;
|
||||
penwidth = 1;
|
||||
bgcolor = "black";
|
||||
"#
|
||||
r##"
|
||||
K=1.3;
|
||||
repulsiveforce=0.00001;
|
||||
dpi=2400;
|
||||
bgcolor = "#111111ff";
|
||||
"##
|
||||
);
|
||||
|
||||
|
||||
|
@ -66,32 +64,33 @@ impl Oracle {
|
|||
let r = hex::decode(party).unwrap()[0];
|
||||
let g = hex::decode(party).unwrap()[1];
|
||||
let b = hex::decode(party).unwrap()[2];
|
||||
writeln!(output, "\"{}\" [shape=point, color=\"#{:x}{:x}{:x}\",pos=\"{},{}!\"]", party, r, g, b,x,y);
|
||||
writeln!(output, "\"{}\" [shape=point, penwidth=0, fixedsize=true, width=0.005,height=0.005,peripheries=0,style=\"filled,setlinewidth(0)\", color=\"#{:02x}{:02x}{:02x}\"]", party, r, g, b);
|
||||
//writeln!(output, "\"{}\" [shape=point, color=\"#{:x}{:x}{:x}\",pos=\"{},{}!\"]", party, r, g, b,x,y);
|
||||
}
|
||||
|
||||
let mut real_connection_map: HashMap<(String, String), u64> = HashMap::new();
|
||||
let mut entangled_connection_map: HashMap<(String, String), u64> = HashMap::new();
|
||||
let mut max_conn = max;
|
||||
let mut real_connection_map: HashMap<(String, String), f64> = HashMap::new();
|
||||
let mut entangled_connection_map: HashMap<(String, String), f64> = HashMap::new();
|
||||
let mut max_conn = 1.0;
|
||||
for event in self.actual_events.iter() {
|
||||
let key = (event.sender.clone(), event.intended_receiver.clone());
|
||||
if real_connection_map.contains_key(&key) {
|
||||
*real_connection_map.get_mut(&key).unwrap() += 1;
|
||||
*real_connection_map.get_mut(&key).unwrap() += (1.0 * (1.0-event.confidence));
|
||||
if real_connection_map[&key] > max_conn {
|
||||
max_conn = real_connection_map[&key];
|
||||
}
|
||||
} else {
|
||||
real_connection_map.insert(key, 1);
|
||||
real_connection_map.insert(key, 1.0 * (1.0-event.confidence));
|
||||
}
|
||||
match &event.entangled_receiver {
|
||||
Some(entangled_receiver) => {
|
||||
let key = (event.sender.clone(), entangled_receiver.clone());
|
||||
if entangled_connection_map.contains_key(&key) {
|
||||
*entangled_connection_map.get_mut(&key).unwrap() += 1;
|
||||
*entangled_connection_map.get_mut(&key).unwrap() += 1.0;
|
||||
if entangled_connection_map[&key] > max_conn {
|
||||
max_conn = entangled_connection_map[&key];
|
||||
}
|
||||
} else {
|
||||
entangled_connection_map.insert(key, 1);
|
||||
entangled_connection_map.insert(key, 1.0);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
|
@ -100,27 +99,22 @@ impl Oracle {
|
|||
|
||||
for ((sender, receiver), size) in real_connection_map.iter() {
|
||||
let normalized = (*size as f64 / max_conn as f64);
|
||||
let mut transparency = (normalized * 172.0) as u8 + 64;
|
||||
let mut penwidth = ((normalized * 2048.0) as f64).log2();
|
||||
|
||||
if inverse {
|
||||
penwidth = 4.0 - penwidth;
|
||||
transparency = 255 - transparency;
|
||||
}
|
||||
let mut transparency = (normalized * 64.0) as u8 + 172;
|
||||
let mut penwidth = (normalized * 0.01) as f64;
|
||||
|
||||
writeln!(
|
||||
output,
|
||||
"\"{}\" -> \"{}\" [arrowhead=none, penwidth={}, color=\"#ffffff{:x}\"]",
|
||||
sender, receiver, penwidth, transparency
|
||||
"\"{}\" -> \"{}\" [arrowhead=none, penwidth={}, color=\"#ffffff{:02x}\", weight={}]",
|
||||
sender, receiver, f64::max(0.005, penwidth), transparency, penwidth
|
||||
);
|
||||
}
|
||||
for ((sender, receiver), size) in entangled_connection_map.iter() {
|
||||
let normalized = (*size as f64 / max_conn as f64);
|
||||
let transparency = (normalized * 172.0) as u8 + 64;
|
||||
let penwidth = ((normalized * 2048.0) as f64).log2();
|
||||
let penwidth = ((normalized * 32.0) as f64).log2();
|
||||
writeln!(
|
||||
output,
|
||||
"\"{}\" -> \"{}\" [arrowhead=none, style=dashed, penwidth={}, color=\"#ffffff{:x}\"]",
|
||||
"\"{}\" -> \"{}\" [arrowhead=none, style=dashed, penwidth={}, color=\"#ffffff{:02x}\"]",
|
||||
sender, receiver, penwidth, transparency
|
||||
);
|
||||
}
|
||||
|
|
|
@ -71,7 +71,7 @@ impl SimulatedParties {
|
|||
server.add_message(tag, &sender_public_key);
|
||||
}
|
||||
|
||||
oracle.add_event(sender_public_key.id().clone(), receiver_public_key.id(), Some(receiver_public_key_2.id()));
|
||||
oracle.add_event(sender_public_key.id().clone(), receiver_public_key.id(), Some(receiver_public_key_2.id()),1.0);
|
||||
} else {
|
||||
event!(Level::INFO, "regular send {party}", party = receiver_public_key.id().as_str());
|
||||
for _i in 0..v {
|
||||
|
@ -79,7 +79,7 @@ impl SimulatedParties {
|
|||
event!(Level::INFO, "message sent server {tag}", tag = tag.to_string());
|
||||
server.add_message(tag, &sender_public_key);
|
||||
}
|
||||
oracle.add_event(sender_public_key.id().clone(), receiver_public_key.id(), None);
|
||||
oracle.add_event(sender_public_key.id().clone(), receiver_public_key.id(), None,1.0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,33 @@
|
|||
/// calculate the binomial coefficient.
|
||||
pub fn nchoosek(n: u64, k: u64) -> f64{
|
||||
if k > n {
|
||||
return 0.0;
|
||||
}
|
||||
let n_fac = rug::Integer::from(rug::Integer::factorial(n as u32)) / (rug::Integer::from(rug::Integer::factorial(k as u32)) * rug::Integer::from(rug::Integer::factorial((n - k) as u32)));
|
||||
return n_fac.to_f64();
|
||||
}
|
||||
|
||||
/// the probability that a given outcome (with probability p) will occur at least k times in n independent trials (aka Bernoulli trials).
|
||||
pub fn at_least_with_replacement(k: u64, n: u64, p: f64) -> rug::Float {
|
||||
let mut prob_at_least = rug::Float::with_val(64, 0.0);
|
||||
for x in k..n {
|
||||
prob_at_least += nchoosek(n, x) * rug::Float::with_val(64, p.powi(x as i32)) * rug::Float::with_val(64, (1.0 - p).powi((n - x) as i32));
|
||||
}
|
||||
prob_at_least
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::probability::binomial::{at_least_with_replacement, nchoosek};

    #[test]
    fn test_nchoosek() {
        // `nchoosek` returns an f64, so the expected value must be an f64 too
        // (u64 and f64 cannot be compared). 100 choose 10 is below 2^53 and is
        // therefore exactly representable.
        assert_eq!(17310309456440.0, nchoosek(100, 10));
    }

    #[test]
    fn test_at_least() {
        // probability of at least 48 heads in 100 coin flips
        assert_eq!(0.691350293205374, at_least_with_replacement(48, 100, 0.5).to_f64());
    }
}
|
|
@ -0,0 +1,62 @@
|
|||
use crate::probability::binomial;
|
||||
use std::cmp::min;
|
||||
|
||||
/// choose exactly w items (of a max of m) given k trials and n total items for a process that can be characterized by the `hypergeometric distribution`:
|
||||
/// That is:
|
||||
/// * The result of each draw (the elements of the population being sampled) can be classified into one of two mutually exclusive categories (e.g. Red/Blue or Not Red/Red).
|
||||
/// * The probability of a success changes on each draw, as each draw decreases the population (sampling without replacement from a finite population).
|
||||
pub fn exactly_without_replacement(x: u64, m: u64, k: u64, n: u64) -> rug::Float {
|
||||
// println!("Selecting exactly {} (out of {}) from {} samples of {}",x,m,k,n);
|
||||
if x > k || k > n || m > n || m < x {
|
||||
return rug::Float::with_val(64, 0.0);
|
||||
}
|
||||
|
||||
// If the desired population is the same as the total population then the answer is trivial
|
||||
if m == n {
|
||||
return rug::Float::with_val(64, 1.0);
|
||||
}
|
||||
|
||||
let mchoosen = binomial::nchoosek(m, x) * rug::Float::with_val(64, 1.0);
|
||||
let nsubmchooseksubx = binomial::nchoosek(n - m, k - x) * rug::Float::with_val(64, 1.0);
|
||||
let nchoosek = binomial::nchoosek(n, k) * rug::Float::with_val(64, 1.0);
|
||||
(mchoosen * nsubmchooseksubx) / nchoosek
|
||||
}
|
||||
|
||||
/// choose at least w items (of a max of m) given k trials and n total items for a process that can be characterized by the `hypergeometric distribution`:
|
||||
/// That is:
|
||||
/// * The result of each draw (the elements of the population being sampled) can be classified into one of two mutually exclusive categories (e.g. Red/Blue or Not Red/Red).
|
||||
/// * The probability of a success changes on each draw, as each draw decreases the population (sampling without replacement from a finite population).
|
||||
/// This implementation is based on summing over `exactly_without_replacement`.
|
||||
pub fn at_least_without_replacement(w: u64, m: u64, k: u64, n: u64) -> rug::Float {
|
||||
// println!("Selecting at least {} (out of {}) from {} samples of {}",w,m,k,n);
|
||||
let mut prob_at_least = rug::Float::with_val(64, 0.0);
|
||||
|
||||
if m == k && k == n || m == n {
|
||||
return rug::Float::with_val(64, 1.0);
|
||||
}
|
||||
|
||||
if w == m {
|
||||
return exactly_without_replacement(w, m, k, n);
|
||||
}
|
||||
|
||||
for x in w..=min(m, k) {
|
||||
let a = exactly_without_replacement(x, m, k, n);
|
||||
prob_at_least += a;
|
||||
}
|
||||
prob_at_least
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::probability::hypergeometric::at_least_without_replacement;

    /// Tolerance used when comparing against hand-computed probabilities.
    const EPSILON: f64 = 1e-12;

    #[test]
    fn test_at_least_without_replacement() {
        // 3 of 4 items are of interest and 3 are drawn: at most one draw can
        // miss, so at least 2 hits is certain (3/4 + 1/4).
        let prob = at_least_without_replacement(2, 3, 3, 4).to_f64();
        println!("{:.12}", prob);
        assert!((prob - 1.0).abs() < EPSILON);
    }

    #[test]
    fn test_at_least_without_replacement_2() {
        // 1 of 9 items is of interest and 3 are drawn:
        // C(1,1) * C(8,2) / C(9,3) = 28 / 84 = 1/3.
        let prob = at_least_without_replacement(1, 1, 3, 9).to_f64();
        println!("{:.12}", prob);
        assert!((prob - 1.0 / 3.0).abs() < EPSILON);
    }
}
|
|
@ -0,0 +1,11 @@
|
|||
/// Welcome to the probability theory module!
|
||||
/// In this module we encapsulate a bunch of different theory useful for probabilistic modelling
|
||||
|
||||
/// Calculating properties of binomial distributions
|
||||
/// e.g. the probability of flipping heads at least 48 times out of 100 flips.
|
||||
pub mod binomial;
|
||||
|
||||
/// Calculating properties of hypergeometric distributions
|
||||
/// e.g. the chance of drawing at least 2 red balls from a bucket containing 4 red balls and 6 non
|
||||
/// red balls.
|
||||
pub mod hypergeometric;
|
|
@ -1,18 +1,30 @@
|
|||
use crate::oracle::Oracle;
|
||||
use fuzzytags::{DetectionKey, TaggingKey, Tag};
|
||||
use hashbrown::HashMap;
|
||||
use hashbrown::{HashMap, HashSet};
|
||||
use tracing::event;
|
||||
use tracing::span;
|
||||
use tracing::Level;
|
||||
use rayon::iter::IntoParallelRefIterator;
|
||||
use rayon::iter::ParallelIterator;
|
||||
use std::sync::mpsc::channel;
|
||||
use std::time::Duration;
|
||||
use std::sync::Arc;
|
||||
use itertools::Itertools;
|
||||
use crate::probability::binomial::{nchoosek, at_least_with_replacement};
|
||||
|
||||
pub struct SimulatedServer {
|
||||
keybase: Vec<(DetectionKey<24>, TaggingKey<24>)>,
|
||||
messages: Vec<(Tag<24>, TaggingKey<24>)>,
|
||||
tags_to_keys_cache: HashMap<String, Vec<TaggingKey<24>>>,
|
||||
keys_to_tags_cache: HashMap<String, Vec<Tag<24>>>,
|
||||
sender_tags: HashMap<String, String>,
|
||||
sender_count: HashMap<String, f64>,
|
||||
tags_to_keys_cache: HashMap<String, HashSet<String>>,
|
||||
keys_to_tags_cache: HashMap<String, HashSet<String>>,
|
||||
oracle: Oracle,
|
||||
}
|
||||
|
||||
struct Event(String,String,String,f64);
|
||||
|
||||
|
||||
pub struct RoundStatistics {
|
||||
pub num_registered_parties: usize,
|
||||
pub num_messages: usize,
|
||||
|
@ -34,6 +46,8 @@ impl SimulatedServer {
|
|||
SimulatedServer {
|
||||
keybase: vec![],
|
||||
messages: vec![],
|
||||
sender_tags: HashMap::new(),
|
||||
sender_count: HashMap::new(),
|
||||
tags_to_keys_cache: HashMap::new(),
|
||||
keys_to_tags_cache: HashMap::new(),
|
||||
oracle: Oracle::new(),
|
||||
|
@ -42,37 +56,52 @@ impl SimulatedServer {
|
|||
|
||||
pub fn register_key(&mut self, detection_key: &DetectionKey<24>, tagging_key: &TaggingKey<24>) {
|
||||
self.keybase.push((detection_key.clone(), tagging_key.clone()));
|
||||
self.keys_to_tags_cache.insert(tagging_key.id(), vec![]);
|
||||
self.keys_to_tags_cache.insert(tagging_key.id(), HashSet::new());
|
||||
self.oracle.register_party(tagging_key.id());
|
||||
}
|
||||
|
||||
pub fn add_message(&mut self, tag: Tag<24>, sender_tagging_key: &TaggingKey<24>) {
|
||||
self.messages.push((tag.clone(), sender_tagging_key.clone()));
|
||||
self.tags_to_keys_cache.insert(tag.to_string(), vec![]);
|
||||
}
|
||||
self.tags_to_keys_cache.insert(tag.to_string(), HashSet::new());
|
||||
self.sender_tags.insert(tag.to_string(), sender_tagging_key.id());
|
||||
|
||||
pub fn test_messages(&mut self) {
|
||||
for (message, sender) in self.messages.iter() {
|
||||
for (detection_key, public_key) in self.keybase.iter() {
|
||||
let span = span!(Level::TRACE, "{detection}", party = public_key.id().as_str());
|
||||
let _enter = span.enter();
|
||||
if detection_key.test_tag(message) {
|
||||
event!(Level::TRACE, "Matched detection key for {key} to tag {tag} ", key = public_key.id(), tag = message.to_string());
|
||||
self.tags_to_keys_cache.get_mut(message.to_string().as_str()).unwrap().push((*public_key).clone());
|
||||
self.keys_to_tags_cache.get_mut(public_key.id().as_str()).unwrap().push((*message).clone());
|
||||
self.oracle.add_event(sender.id(), public_key.id(), None);
|
||||
let count = match self.sender_count.get(sender_tagging_key.id().as_str()) {
|
||||
Some(count) => *count + 1.0,
|
||||
_ => 1.0,
|
||||
};
|
||||
self.sender_count.insert(sender_tagging_key.id(), count);
|
||||
|
||||
let (tx, rx) = channel();
|
||||
self.keybase.par_iter().for_each_with(tx.clone(), |tx,(detection_key, receiver_tagging_key)| {
|
||||
if detection_key.test_tag(&tag) {
|
||||
let tag_str = tag.to_string();
|
||||
tx.send(Event(tag_str.clone(), sender_tagging_key.id(), receiver_tagging_key.id(), detection_key.false_positive_probability()));
|
||||
}
|
||||
});
|
||||
std::mem::drop(tx);
|
||||
loop {
|
||||
let event = rx.recv();
|
||||
match event {
|
||||
Ok(event) => {
|
||||
event!(Level::TRACE, "Matched detection key for {key} to tag {tag} ", key = event.2, tag = event.0);
|
||||
self.tags_to_keys_cache.get_mut(event.0.as_str()).unwrap().insert(event.2.clone());
|
||||
self.keys_to_tags_cache.get_mut(event.2.as_str()).unwrap().insert(event.0.clone());
|
||||
self.oracle.add_event(event.1.to_string(), event.2.to_string(), None, event.3);
|
||||
}
|
||||
_ => {break;}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
pub fn statistics(&self) -> (Oracle, RoundStatistics, HashMap<String, PartyStatistics>) {
|
||||
let mut party_stats = HashMap::new();
|
||||
let round_stats = RoundStatistics {
|
||||
num_messages: self.messages.len(),
|
||||
num_registered_parties: self.keybase.len(),
|
||||
};
|
||||
for (party, pub_key) in self.keybase.iter() {
|
||||
for (recipient_index,(party, pub_key)) in self.keybase.iter().enumerate() {
|
||||
let matched = self.keys_to_tags_cache[pub_key.id().as_str()].clone();
|
||||
let observed_messages = matched.len();
|
||||
let ideal_rate = party.false_positive_probability();
|
||||
|
@ -83,12 +112,34 @@ impl SimulatedServer {
|
|||
let observed_skew = (observed_messages as f64) / expected_messages;
|
||||
|
||||
let mut trivial_breaks = 0;
|
||||
let mut num_times_matched_with = HashMap::new();
|
||||
|
||||
for tag in matched.iter() {
|
||||
|
||||
let sender = self.sender_tags[tag].clone();
|
||||
let num = match num_times_matched_with.get(sender.as_str()) {
|
||||
Some(num) => *num + 1.0,
|
||||
_ => 1.0,
|
||||
};
|
||||
num_times_matched_with.insert(sender, num);
|
||||
|
||||
if self.tags_to_keys_cache[tag.to_string().as_str()].len() == 1 {
|
||||
trivial_breaks += 1;
|
||||
}
|
||||
}
|
||||
|
||||
for (sender, count) in num_times_matched_with.iter() {
|
||||
let expected_matched_count = (ideal_rate * self.sender_count[sender.as_str()]);
|
||||
let actual_matched_count = (*count);
|
||||
let prob = at_least_with_replacement( actual_matched_count as u64, self.sender_count[sender.as_str()] as u64,ideal_rate).to_f64();
|
||||
let diff = f64::abs(actual_matched_count - expected_matched_count);
|
||||
// these numbers are arbitrary, but the point is probability only works if your sample is big enough...
|
||||
if diff > 4.0 && actual_matched_count > expected_matched_count && prob < 0.01 {
|
||||
let sender_index = self.keybase.iter().find_position(|(d,t)|t.id() == *sender).unwrap().0;
|
||||
event!(Level::INFO, "Found Anomalous Relationship Between {sender}({sender_index}) and {recipient}({recipient_index}) {falsepositiverate}, {detected} >> {expected} | Probability of Event: {prob:.9}", sender=sender, sender_index=sender_index, recipient=pub_key.id(), recipient_index=recipient_index, falsepositiverate=ideal_rate, detected=actual_matched_count, expected=expected_matched_count, prob=prob);
|
||||
}
|
||||
}
|
||||
|
||||
let p_stats = PartyStatistics {
|
||||
ideal_rate,
|
||||
expected_messages,
|
||||
|
|
Loading…
Reference in New Issue