Datasets, Server upgrades + Nicer Graphs
This commit is contained in:
parent
3ebbaf640b
commit
e4d3b2809f
|
@ -15,4 +15,9 @@ termcolor = "1.1.2"
|
||||||
clap = "3.0.0-beta.2"
|
clap = "3.0.0-beta.2"
|
||||||
tracing = "0.1.22"
|
tracing = "0.1.22"
|
||||||
tracing-subscriber = "0.2.15"
|
tracing-subscriber = "0.2.15"
|
||||||
hex = "0.4.2"
|
hex = "0.4.2"
|
||||||
|
csv = "1.1"
|
||||||
|
serde = { version = "1", features = ["derive"] }
|
||||||
|
rayon = "1.5.0"
|
||||||
|
itertools = "0.10.0"
|
||||||
|
rug = "1.11.0"
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,13 @@
|
||||||
|
## Email EU Core Temporal
|
||||||
|
|
||||||
|
Nodes: 986
|
||||||
|
Temporal Edges: 332334
|
||||||
|
Edges in static graph: 24929
|
||||||
|
Time span: 803 days
|
||||||
|
|
||||||
|
Ashwin Paranjape, Austin R. Benson, and Jure Leskovec. "Motifs in Temporal Networks." In Proceedings of the Tenth ACM International Conference on Web Search and Data Mining, 2017.
|
||||||
|
Source, Destination, Time (Seconds)
|
||||||
|
|
||||||
|
## College
|
||||||
|
|
||||||
|
Pietro Panzarasa, Tore Opsahl, and Kathleen M. Carley. "Patterns and dynamics of users' behavior and interaction: Network analysis of an online community." Journal of the American Society for Information Science and Technology 60.5 (2009): 911-932.
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,126 @@
|
||||||
|
use serde::Deserialize;
|
||||||
|
use crate::server::SimulatedServer;
|
||||||
|
use fuzzytags::{RootSecret, TaggingKey};
|
||||||
|
use rand::Rng;
|
||||||
|
use tracing::event;
|
||||||
|
use tracing::span;
|
||||||
|
use tracing::Level;
|
||||||
|
use crate::oracle::Oracle;
|
||||||
|
|
||||||
|
pub trait TemporalDataset {
|
||||||
|
fn register_with_server<R>(&self, server: &mut SimulatedServer, rng: &mut R, min_p: usize, max_p: usize, oracle: &mut Oracle) where R: Rng;
|
||||||
|
fn playthough_traffic(&self, server: &mut SimulatedServer, oracle: &mut Oracle);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Deserialize)]
|
||||||
|
struct TemporalSocialNetworkRecord {
|
||||||
|
src_node: usize,
|
||||||
|
dst_node: usize,
|
||||||
|
timestamp: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
pub struct CsvDataset {
|
||||||
|
root_secrets: Vec<RootSecret<24>>,
|
||||||
|
tagging_keys: Vec<TaggingKey<24>>,
|
||||||
|
records: Vec<TemporalSocialNetworkRecord>
|
||||||
|
}
|
||||||
|
|
||||||
|
impl CsvDataset {
|
||||||
|
pub fn load_dataset(filename: &str) -> CsvDataset {
|
||||||
|
let mut rdr = csv::Reader::from_path(filename).unwrap();
|
||||||
|
|
||||||
|
let mut num_recipients = 0;
|
||||||
|
let mut records : Vec<TemporalSocialNetworkRecord> = vec![];
|
||||||
|
for result in rdr.deserialize() {
|
||||||
|
// Notice that we need to provide a type hint for automatic
|
||||||
|
// deserialization.
|
||||||
|
let record: Result<TemporalSocialNetworkRecord, csv::Error> = result;
|
||||||
|
match record {
|
||||||
|
Ok(record) => {
|
||||||
|
if record.dst_node > num_recipients {
|
||||||
|
num_recipients = record.dst_node;
|
||||||
|
}
|
||||||
|
if record.src_node > num_recipients {
|
||||||
|
num_recipients = record.src_node;
|
||||||
|
}
|
||||||
|
records.push(record.clone());
|
||||||
|
},
|
||||||
|
Err(err) => {
|
||||||
|
panic!("invalid data record found in {}", filename)
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// generate a root secret for each member of the network
|
||||||
|
let mut root_secrets = vec![];
|
||||||
|
let mut tagging_keys = vec![];
|
||||||
|
for i in 0..num_recipients+1 {
|
||||||
|
let secret = RootSecret::<24>::generate();
|
||||||
|
let tagging_key = secret.tagging_key();
|
||||||
|
root_secrets.push(secret);
|
||||||
|
tagging_keys.push(tagging_key)
|
||||||
|
}
|
||||||
|
|
||||||
|
CsvDataset {
|
||||||
|
root_secrets,
|
||||||
|
tagging_keys,
|
||||||
|
records
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn num_parties(&self) -> usize {
|
||||||
|
self.root_secrets.len()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn num_records(&self) -> usize {
|
||||||
|
self.records.len()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TemporalDataset for CsvDataset {
|
||||||
|
fn register_with_server<R>(&self, server: &mut SimulatedServer, rng: &mut R, min_p: usize, max_p: usize, oracle: &mut Oracle) where
|
||||||
|
R: Rng {
|
||||||
|
for secret in self.root_secrets.iter() {
|
||||||
|
let n = rng.gen_range(min_p..max_p);
|
||||||
|
let span = span!(Level::INFO, "register", party = secret.tagging_key().id().as_str());
|
||||||
|
let _enter = span.enter();
|
||||||
|
let detection_key = secret.extract_detection_key(n);
|
||||||
|
event!(Level::TRACE, "create detection key {detection_key}", detection_key = detection_key.id().as_str());
|
||||||
|
event!(Level::TRACE, "register with server");
|
||||||
|
server.register_key(&detection_key, &secret.tagging_key());
|
||||||
|
oracle.register_party(secret.tagging_key().id());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn playthough_traffic(&self, server: &mut SimulatedServer,oracle: &mut Oracle) {
|
||||||
|
/// TODO timestamps?
|
||||||
|
for (i,record) in self.records.iter().enumerate() {
|
||||||
|
if i %1000 == 0 {
|
||||||
|
let progress = i as f64 /(self.records.len() as f64);
|
||||||
|
let days = (((record.timestamp as f64 / 60.0) / 60.0) / 24.0);
|
||||||
|
event!(Level::INFO, "progress..{:.2} ({} days)", progress*100.0, days);
|
||||||
|
}
|
||||||
|
// We pretend that the server will always have access to the sender, even though
|
||||||
|
// in practical deployments we could mitigate this somewhat using Tor / mixnet.
|
||||||
|
let tagging_key_src = &self.tagging_keys[record.src_node];
|
||||||
|
let tagging_key_dst = &self.tagging_keys[record.dst_node];
|
||||||
|
event!(Level::TRACE, "regular send {party}", party = tagging_key_dst.id().as_str());
|
||||||
|
let tag = tagging_key_dst.generate_tag();
|
||||||
|
event!(Level::TRACE, "message sent server {tag}", tag = tag.to_string());
|
||||||
|
server.add_message(tag, tagging_key_src);
|
||||||
|
oracle.add_event(tagging_key_src.id(), tagging_key_dst.id(), None, 0.0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use crate::datasets::CsvDataset;

    /// Loading the Email EU Core temporal dataset yields one record per
    /// temporal edge in the file.
    #[test]
    fn it_works() {
        let path = "datasets/email-Eu-core-temporal.txt";
        let loaded = CsvDataset::load_dataset(path);
        let expected_records = 332334;
        assert_eq!(expected_records, loaded.num_records());
    }
}
|
53
src/main.rs
53
src/main.rs
|
@ -6,7 +6,8 @@ use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
|
||||||
mod oracle;
|
mod oracle;
|
||||||
mod parties;
|
mod parties;
|
||||||
mod server;
|
mod server;
|
||||||
|
mod datasets;
|
||||||
|
mod probability;
|
||||||
use clap::Clap;
|
use clap::Clap;
|
||||||
use tracing::event;
|
use tracing::event;
|
||||||
|
|
||||||
|
@ -14,6 +15,7 @@ use crate::oracle::Oracle;
|
||||||
use tracing::Level;
|
use tracing::Level;
|
||||||
use tracing_subscriber;
|
use tracing_subscriber;
|
||||||
use tracing_subscriber::FmtSubscriber;
|
use tracing_subscriber::FmtSubscriber;
|
||||||
|
use crate::datasets::{CsvDataset, TemporalDataset};
|
||||||
|
|
||||||
#[derive(Clap)]
|
#[derive(Clap)]
|
||||||
#[clap(version = "1.0", author = "Sarah Jamie Lewis <sarah@openprivacy.ca>")]
|
#[clap(version = "1.0", author = "Sarah Jamie Lewis <sarah@openprivacy.ca>")]
|
||||||
|
@ -23,6 +25,12 @@ struct Opts {
|
||||||
#[clap(short, long, default_value = "10")]
|
#[clap(short, long, default_value = "10")]
|
||||||
num_parties: usize,
|
num_parties: usize,
|
||||||
|
|
||||||
|
|
||||||
|
/// dataset
|
||||||
|
#[clap(short, long, default_value = "")]
|
||||||
|
dataset: String,
|
||||||
|
|
||||||
|
|
||||||
/// samples per round
|
/// samples per round
|
||||||
#[clap(short, long, default_value = "10")]
|
#[clap(short, long, default_value = "10")]
|
||||||
samples_per_round: usize,
|
samples_per_round: usize,
|
||||||
|
@ -56,22 +64,26 @@ fn main() {
|
||||||
let mut rng = rand::thread_rng();
|
let mut rng = rand::thread_rng();
|
||||||
let mut server = SimulatedServer::new();
|
let mut server = SimulatedServer::new();
|
||||||
|
|
||||||
let simulated_parties = SimulatedParties::new_simulation(opts.num_parties);
|
if opts.dataset == "" {
|
||||||
{
|
let simulated_parties = SimulatedParties::new_simulation(opts.num_parties);
|
||||||
event!(Level::INFO, "Generating {} Parties and registering them with the server", opts.num_parties);
|
{
|
||||||
simulated_parties.register_with_server(&mut server, &mut rng, opts.min_p, opts.max_p, &mut oracle);
|
event!(Level::INFO, "Generating {} Parties and registering them with the server", opts.num_parties);
|
||||||
}
|
simulated_parties.register_with_server(&mut server, &mut rng, opts.min_p, opts.max_p, &mut oracle);
|
||||||
|
}
|
||||||
|
|
||||||
let pareto = Pareto::new(1.0, 1.0).unwrap();
|
let pareto = Pareto::new(1.0, 1.0).unwrap();
|
||||||
|
|
||||||
{
|
{
|
||||||
event!(Level::INFO, "Simulating message sends using {} samples from a pareto distribution...", opts.samples_per_round);
|
event!(Level::INFO, "Simulating message sends using {} samples from a pareto distribution...", opts.samples_per_round);
|
||||||
(0..opts.samples_per_round).for_each(|_i| simulated_parties.sample_traffic(&mut server, &mut rng, pareto, opts.prob_entangled, &mut oracle));
|
(0..opts.samples_per_round).for_each(|_i| simulated_parties.sample_traffic(&mut server, &mut rng, pareto, opts.prob_entangled, &mut oracle));
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
let dataset = CsvDataset::load_dataset(opts.dataset.as_str());
|
||||||
|
event!(Level::INFO, "Registering parties from {} which containts {}", opts.dataset, dataset.num_parties());
|
||||||
|
dataset.register_with_server(&mut server, &mut rng, opts.min_p, opts.max_p, &mut oracle);
|
||||||
|
|
||||||
{
|
event!(Level::INFO, "Playing back {} events from {}", dataset.num_records(), opts.dataset);
|
||||||
event!(Level::INFO, "Simulating Adversarial Server Processing Messages..");
|
dataset.playthough_traffic(&mut server, &mut oracle);
|
||||||
server.test_messages();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
|
@ -85,15 +97,16 @@ fn main() {
|
||||||
if_uniform
|
if_uniform
|
||||||
);
|
);
|
||||||
let mut stdout = StandardStream::stdout(ColorChoice::Always);
|
let mut stdout = StandardStream::stdout(ColorChoice::Always);
|
||||||
for (party, stats) in party_stats.iter() {
|
for (index, (party, stats)) in party_stats.iter().enumerate() {
|
||||||
if stats.trivial_breaks > 0 || (stats.observed_messages > 2 && stats.observed_skew > (if_uniform * stats.ideal_rate)) {
|
if stats.trivial_breaks > 0 || (stats.observed_messages > 2 && stats.observed_skew > 2.0) {
|
||||||
stdout.set_color(ColorSpec::new().set_fg(Some(Color::Red))).unwrap();
|
stdout.set_color(ColorSpec::new().set_fg(Some(Color::Red))).unwrap();
|
||||||
} else {
|
} else {
|
||||||
stdout.set_color(ColorSpec::new().set_fg(Some(Color::Green))).unwrap();
|
stdout.set_color(ColorSpec::new().set_fg(Some(Color::Green))).unwrap();
|
||||||
}
|
}
|
||||||
writeln!(
|
writeln!(
|
||||||
&mut stdout,
|
&mut stdout,
|
||||||
"Party {} | Ideal: {:>8.2} {:>8.2} | Observed: {:>8.2} ({:>6.2}) | Skew: {:>8.2} ({:>6.2}) | Trivial Attributions this Round: {}",
|
"({}) Party {} | Ideal: {:>8.2} {:>8.2} | Observed: {:>8.2} ({:>6.2}) | Skew: {:>8.2} ({:>6.2}) | Trivial Attributions this Round: {}",
|
||||||
|
index,
|
||||||
party,
|
party,
|
||||||
stats.ideal_rate,
|
stats.ideal_rate,
|
||||||
stats.ideal_rate * (round_stats.num_messages as f64),
|
stats.ideal_rate * (round_stats.num_messages as f64),
|
||||||
|
@ -105,11 +118,11 @@ fn main() {
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
}
|
}
|
||||||
let max = server_oracle.compile_to_dot("server_event.dot", true, false, 1);
|
let max = server_oracle.compile_to_dot("server_event.dot", true, false, 1.0);
|
||||||
println!("Max from server: {}", max);
|
println!("Max from server: {}", max);
|
||||||
oracle.compile_to_dot("actual_events.dot", false, false, max);
|
oracle.compile_to_dot("actual_events.dot", false, false, 1.0);
|
||||||
|
|
||||||
server_oracle.compile_to_dot("server_event_inverse.dot", true, true,max);
|
//server_oracle.compile_to_dot("server_event_inverse.dot", true, true,max);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
|
@ -12,6 +12,7 @@ pub struct Event {
|
||||||
sender: String,
|
sender: String,
|
||||||
intended_receiver: String,
|
intended_receiver: String,
|
||||||
entangled_receiver: Option<String>,
|
entangled_receiver: Option<String>,
|
||||||
|
confidence: f64
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
|
@ -32,29 +33,26 @@ impl Oracle {
|
||||||
self.parties.push(party);
|
self.parties.push(party);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn add_event(&mut self, sender: String, intended_receiver: String, entangled_receiver: Option<String>) {
|
pub fn add_event(&mut self, sender: String, intended_receiver: String, entangled_receiver: Option<String>, confidence: f64) {
|
||||||
self.actual_events.push(Event {
|
self.actual_events.push(Event {
|
||||||
sender,
|
sender,
|
||||||
intended_receiver,
|
intended_receiver,
|
||||||
entangled_receiver,
|
entangled_receiver,
|
||||||
|
confidence,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn compile_to_dot(&self, filename: &str, strict: bool, inverse: bool, max: u64) -> u64 {
|
pub fn compile_to_dot(&self, filename: &str, strict: bool, inverse: bool, max: f64) -> f64 {
|
||||||
let mut output = File::create(filename).unwrap();
|
let mut output = File::create(filename).unwrap();
|
||||||
if strict {
|
write!(output, "strict digraph {{\n");
|
||||||
write!(output, "strict ");
|
|
||||||
}
|
|
||||||
write!(output, "digraph {{\n");
|
|
||||||
write!(
|
write!(
|
||||||
output,
|
output,
|
||||||
r#" dim=3;
|
r##"
|
||||||
overlap=true;
|
K=1.3;
|
||||||
splines = true;
|
repulsiveforce=0.00001;
|
||||||
dpi=400;
|
dpi=2400;
|
||||||
penwidth = 1;
|
bgcolor = "#111111ff";
|
||||||
bgcolor = "black";
|
"##
|
||||||
"#
|
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
|
@ -66,32 +64,33 @@ impl Oracle {
|
||||||
let r = hex::decode(party).unwrap()[0];
|
let r = hex::decode(party).unwrap()[0];
|
||||||
let g = hex::decode(party).unwrap()[1];
|
let g = hex::decode(party).unwrap()[1];
|
||||||
let b = hex::decode(party).unwrap()[2];
|
let b = hex::decode(party).unwrap()[2];
|
||||||
writeln!(output, "\"{}\" [shape=point, color=\"#{:x}{:x}{:x}\",pos=\"{},{}!\"]", party, r, g, b,x,y);
|
writeln!(output, "\"{}\" [shape=point, penwidth=0, fixedsize=true, width=0.005,height=0.005,peripheries=0,style=\"filled,setlinewidth(0)\", color=\"#{:02x}{:02x}{:02x}\"]", party, r, g, b);
|
||||||
|
//writeln!(output, "\"{}\" [shape=point, color=\"#{:x}{:x}{:x}\",pos=\"{},{}!\"]", party, r, g, b,x,y);
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut real_connection_map: HashMap<(String, String), u64> = HashMap::new();
|
let mut real_connection_map: HashMap<(String, String), f64> = HashMap::new();
|
||||||
let mut entangled_connection_map: HashMap<(String, String), u64> = HashMap::new();
|
let mut entangled_connection_map: HashMap<(String, String), f64> = HashMap::new();
|
||||||
let mut max_conn = max;
|
let mut max_conn = 1.0;
|
||||||
for event in self.actual_events.iter() {
|
for event in self.actual_events.iter() {
|
||||||
let key = (event.sender.clone(), event.intended_receiver.clone());
|
let key = (event.sender.clone(), event.intended_receiver.clone());
|
||||||
if real_connection_map.contains_key(&key) {
|
if real_connection_map.contains_key(&key) {
|
||||||
*real_connection_map.get_mut(&key).unwrap() += 1;
|
*real_connection_map.get_mut(&key).unwrap() += (1.0 * (1.0-event.confidence));
|
||||||
if real_connection_map[&key] > max_conn {
|
if real_connection_map[&key] > max_conn {
|
||||||
max_conn = real_connection_map[&key];
|
max_conn = real_connection_map[&key];
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
real_connection_map.insert(key, 1);
|
real_connection_map.insert(key, 1.0 * (1.0-event.confidence));
|
||||||
}
|
}
|
||||||
match &event.entangled_receiver {
|
match &event.entangled_receiver {
|
||||||
Some(entangled_receiver) => {
|
Some(entangled_receiver) => {
|
||||||
let key = (event.sender.clone(), entangled_receiver.clone());
|
let key = (event.sender.clone(), entangled_receiver.clone());
|
||||||
if entangled_connection_map.contains_key(&key) {
|
if entangled_connection_map.contains_key(&key) {
|
||||||
*entangled_connection_map.get_mut(&key).unwrap() += 1;
|
*entangled_connection_map.get_mut(&key).unwrap() += 1.0;
|
||||||
if entangled_connection_map[&key] > max_conn {
|
if entangled_connection_map[&key] > max_conn {
|
||||||
max_conn = entangled_connection_map[&key];
|
max_conn = entangled_connection_map[&key];
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
entangled_connection_map.insert(key, 1);
|
entangled_connection_map.insert(key, 1.0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_ => {}
|
_ => {}
|
||||||
|
@ -100,27 +99,22 @@ impl Oracle {
|
||||||
|
|
||||||
for ((sender, receiver), size) in real_connection_map.iter() {
|
for ((sender, receiver), size) in real_connection_map.iter() {
|
||||||
let normalized = (*size as f64 / max_conn as f64);
|
let normalized = (*size as f64 / max_conn as f64);
|
||||||
let mut transparency = (normalized * 172.0) as u8 + 64;
|
let mut transparency = (normalized * 64.0) as u8 + 172;
|
||||||
let mut penwidth = ((normalized * 2048.0) as f64).log2();
|
let mut penwidth = (normalized * 0.01) as f64;
|
||||||
|
|
||||||
if inverse {
|
|
||||||
penwidth = 4.0 - penwidth;
|
|
||||||
transparency = 255 - transparency;
|
|
||||||
}
|
|
||||||
|
|
||||||
writeln!(
|
writeln!(
|
||||||
output,
|
output,
|
||||||
"\"{}\" -> \"{}\" [arrowhead=none, penwidth={}, color=\"#ffffff{:x}\"]",
|
"\"{}\" -> \"{}\" [arrowhead=none, penwidth={}, color=\"#ffffff{:02x}\", weight={}]",
|
||||||
sender, receiver, penwidth, transparency
|
sender, receiver, f64::max(0.005, penwidth), transparency, penwidth
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
for ((sender, receiver), size) in entangled_connection_map.iter() {
|
for ((sender, receiver), size) in entangled_connection_map.iter() {
|
||||||
let normalized = (*size as f64 / max_conn as f64);
|
let normalized = (*size as f64 / max_conn as f64);
|
||||||
let transparency = (normalized * 172.0) as u8 + 64;
|
let transparency = (normalized * 172.0) as u8 + 64;
|
||||||
let penwidth = ((normalized * 2048.0) as f64).log2();
|
let penwidth = ((normalized * 32.0) as f64).log2();
|
||||||
writeln!(
|
writeln!(
|
||||||
output,
|
output,
|
||||||
"\"{}\" -> \"{}\" [arrowhead=none, style=dashed, penwidth={}, color=\"#ffffff{:x}\"]",
|
"\"{}\" -> \"{}\" [arrowhead=none, style=dashed, penwidth={}, color=\"#ffffff{:02x}\"]",
|
||||||
sender, receiver, penwidth, transparency
|
sender, receiver, penwidth, transparency
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
|
@ -71,7 +71,7 @@ impl SimulatedParties {
|
||||||
server.add_message(tag, &sender_public_key);
|
server.add_message(tag, &sender_public_key);
|
||||||
}
|
}
|
||||||
|
|
||||||
oracle.add_event(sender_public_key.id().clone(), receiver_public_key.id(), Some(receiver_public_key_2.id()));
|
oracle.add_event(sender_public_key.id().clone(), receiver_public_key.id(), Some(receiver_public_key_2.id()),1.0);
|
||||||
} else {
|
} else {
|
||||||
event!(Level::INFO, "regular send {party}", party = receiver_public_key.id().as_str());
|
event!(Level::INFO, "regular send {party}", party = receiver_public_key.id().as_str());
|
||||||
for _i in 0..v {
|
for _i in 0..v {
|
||||||
|
@ -79,7 +79,7 @@ impl SimulatedParties {
|
||||||
event!(Level::INFO, "message sent server {tag}", tag = tag.to_string());
|
event!(Level::INFO, "message sent server {tag}", tag = tag.to_string());
|
||||||
server.add_message(tag, &sender_public_key);
|
server.add_message(tag, &sender_public_key);
|
||||||
}
|
}
|
||||||
oracle.add_event(sender_public_key.id().clone(), receiver_public_key.id(), None);
|
oracle.add_event(sender_public_key.id().clone(), receiver_public_key.id(), None,1.0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,33 @@
|
||||||
|
/// calculate the binomial coefficient.
|
||||||
|
pub fn nchoosek(n: u64, k: u64) -> f64{
|
||||||
|
if k > n {
|
||||||
|
return 0.0;
|
||||||
|
}
|
||||||
|
let n_fac = rug::Integer::from(rug::Integer::factorial(n as u32)) / (rug::Integer::from(rug::Integer::factorial(k as u32)) * rug::Integer::from(rug::Integer::factorial((n - k) as u32)));
|
||||||
|
return n_fac.to_f64();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// the probability that a given outcome (with probability p) will occur at least k times in n independent trials (aka Bernoulli trials).
|
||||||
|
pub fn at_least_with_replacement(k: u64, n: u64, p: f64) -> rug::Float {
|
||||||
|
let mut prob_at_least = rug::Float::with_val(64, 0.0);
|
||||||
|
for x in k..n {
|
||||||
|
prob_at_least += nchoosek(n, x) * rug::Float::with_val(64, p.powi(x as i32)) * rug::Float::with_val(64, (1.0 - p).powi((n - x) as i32));
|
||||||
|
}
|
||||||
|
prob_at_least
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use crate::probability::binomial::{at_least_with_replacement, nchoosek};

    #[test]
    fn test_nchoosek() {
        // `nchoosek` returns an f64, so the expected value must be an f64 too
        // (the value is below 2^53 and therefore exactly representable).
        assert_eq!(17310309456440.0, nchoosek(100, 10));
    }

    #[test]
    fn test_at_least() {
        // probability of at least 48 heads in 100 coin flips
        assert_eq!(0.691350293205374, at_least_with_replacement(48, 100, 0.5).to_f64());
    }
}
|
|
@ -0,0 +1,62 @@
|
||||||
|
use crate::probability::binomial;
|
||||||
|
use std::cmp::min;
|
||||||
|
|
||||||
|
/// choose exactly w items (of a max of m) given k trials and n total items for a process that can be characterized by the `hypergeometric distribution`:
|
||||||
|
/// That is:
|
||||||
|
/// * The result of each draw (the elements of the population being sampled) can be classified into one of two mutually exclusive categories (e.g. Red/Blue or Not Red/Red).
|
||||||
|
/// * The probability of a success changes on each draw, as each draw decreases the population (sampling without replacement from a finite population).
|
||||||
|
pub fn exactly_without_replacement(x: u64, m: u64, k: u64, n: u64) -> rug::Float {
|
||||||
|
// println!("Selecting exactly {} (out of {}) from {} samples of {}",x,m,k,n);
|
||||||
|
if x > k || k > n || m > n || m < x {
|
||||||
|
return rug::Float::with_val(64, 0.0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the desired population is the same as the total population then the answer is trivial
|
||||||
|
if m == n {
|
||||||
|
return rug::Float::with_val(64, 1.0);
|
||||||
|
}
|
||||||
|
|
||||||
|
let mchoosen = binomial::nchoosek(m, x) * rug::Float::with_val(64, 1.0);
|
||||||
|
let nsubmchooseksubx = binomial::nchoosek(n - m, k - x) * rug::Float::with_val(64, 1.0);
|
||||||
|
let nchoosek = binomial::nchoosek(n, k) * rug::Float::with_val(64, 1.0);
|
||||||
|
(mchoosen * nsubmchooseksubx) / nchoosek
|
||||||
|
}
|
||||||
|
|
||||||
|
/// choose at least w items (of a max of m) given k trials and n total items for a process that can be characterized by the `hypergeometric distribution`:
|
||||||
|
/// That is:
|
||||||
|
/// * The result of each draw (the elements of the population being sampled) can be classified into one of two mutually exclusive categories (e.g. Red/Blue or Not Red/Red).
|
||||||
|
/// * The probability of a success changes on each draw, as each draw decreases the population (sampling without replacement from a finite population).
|
||||||
|
/// This implementation is based on summing over `exactly_without_replacement`.
|
||||||
|
pub fn at_least_without_replacement(w: u64, m: u64, k: u64, n: u64) -> rug::Float {
|
||||||
|
// println!("Selecting at least {} (out of {}) from {} samples of {}",w,m,k,n);
|
||||||
|
let mut prob_at_least = rug::Float::with_val(64, 0.0);
|
||||||
|
|
||||||
|
if m == k && k == n || m == n {
|
||||||
|
return rug::Float::with_val(64, 1.0);
|
||||||
|
}
|
||||||
|
|
||||||
|
if w == m {
|
||||||
|
return exactly_without_replacement(w, m, k, n);
|
||||||
|
}
|
||||||
|
|
||||||
|
for x in w..=min(m, k) {
|
||||||
|
let a = exactly_without_replacement(x, m, k, n);
|
||||||
|
prob_at_least += a;
|
||||||
|
}
|
||||||
|
prob_at_least
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use crate::probability::hypergeometric::at_least_without_replacement;

    #[test]
    fn test_at_least_without_replacement() {
        // 3 of the 4 items are desired and 3 are drawn, so at most one draw can
        // miss: P(exactly 2) + P(exactly 3) = 3/4 + 1/4 = 1.
        let prob = at_least_without_replacement(2, 3, 3, 4).to_f64();
        assert!((prob - 1.0).abs() < 1e-12, "unexpected probability {:.12}", prob);
    }

    #[test]
    fn test_at_least_without_replacement_2() {
        // 1 of the 9 items is desired and 3 are drawn:
        // C(1, 1) * C(8, 2) / C(9, 3) = 28 / 84 = 1/3.
        let prob = at_least_without_replacement(1, 1, 3, 9).to_f64();
        assert!((prob - 1.0 / 3.0).abs() < 1e-12, "unexpected probability {:.12}", prob);
    }
}
|
|
@ -0,0 +1,11 @@
|
||||||
|
//! Welcome to the probability theory module!
|
||||||
|
//! In this module we encapsulate a bunch of different theory useful for probabilistic modelling
|
||||||
|
|
||||||
|
/// Calculating properties of binomial distributions
|
||||||
|
/// e.g. the probability of flipping heads at least 48 times out of 100 flips.
|
||||||
|
pub mod binomial;
|
||||||
|
|
||||||
|
/// Calculating properties of hypergeometric distributions
|
||||||
|
/// e.g. the chance of drawing at least 2 red balls from a bucket containing 4 red balls and 6 non
|
||||||
|
/// red balls.
|
||||||
|
pub mod hypergeometric;
|
|
@ -1,18 +1,30 @@
|
||||||
use crate::oracle::Oracle;
|
use crate::oracle::Oracle;
|
||||||
use fuzzytags::{DetectionKey, TaggingKey, Tag};
|
use fuzzytags::{DetectionKey, TaggingKey, Tag};
|
||||||
use hashbrown::HashMap;
|
use hashbrown::{HashMap, HashSet};
|
||||||
use tracing::event;
|
use tracing::event;
|
||||||
use tracing::span;
|
use tracing::span;
|
||||||
use tracing::Level;
|
use tracing::Level;
|
||||||
|
use rayon::iter::IntoParallelRefIterator;
|
||||||
|
use rayon::iter::ParallelIterator;
|
||||||
|
use std::sync::mpsc::channel;
|
||||||
|
use std::time::Duration;
|
||||||
|
use std::sync::Arc;
|
||||||
|
use itertools::Itertools;
|
||||||
|
use crate::probability::binomial::{nchoosek, at_least_with_replacement};
|
||||||
|
|
||||||
pub struct SimulatedServer {
|
pub struct SimulatedServer {
|
||||||
keybase: Vec<(DetectionKey<24>, TaggingKey<24>)>,
|
keybase: Vec<(DetectionKey<24>, TaggingKey<24>)>,
|
||||||
messages: Vec<(Tag<24>, TaggingKey<24>)>,
|
messages: Vec<(Tag<24>, TaggingKey<24>)>,
|
||||||
tags_to_keys_cache: HashMap<String, Vec<TaggingKey<24>>>,
|
sender_tags: HashMap<String, String>,
|
||||||
keys_to_tags_cache: HashMap<String, Vec<Tag<24>>>,
|
sender_count: HashMap<String, f64>,
|
||||||
|
tags_to_keys_cache: HashMap<String, HashSet<String>>,
|
||||||
|
keys_to_tags_cache: HashMap<String, HashSet<String>>,
|
||||||
oracle: Oracle,
|
oracle: Oracle,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct Event(String,String,String,f64);
|
||||||
|
|
||||||
|
|
||||||
pub struct RoundStatistics {
|
pub struct RoundStatistics {
|
||||||
pub num_registered_parties: usize,
|
pub num_registered_parties: usize,
|
||||||
pub num_messages: usize,
|
pub num_messages: usize,
|
||||||
|
@ -34,6 +46,8 @@ impl SimulatedServer {
|
||||||
SimulatedServer {
|
SimulatedServer {
|
||||||
keybase: vec![],
|
keybase: vec![],
|
||||||
messages: vec![],
|
messages: vec![],
|
||||||
|
sender_tags: HashMap::new(),
|
||||||
|
sender_count: HashMap::new(),
|
||||||
tags_to_keys_cache: HashMap::new(),
|
tags_to_keys_cache: HashMap::new(),
|
||||||
keys_to_tags_cache: HashMap::new(),
|
keys_to_tags_cache: HashMap::new(),
|
||||||
oracle: Oracle::new(),
|
oracle: Oracle::new(),
|
||||||
|
@ -42,37 +56,52 @@ impl SimulatedServer {
|
||||||
|
|
||||||
pub fn register_key(&mut self, detection_key: &DetectionKey<24>, tagging_key: &TaggingKey<24>) {
|
pub fn register_key(&mut self, detection_key: &DetectionKey<24>, tagging_key: &TaggingKey<24>) {
|
||||||
self.keybase.push((detection_key.clone(), tagging_key.clone()));
|
self.keybase.push((detection_key.clone(), tagging_key.clone()));
|
||||||
self.keys_to_tags_cache.insert(tagging_key.id(), vec![]);
|
self.keys_to_tags_cache.insert(tagging_key.id(), HashSet::new());
|
||||||
self.oracle.register_party(tagging_key.id());
|
self.oracle.register_party(tagging_key.id());
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn add_message(&mut self, tag: Tag<24>, sender_tagging_key: &TaggingKey<24>) {
|
pub fn add_message(&mut self, tag: Tag<24>, sender_tagging_key: &TaggingKey<24>) {
|
||||||
self.messages.push((tag.clone(), sender_tagging_key.clone()));
|
self.messages.push((tag.clone(), sender_tagging_key.clone()));
|
||||||
self.tags_to_keys_cache.insert(tag.to_string(), vec![]);
|
self.tags_to_keys_cache.insert(tag.to_string(), HashSet::new());
|
||||||
}
|
self.sender_tags.insert(tag.to_string(), sender_tagging_key.id());
|
||||||
|
|
||||||
pub fn test_messages(&mut self) {
|
let count = match self.sender_count.get(sender_tagging_key.id().as_str()) {
|
||||||
for (message, sender) in self.messages.iter() {
|
Some(count) => *count + 1.0,
|
||||||
for (detection_key, public_key) in self.keybase.iter() {
|
_ => 1.0,
|
||||||
let span = span!(Level::TRACE, "{detection}", party = public_key.id().as_str());
|
};
|
||||||
let _enter = span.enter();
|
self.sender_count.insert(sender_tagging_key.id(), count);
|
||||||
if detection_key.test_tag(message) {
|
|
||||||
event!(Level::TRACE, "Matched detection key for {key} to tag {tag} ", key = public_key.id(), tag = message.to_string());
|
let (tx, rx) = channel();
|
||||||
self.tags_to_keys_cache.get_mut(message.to_string().as_str()).unwrap().push((*public_key).clone());
|
self.keybase.par_iter().for_each_with(tx.clone(), |tx,(detection_key, receiver_tagging_key)| {
|
||||||
self.keys_to_tags_cache.get_mut(public_key.id().as_str()).unwrap().push((*message).clone());
|
if detection_key.test_tag(&tag) {
|
||||||
self.oracle.add_event(sender.id(), public_key.id(), None);
|
let tag_str = tag.to_string();
|
||||||
|
tx.send(Event(tag_str.clone(), sender_tagging_key.id(), receiver_tagging_key.id(), detection_key.false_positive_probability()));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
std::mem::drop(tx);
|
||||||
|
loop {
|
||||||
|
let event = rx.recv();
|
||||||
|
match event {
|
||||||
|
Ok(event) => {
|
||||||
|
event!(Level::TRACE, "Matched detection key for {key} to tag {tag} ", key = event.2, tag = event.0);
|
||||||
|
self.tags_to_keys_cache.get_mut(event.0.as_str()).unwrap().insert(event.2.clone());
|
||||||
|
self.keys_to_tags_cache.get_mut(event.2.as_str()).unwrap().insert(event.0.clone());
|
||||||
|
self.oracle.add_event(event.1.to_string(), event.2.to_string(), None, event.3);
|
||||||
}
|
}
|
||||||
|
_ => {break;}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
pub fn statistics(&self) -> (Oracle, RoundStatistics, HashMap<String, PartyStatistics>) {
|
pub fn statistics(&self) -> (Oracle, RoundStatistics, HashMap<String, PartyStatistics>) {
|
||||||
let mut party_stats = HashMap::new();
|
let mut party_stats = HashMap::new();
|
||||||
let round_stats = RoundStatistics {
|
let round_stats = RoundStatistics {
|
||||||
num_messages: self.messages.len(),
|
num_messages: self.messages.len(),
|
||||||
num_registered_parties: self.keybase.len(),
|
num_registered_parties: self.keybase.len(),
|
||||||
};
|
};
|
||||||
for (party, pub_key) in self.keybase.iter() {
|
for (recipient_index,(party, pub_key)) in self.keybase.iter().enumerate() {
|
||||||
let matched = self.keys_to_tags_cache[pub_key.id().as_str()].clone();
|
let matched = self.keys_to_tags_cache[pub_key.id().as_str()].clone();
|
||||||
let observed_messages = matched.len();
|
let observed_messages = matched.len();
|
||||||
let ideal_rate = party.false_positive_probability();
|
let ideal_rate = party.false_positive_probability();
|
||||||
|
@ -83,12 +112,34 @@ impl SimulatedServer {
|
||||||
let observed_skew = (observed_messages as f64) / expected_messages;
|
let observed_skew = (observed_messages as f64) / expected_messages;
|
||||||
|
|
||||||
let mut trivial_breaks = 0;
|
let mut trivial_breaks = 0;
|
||||||
|
let mut num_times_matched_with = HashMap::new();
|
||||||
|
|
||||||
for tag in matched.iter() {
|
for tag in matched.iter() {
|
||||||
|
|
||||||
|
let sender = self.sender_tags[tag].clone();
|
||||||
|
let num = match num_times_matched_with.get(sender.as_str()) {
|
||||||
|
Some(num) => *num + 1.0,
|
||||||
|
_ => 1.0,
|
||||||
|
};
|
||||||
|
num_times_matched_with.insert(sender, num);
|
||||||
|
|
||||||
if self.tags_to_keys_cache[tag.to_string().as_str()].len() == 1 {
|
if self.tags_to_keys_cache[tag.to_string().as_str()].len() == 1 {
|
||||||
trivial_breaks += 1;
|
trivial_breaks += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (sender, count) in num_times_matched_with.iter() {
|
||||||
|
let expected_matched_count = (ideal_rate * self.sender_count[sender.as_str()]);
|
||||||
|
let actual_matched_count = (*count);
|
||||||
|
let prob = at_least_with_replacement( actual_matched_count as u64, self.sender_count[sender.as_str()] as u64,ideal_rate).to_f64();
|
||||||
|
let diff = f64::abs(actual_matched_count - expected_matched_count);
|
||||||
|
// these numbers are arbitrary, but the point is probability only works if your sample is big enough...
|
||||||
|
if diff > 4.0 && actual_matched_count > expected_matched_count && prob < 0.01 {
|
||||||
|
let sender_index = self.keybase.iter().find_position(|(d,t)|t.id() == *sender).unwrap().0;
|
||||||
|
event!(Level::INFO, "Found Anomalous Relationship Between {sender}({sender_index}) and {recipient}({recipient_index}) {falsepositiverate}, {detected} >> {expected} | Probability of Event: {prob:.9}", sender=sender, sender_index=sender_index, recipient=pub_key.id(), recipient_index=recipient_index, falsepositiverate=ideal_rate, detected=actual_matched_count, expected=expected_matched_count, prob=prob);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let p_stats = PartyStatistics {
|
let p_stats = PartyStatistics {
|
||||||
ideal_rate,
|
ideal_rate,
|
||||||
expected_messages,
|
expected_messages,
|
||||||
|
|
Loading…
Reference in New Issue