2021-02-03 13:05:59 +00:00
use crate ::oracle ::Oracle ;
2021-02-14 04:43:54 +00:00
use crate ::probability ::binomial ::{ at_least_with_replacement , nchoosek } ;
use fuzzytags ::{ DetectionKey , Tag , TaggingKey } ;
2021-02-13 07:36:04 +00:00
use hashbrown ::{ HashMap , HashSet } ;
2021-02-14 04:43:54 +00:00
use itertools ::Itertools ;
2021-02-13 07:36:04 +00:00
use rayon ::iter ::IntoParallelRefIterator ;
use rayon ::iter ::ParallelIterator ;
use std ::sync ::mpsc ::channel ;
use std ::sync ::Arc ;
2021-02-14 04:43:54 +00:00
use std ::time ::Duration ;
use tracing ::event ;
use tracing ::span ;
use tracing ::Level ;
2021-02-01 07:50:59 +00:00
pub struct SimulatedServer {
2021-02-14 04:43:54 +00:00
keybase : Vec < ( DetectionKey < 24 > , TaggingKey < 24 > , String ) > ,
detection_key_cache : Vec < DetectionKey < 24 > > ,
2021-02-10 07:23:50 +00:00
messages : Vec < ( Tag < 24 > , TaggingKey < 24 > ) > ,
2021-02-13 07:36:04 +00:00
sender_tags : HashMap < String , String > ,
sender_count : HashMap < String , f64 > ,
tags_to_keys_cache : HashMap < String , HashSet < String > > ,
keys_to_tags_cache : HashMap < String , HashSet < String > > ,
2021-02-03 13:05:59 +00:00
oracle : Oracle ,
2021-02-01 07:50:59 +00:00
}
2021-02-14 04:43:54 +00:00
/// Positional record of three strings and one float.
///
/// NOTE(review): nothing in the visible part of this file constructs or reads
/// this type, so the meaning of each position is not established here —
/// confirm against the rest of the crate (or remove if it is dead code).
struct Event(String, String, String, f64);
2021-02-13 07:36:04 +00:00
2021-02-01 23:23:01 +00:00
/// Totals for one simulated round, as reported by `SimulatedServer::statistics`.
///
/// Derives `Debug` so it can be logged and inspected the same way as
/// `PartyStatistics`, which is returned alongside it.
#[derive(Debug)]
pub struct RoundStatistics {
    /// Number of keys registered with the server (length of its keybase).
    pub num_registered_parties: usize,
    /// Number of messages added to the server.
    pub num_messages: usize,
}
/// Per-party figures produced by `SimulatedServer::statistics`.
#[derive(Debug)]
pub struct PartyStatistics {
    /// False-positive probability reported by the party's detection key.
    pub ideal_rate: f64,
    /// `ideal_rate` multiplied by the total number of messages in the round.
    pub expected_messages: f64,
    /// Number of tags the party's detection key actually matched.
    pub observed_messages: usize,
    /// `observed_messages` divided by the total number of messages.
    pub observed_rate: f64,
    /// `observed_messages` minus `expected_messages`.
    pub observed_skew_messages: f64,
    /// `observed_messages` divided by `expected_messages`.
    pub observed_skew: f64,
    /// Number of matched tags that matched exactly one registered key.
    pub trivial_breaks: usize,
}
impl SimulatedServer {
pub fn new ( ) -> SimulatedServer {
SimulatedServer {
keybase : vec ! [ ] ,
messages : vec ! [ ] ,
2021-02-14 04:43:54 +00:00
detection_key_cache : vec ! [ ] ,
2021-02-13 07:36:04 +00:00
sender_tags : HashMap ::new ( ) ,
sender_count : HashMap ::new ( ) ,
2021-02-01 07:50:59 +00:00
tags_to_keys_cache : HashMap ::new ( ) ,
keys_to_tags_cache : HashMap ::new ( ) ,
2021-02-03 13:05:59 +00:00
oracle : Oracle ::new ( ) ,
2021-02-01 07:50:59 +00:00
}
}
2021-02-10 07:23:50 +00:00
pub fn register_key ( & mut self , detection_key : & DetectionKey < 24 > , tagging_key : & TaggingKey < 24 > ) {
2021-02-14 04:43:54 +00:00
self . keybase . push ( ( detection_key . clone ( ) , tagging_key . clone ( ) , tagging_key . id ( ) ) ) ;
2021-02-13 07:36:04 +00:00
self . keys_to_tags_cache . insert ( tagging_key . id ( ) , HashSet ::new ( ) ) ;
2021-02-10 07:23:50 +00:00
self . oracle . register_party ( tagging_key . id ( ) ) ;
2021-02-01 07:50:59 +00:00
}
2021-02-14 04:43:54 +00:00
/// Rebuilds the detection-key cache from the current keybase.
///
/// The cache holds one cloned detection key per keybase entry, in keybase
/// order, so an index into the cache is also an index into the keybase.
pub fn finalize(&mut self) {
    let mut cache = Vec::with_capacity(self.keybase.len());
    for (detection_key, _, _) in &self.keybase {
        cache.push(detection_key.clone());
    }
    self.detection_key_cache = cache;
}
2021-02-10 07:23:50 +00:00
pub fn add_message ( & mut self , tag : Tag < 24 > , sender_tagging_key : & TaggingKey < 24 > ) {
2021-02-14 04:43:54 +00:00
let tag_id = tag . to_string ( ) ;
let sender_id = sender_tagging_key . id ( ) ;
2021-02-10 07:23:50 +00:00
self . messages . push ( ( tag . clone ( ) , sender_tagging_key . clone ( ) ) ) ;
2021-02-14 04:43:54 +00:00
self . tags_to_keys_cache . insert ( tag_id . clone ( ) , HashSet ::new ( ) ) ;
self . sender_tags . insert ( tag_id . clone ( ) , sender_tagging_key . id ( ) ) ;
2021-02-01 07:50:59 +00:00
2021-02-13 07:36:04 +00:00
let count = match self . sender_count . get ( sender_tagging_key . id ( ) . as_str ( ) ) {
Some ( count ) = > * count + 1.0 ,
_ = > 1.0 ,
} ;
self . sender_count . insert ( sender_tagging_key . id ( ) , count ) ;
2021-02-14 04:43:54 +00:00
let results = DetectionKey ::test_tag_bulk ( & self . detection_key_cache , & tag ) ;
for index in results {
let detection_key = & self . keybase [ index ] . 0 ;
let receiver_tagging_id = & self . keybase [ index ] . 2 ;
event! ( Level ::TRACE , " Matched detection key for {key} to tag {tag} " , key = receiver_tagging_id . clone ( ) , tag = tag_id . clone ( ) ) ;
self . tags_to_keys_cache . get_mut ( tag_id . as_str ( ) ) . unwrap ( ) . insert ( receiver_tagging_id . clone ( ) ) ;
self . keys_to_tags_cache . get_mut ( receiver_tagging_id . as_str ( ) ) . unwrap ( ) . insert ( tag_id . clone ( ) ) ;
self . oracle . add_event ( sender_id . clone ( ) , receiver_tagging_id . clone ( ) , None , detection_key . false_positive_probability ( ) ) ;
2021-02-01 07:50:59 +00:00
}
}
2021-02-14 04:43:54 +00:00
pub fn statistics ( & mut self ) -> ( Oracle , RoundStatistics , HashMap < String , PartyStatistics > ) {
2021-02-01 23:23:01 +00:00
let mut party_stats = HashMap ::new ( ) ;
let round_stats = RoundStatistics {
num_messages : self . messages . len ( ) ,
num_registered_parties : self . keybase . len ( ) ,
} ;
2021-02-14 04:43:54 +00:00
for ( recipient_index , ( party , pub_key , id ) ) in self . keybase . iter ( ) . enumerate ( ) {
2021-02-01 23:23:01 +00:00
let matched = self . keys_to_tags_cache [ pub_key . id ( ) . as_str ( ) ] . clone ( ) ;
2021-02-01 07:50:59 +00:00
let observed_messages = matched . len ( ) ;
let ideal_rate = party . false_positive_probability ( ) ;
2021-02-01 23:23:01 +00:00
let expected_messages = ideal_rate * ( round_stats . num_messages as f64 ) ;
2021-02-01 07:50:59 +00:00
let observed_rate = ( observed_messages as f64 ) / ( self . messages . len ( ) as f64 ) ;
2021-02-01 23:23:01 +00:00
let observed_skew_messages = ( observed_messages as f64 ) - expected_messages ;
2021-02-01 20:20:30 +00:00
let observed_skew = ( observed_messages as f64 ) / expected_messages ;
2021-02-01 07:50:59 +00:00
let mut trivial_breaks = 0 ;
2021-02-13 07:36:04 +00:00
let mut num_times_matched_with = HashMap ::new ( ) ;
2021-02-01 07:50:59 +00:00
for tag in matched . iter ( ) {
2021-02-13 07:36:04 +00:00
let sender = self . sender_tags [ tag ] . clone ( ) ;
let num = match num_times_matched_with . get ( sender . as_str ( ) ) {
Some ( num ) = > * num + 1.0 ,
_ = > 1.0 ,
} ;
num_times_matched_with . insert ( sender , num ) ;
2021-02-01 07:50:59 +00:00
if self . tags_to_keys_cache [ tag . to_string ( ) . as_str ( ) ] . len ( ) = = 1 {
trivial_breaks + = 1 ;
}
}
2021-02-13 07:36:04 +00:00
for ( sender , count ) in num_times_matched_with . iter ( ) {
2021-02-14 04:43:54 +00:00
let expected_matched_count = ( ideal_rate * self . sender_count [ sender . as_str ( ) ] ) ;
let actual_matched_count = ( * count ) ;
let prob = at_least_with_replacement ( actual_matched_count as u64 , self . sender_count [ sender . as_str ( ) ] as u64 , ideal_rate ) . to_f64 ( ) ;
2021-02-13 07:36:04 +00:00
let diff = f64 ::abs ( actual_matched_count - expected_matched_count ) ;
// these numbers are arbitrary, but the point is probability only works if your sample is big enough...
2021-02-14 04:43:54 +00:00
if diff > 4.0 & & actual_matched_count > expected_matched_count & & prob < 0.0001 {
let sender_index = self . keybase . iter ( ) . find_position ( | ( d , t , _ ) | t . id ( ) = = * sender ) . unwrap ( ) . 0 ;
event! ( Level ::INFO , " Found Anomalous Relationship Between {sender}({sender_index}) and {recipient}({recipient_index}) {falsepositiverate}, {detected} >> {expected} | Probability of Event: {prob:.9} " , sender = sender , sender_index = sender_index , recipient = pub_key . id ( ) , recipient_index = recipient_index , falsepositiverate = ideal_rate , detected = actual_matched_count , expected = expected_matched_count , prob = prob ) ;
self . oracle . add_suspect ( & sender , & id , 1.0 - prob ) ;
}
2021-02-13 07:36:04 +00:00
}
2021-02-01 23:23:01 +00:00
let p_stats = PartyStatistics {
ideal_rate ,
expected_messages ,
observed_messages ,
observed_rate ,
observed_skew_messages ,
observed_skew ,
trivial_breaks ,
} ;
party_stats . insert ( pub_key . id ( ) , p_stats ) ;
2021-02-01 07:50:59 +00:00
}
2021-02-01 23:23:01 +00:00
2021-02-03 13:05:59 +00:00
( self . oracle . clone ( ) , round_stats , party_stats )
2021-02-01 07:50:59 +00:00
}
}