@ -10,12 +10,13 @@ import (
"time"
)
// Timing and backoff tuning for the contact retry plugin.
const (
	// tickTimeSec is the minimum number of seconds between two passes over
	// the retry queue in the run loop.
	tickTimeSec = 30
	// tickTime is tickTimeSec expressed as a time.Duration.
	tickTime = tickTimeSec * time.Second

	// baseMaxBackoffPeer is the backoff ceiling for peers, counted in ticks.
	// NOTE(review): the original note read "5min * 6 = 30min", which assumes
	// a 5 minute tick rather than the current tickTimeSec — confirm.
	baseMaxBackoffPeer int = 6
	// baseMaxBackoffServer is the backoff ceiling for servers, counted in ticks.
	// Servers don't reach out: other peers coming online will ideally contact
	// us, servers will not.
	// NOTE(review): the original note read "5min * 3 = 15min" — same caveat.
	baseMaxBackoffServer int = 3

	// circutTimeoutMins is the circuit timeout expressed in minutes.
	circutTimeoutMins float64 = 2
	// circutTimeoutSecs is the circuit timeout expressed in seconds.
	circutTimeoutSecs int = 120

	// MaxBaseTimeoutSec is the upper bound, in seconds, on the base retry
	// timeout (5 minutes).
	MaxBaseTimeoutSec = 5 * 60
	// maxFailedBackoff caps the failure count used as the backoff exponent:
	// 2^6 = 64 -> 64 * [2m to 5m] = 2h8m to 5h20m.
	maxFailedBackoff = 6
)

// connectionType distinguishes the kinds of connection the plugin tracks.
type connectionType int
@ -29,29 +30,126 @@ type contact struct {
state connections . ConnectionState
ctype connectionType
ticks int
backoff int
lastAttempt time . Time
failedCount int
lastSeen time . Time
queued bool
}
// compare ranks contact a against contact b.
//
// It returns -1 when a ranks ahead of b, +1 when a ranks behind b, and 0 when
// the two rank equally. A contact with fewer failed attempts ranks ahead; when
// the failure counts match, the contact that was seen more recently ranks ahead.
func (a *contact) compare(b *contact) int {
	switch {
	case a.failedCount < b.failedCount:
		return -1
	case a.failedCount > b.failedCount:
		return 1
	case a.lastSeen.After(b.lastSeen):
		return -1
	case a.lastSeen.Before(b.lastSeen):
		return 1
	default:
		return 0
	}
}
// connectionQueue is a mutex-guarded, ordered list of contacts waiting for a
// connection attempt.
type connectionQueue struct {
	// queue holds the pending contacts; index 0 is the next one handed out.
	queue []*contact
	// lock serialises insert and dequeue.
	lock sync.Mutex
}

// newConnectionQueue returns an empty connectionQueue backed by a non-nil slice.
func newConnectionQueue() *connectionQueue {
	cq := new(connectionQueue)
	cq.queue = make([]*contact, 0)
	return cq
}
// insert places c into the queue at the position given by contact.compare and
// marks it as queued.
//
// The queue is kept ordered so that the contact compare ranks first sits at
// index 0, which is the element dequeue hands out. c is inserted in front of
// the first queued contact it ranks strictly ahead of; if there is none it is
// appended, so contacts that rank equally keep their insertion order.
func (cq *connectionQueue) insert(c *contact) {
	cq.lock.Lock()
	defer cq.lock.Unlock()

	// Default to the end of the queue; this also covers a nil or empty slice.
	pos := len(cq.queue)
	for i, queued := range cq.queue {
		if c.compare(queued) < 0 {
			pos = i
			break
		}
	}

	// Grow by one slot, shift the tail right, and drop c into the gap.
	cq.queue = append(cq.queue, nil)
	copy(cq.queue[pos+1:], cq.queue[pos:])
	cq.queue[pos] = c

	c.queued = true
}
// dequeue removes and returns the contact at the front of the queue, clearing
// its queued flag. It returns nil when the queue is empty.
func (cq *connectionQueue) dequeue() *contact {
	cq.lock.Lock()
	defer cq.lock.Unlock()

	if len(cq.queue) > 0 {
		head, rest := cq.queue[0], cq.queue[1:]
		cq.queue = rest
		head.queued = false
		return head
	}
	return nil
}
// contactRetry is the plugin state for retrying connections to contacts.
type contactRetry struct {
	// bus is the event manager the plugin subscribes to and publishes on.
	bus event.Manager
	// queue receives the events the plugin subscribed to.
	queue event.Queue
	// networkUp records whether the ACN has reported 100% progress.
	networkUp bool
	// networkUpTime is when networkUp last became true; it drives the
	// start-up throttle in maxTorCircuitsPending.
	networkUpTime time.Time
	// running is set once the run loop has started.
	running bool
	// breakChan signals the run loop to stop.
	breakChan chan bool
	// onion is our own address; events about it are ignored.
	onion string
	// lastCheck is when the retry queue was last processed.
	lastCheck time.Time

	// connections maps a contact id to its state.
	connections sync.Map //[string]*contact
	// connCount is the number of entries in connections; access it with
	// the atomic helpers.
	connCount int64
	// pendingQueue holds contacts waiting for a connection attempt.
	pendingQueue *connectionQueue
}
// NewConnectionRetry returns a Plugin that, once started, retries connecting
// to disconnected contacts, spacing attempts by each contact's failed count.
//
// bus is the event manager used for subscriptions and retry requests; onion
// is our own address, which is never retried.
func NewConnectionRetry(bus event.Manager, onion string) Plugin {
	return &contactRetry{
		bus:           bus,
		queue:         event.NewQueue(),
		breakChan:     make(chan bool, 1),
		connections:   sync.Map{},
		connCount:     0,
		networkUp:     false,
		networkUpTime: time.Now(),
		onion:         onion,
		pendingQueue:  newConnectionQueue(),
	}
}
// maxTorCircuitsPending throttles access to the tor network during start up.
// The limit on simultaneously pending connections grows with the time elapsed
// since networkUpTime: 4 in the first 30 seconds, 8 until 4 minutes, 16 until
// 8 minutes, and connections.TorMaxPendingConns after that.
func (cr *contactRetry) maxTorCircuitsPending() int {
	switch elapsed := time.Since(cr.networkUpTime); {
	case elapsed < 30*time.Second:
		return 4
	case elapsed < 4*time.Minute:
		return 8
	case elapsed < 8*time.Minute:
		return 16
	default:
		return connections.TorMaxPendingConns
	}
}
// connectingCount returns how many tracked contacts are currently in the
// connections.CONNECTING state.
func (cr *contactRetry) connectingCount() int {
	total := 0
	cr.connections.Range(func(_, value interface{}) bool {
		if value.(*contact).state == connections.CONNECTING {
			total++
		}
		// Always continue: every entry has to be inspected.
		return true
	})
	return total
}
func ( cr * contactRetry ) Start ( ) {
if ! cr . running {
go cr . run ( )
@ -60,17 +158,34 @@ func (cr *contactRetry) Start() {
}
}
// Id returns the plugin identifier of the contact retry plugin, CONNECTIONRETRY.
func (cr *contactRetry) Id() PluginID {
	return CONNECTIONRETRY
}
func ( cr * contactRetry ) run ( ) {
cr . running = true
cr . bus . Subscribe ( event . PeerStateChange , cr . queue )
cr . bus . Subscribe ( event . ACNStatus , cr . queue )
cr . bus . Subscribe ( event . ServerStateChange , cr . queue )
cr . bus . Subscribe ( event . PeerRequest , cr . queue )
cr . bus . Subscribe ( event . QueuePeerRequest , cr . queue )
cr . bus . Subscribe ( event . QueueJoinServer , cr . queue )
for {
if time . Since ( cr . lastCheck ) > tickTime {
cr . retryDisconnected ( )
cr . lastCheck = time . Now ( )
cr . requeueReady ( )
connectingCount := cr . connectingCount ( )
connCount := atomic . LoadInt64 ( & cr . connCount )
log . Debugf ( "checking queue (len: %v) of total conns watched: %v, with current connecingCount: %v" , len ( cr . pendingQueue . queue ) , connCount , connectingCount )
for connectingCount < cr . maxTorCircuitsPending ( ) && len ( cr . pendingQueue . queue ) > 0 {
contact := cr . pendingQueue . dequeue ( )
// could have received incoming connection while in queue, make sure still disconnected before trying
if contact . state == connections . DISCONNECTED {
cr . publishConnectionRequest ( contact )
connectingCount ++
}
}
cr . lastCheck = time . Now ( )
select {
case e := <- cr . queue . OutChan ( ) :
switch e . EventType {
@ -84,20 +199,38 @@ func (cr *contactRetry) run() {
server := e . Data [ event . GroupServer ]
cr . handleEvent ( server , state , serverConn )
case event . QueueJoinServer :
fallthrough
case event . QueuePeerRequest :
lastSeen , err := time . Parse ( e . Data [ event . LastSeen ] , time . RFC3339Nano )
if err != nil {
lastSeen = event . CwtchEpoch
}
id := ""
if peer , exists := e . Data [ event . RemotePeer ] ; exists {
id = peer
cr . addConnection ( peer , connections . DISCONNECTED , peerConn , lastSeen )
} else if server , exists := e . Data [ event . GroupServer ] ; exists {
id = server
cr . addConnection ( server , connections . DISCONNECTED , serverConn , lastSeen )
}
if c , ok := cr . connections . Load ( id ) ; ok {
contact := c . ( * contact )
if contact . state == connections . DISCONNECTED && ! contact . queued {
cr . pendingQueue . insert ( contact )
}
}
case event . ACNStatus :
prog := e . Data [ event . Progress ]
if prog == "100" && ! cr . networkUp {
cr . networkUp = true
cr . networkUpTime = time . Now ( )
cr . connections . Range ( func ( k , v interface { } ) bool {
p := v . ( * contact )
p . ticks = 0
p . backoff = 1
if p . ctype == peerConn {
cr . bus . Publish ( event . NewEvent ( event . RetryPeerRequest , map [ event . Field ] string { event . RemotePeer : p . id } ) )
}
if p . ctype == serverConn {
cr . bus . Publish ( event . NewEvent ( event . RetryServerRequest , map [ event . Field ] string { event . GroupServer : p . id } ) )
}
p . failedCount = 0
return true
} )
} else if prog != "100" {
@ -115,89 +248,103 @@ func (cr *contactRetry) run() {
}
}
func calcPendingMultiplier ( connectingCount int ) int {
throughPutPerMin := ( int ) ( math . Floor ( connections . TorMaxPendingConns / circutTimeoutMins ) )
minsToClear := connectingCount / throughPutPerMin
baseMaxServerTime := baseMaxBackoffServer * ( tickTimeSec / 60 ) // the lower of the two queues * the tick time in min
multiplier := minsToClear / baseMaxServerTime
if multiplier < 1 {
return 1
}
return multiplier
func ticksToSec ( ticks int ) int {
return ticks * tickTimeSec
}
func ( cr * contactRetry ) retryDisconnected ( ) {
var retryCount int64 = 0
func ( cr * contactRetry ) requeueReady ( ) {
if ! cr . networkUp {
return
}
retryable := [ ] * contact { }
count := atomic . LoadInt64 ( & cr . connCount )
throughPutPerMin := cr . maxTorCircuitsPending ( ) / ( circutTimeoutSecs / 60 )
adjustedBaseTimeout := int ( count ) / throughPutPerMin * 60
if adjustedBaseTimeout < circutTimeoutSecs {
adjustedBaseTimeout = circutTimeoutSecs
} else if adjustedBaseTimeout > MaxBaseTimeoutSec {
adjustedBaseTimeout = MaxBaseTimeoutSec
}
cr . connections . Range ( func ( k , v interface { } ) bool {
p := v . ( * contact )
pendingMultiplier := calcPendingMultiplier ( ( int ) ( atomic . LoadInt64 ( & cr . connectingCount ) + retryCount ) )
if p . state == connections . DISCONNECTED {
p . ticks ++
log . Infof ( "Retrying on disconnected connection, with pendingmult: %v" , pendingMultiplier )
if p . ticks >= ( p . backoff * pendingMultiplier ) {
retryCount ++
p . ticks = 0
if cr . networkUp {
if p . ctype == peerConn {
go func ( id string ) {
cr . bus . Publish ( event . NewEvent ( event . RetryPeerRequest , map [ event . Field ] string { event . RemotePeer : id } ) )
} ( p . id )
}
if p . ctype == serverConn {
go func ( id string ) {
cr . bus . Publish ( event . NewEvent ( event . RetryServerRequest , map [ event . Field ] string { event . GroupServer : p . id } ) )
} ( p . id )
}
}
if p . state == connections . DISCONNECTED && ! p . queued {
timeout := time . Duration ( ( math . Pow ( 2 , float64 ( p . failedCount ) ) ) * float64 ( adjustedBaseTimeout /*baseTimeoutSec*/ ) ) * time . Second
if time . Now ( ) . Sub ( p . lastAttempt ) > timeout {
retryable = append ( retryable , p )
}
}
return true
} )
for _ , contact := range retryable {
cr . pendingQueue . insert ( contact )
}
}
func ( cr * contactRetry ) handleEvent ( id string , state connections . ConnectionState , ctype connectionType ) {
func ( cr * contactRetry ) publishConnectionRequest ( contact * contact ) {
if contact . ctype == peerConn {
cr . bus . Publish ( event . NewEvent ( event . RetryPeerRequest , map [ event . Field ] string { event . RemotePeer : contact . id } ) )
}
if contact . ctype == serverConn {
cr . bus . Publish ( event . NewEvent ( event . RetryServerRequest , map [ event . Field ] string { event . GroupServer : contact . id } ) )
}
contact . state = connections . CONNECTING // Hacky but needed so we don't over flood waiting for PeerStateChange from engine
contact . lastAttempt = time . Now ( )
}
func ( cr * contactRetry ) addConnection ( id string , state connections . ConnectionState , ctype connectionType , lastSeen time . Time ) {
// don't handle contact retries for ourselves
if id == cr . onion {
return
}
if _ , exists := cr . connections . Load ( id ) ; ! exists {
p := & contact { id : id , state : state , backoff : 0 , ticks : 0 , ctype : ctype }
p := & contact { id : id , state : state , failedCount: 0 , lastAttempt : event . CwtchEpoch , ctype : ctype , lastSeen : lastSeen , queued : fals e}
cr . connections . Store ( id , p )
cr . manageStateChange ( state , connections . DISCONNECTED )
atomic . AddInt64 ( & cr . connCount , 1 )
return
}
}
func ( cr * contactRetry ) handleEvent ( id string , state connections . ConnectionState , ctype connectionType ) {
log . Debugf ( "cr.handleEvent state to %v on id %v" , connections . ConnectionStateName [ state ] , id )
// don't handle contact retries for ourselves
if id == cr . onion {
return
}
if _ , exists := cr . connections . Load ( id ) ; ! exists {
cr . addConnection ( id , state , ctype , event . CwtchEpoch )
return
}
pinf , _ := cr . connections . Load ( id )
p := pinf . ( * contact )
cr . manageStateChange ( state , p . state )
log. Infof ( " managing state change for %v %v to %v by self %v" , id , connections . ConnectionStateName [ p . state ] , connections . ConnectionStateName [ state ] , cr . onion )
if state == connections . DISCONNECTED || state == connections . FAILED || state == connections . KILLED {
if p . state == connections . SYNCED || p . state == connections . AUTHENTICATED {
p . lastSeen = time . Now ( )
} else {
p . failedCount += 1
}
p . state = connections . DISCONNECTED
if p . backoff == 0 {
p . backoff = 1
} else if p . backoff < baseMaxBackoffPeer {
p . backoff *= 2
p . lastAttempt = time . Now ( )
if p . failedCount > maxFailedBackoff {
p . failedCount = maxFailedBackoff
}
p . ticks = 0
} else if state == connections . CONNECTING || state == connections . CONNECTED {
p . state = state
} else if state == connections . AUTHENTICATED {
} else if state == connections . AUTHENTICATED || state == connections . SYNCED {
p . state = state
p . backoff = 0
}
}
func ( cr * contactRetry ) manageStateChange ( state , prevState connections . ConnectionState ) {
if state == connections . CONNECTING {
atomic . AddInt64 ( & cr . connectingCount , 1 )
log . Infof ( "New connecting, connectingCount: %v" , atomic . LoadInt64 ( & cr . connectingCount ) )
} else if prevState == connections . CONNECTING {
atomic . AddInt64 ( & cr . connectingCount , - 1 )
log . Infof ( "Failed or Connected, connectingCount: %v" , atomic . LoadInt64 ( & cr . connectingCount ) )
p . lastSeen = time . Now ( )
p . failedCount = 0
}
}
// Shutdown signals the run loop to stop via breakChan and then shuts down the
// plugin's event queue.
func (cr *contactRetry) Shutdown() {
	cr.breakChan <- true
	cr.queue.Shutdown()
}