relay: Avoid connecting to down relays

If we failed to connect at the TCP level to a relay, note it down and refuse
to connect again for another 60 seconds.

Fixes #24767

Signed-off-by: David Goulet <dgoulet@torproject.org>
This commit is contained in:
David Goulet 2018-02-06 15:40:17 -05:00
parent 46c2b0ca22
commit f29d158330
2 changed files with 229 additions and 0 deletions

5
changes/bug24767 Normal file
View File

@ -0,0 +1,5 @@
o Major bugfixes (relay, connection):
- Refuse to connect again to a relay from which we failed previously with
a connection refused, timeout or error (at the TCP level). The relay
won't be retried for 60 seconds after the failure occurred. Fixes bug
24767; bugfix on 0.0.6.

View File

@ -1122,6 +1122,216 @@ connection_or_group_set_badness_(smartlist_t *group, int force)
} SMARTLIST_FOREACH_END(or_conn);
}
/* Lifetime of a connection failure entry, in seconds. While an entry is
 * younger than this, we refuse to connect to that destination; after that,
 * we'll retry. */
#define OR_CONNECT_FAILURE_LIFETIME 60
/* Minimum interval, in seconds, between two cleanups of the failure
 * cache. */
#define OR_CONNECT_FAILURE_CLEANUP_INTERVAL 60
/* Earliest time at which the failure map has to be cleaned up again. We keep
 * this because the map is cleaned opportunistically, while checking whether
 * we should connect to a relay. */
static time_t or_connect_failure_map_next_cleanup_ts = 0;
/* OR connection failure entry data structure. It is kept in the connection
 * failure map defined below and indexed by OR identity digest, address and
 * port.
 *
 * We need to identify a connection failure with these three values because we
 * want to avoid wrongfully blacklisting a relay if someone is trying to
 * extend to a known identity digest but with the wrong IP/port. For instance,
 * it can happen if a relay changed its port but the client still has an old
 * descriptor with the old port. We want to stop connecting to that
 * IP/port/identity altogether, not only the relay identity. */
typedef struct or_connect_failure_entry_t {
/* Hashtable linkage for the connection failure map. */
HT_ENTRY(or_connect_failure_entry_t) node;
/* Identity digest of the relay this connection is connecting to. */
uint8_t identity_digest[DIGEST_LEN];
/* This is the connection address from the base connection_t. After the
 * connection is checked for canonicity, the base address should represent
 * what we know instead of where we are connecting to. This is what we need
 * so we can correlate known relays within the consensus. */
tor_addr_t addr;
/* Port of the connection; third component of the key. */
uint16_t port;
/* Last time we were unable to connect. */
time_t last_failed_connect_ts;
} or_connect_failure_entry_t;
/* Map where we keep connection failure entries. They are indexed by addr,
 * port and identity digest. The map starts out empty. */
static HT_HEAD(or_connect_failure_ht, or_connect_failure_entry_t)
or_connect_failures_map = HT_INITIALIZER();
/* Helper: hashtable equality callback. Two entries are equal when their
 * identity digest, address and port all match. Return 1 if equal else 0. */
static int
or_connect_failure_ht_eq(const or_connect_failure_entry_t *a,
                         const or_connect_failure_entry_t *b)
{
  if (!fast_memeq(a->identity_digest, b->identity_digest, DIGEST_LEN)) {
    return 0;
  }
  if (!tor_addr_eq(&a->addr, &b->addr)) {
    return 0;
  }
  return a->port == b->port;
}
/* Helper: hashtable hash callback. The hash covers the raw address bytes,
 * followed by the identity digest, followed by the port, all packed into one
 * buffer. An entry whose address is neither IPv4 nor IPv6 triggers a
 * non-fatal assertion and hashes to 0. */
static unsigned int
or_connect_failure_ht_hash(const or_connect_failure_entry_t *entry)
{
  /* Sized for the largest case, a 16-byte IPv6 address; IPv4 simply uses
   * fewer bytes of it. */
  uint8_t buf[16 + sizeof(uint16_t) + DIGEST_LEN];
  const void *addr_bytes;
  size_t addr_len;

  /* Pick the raw address bytes according to the address family. */
  switch (tor_addr_family(&entry->addr)) {
    case AF_INET6:
      addr_bytes = &entry->addr.addr.in6_addr.s6_addr;
      addr_len = 16;
      break;
    case AF_INET:
      addr_bytes = &entry->addr.addr.in_addr.s_addr;
      addr_len = 4;
      break;
    default:
      tor_assert_nonfatal_unreached();
      return 0;
  }

  /* Layout: address | identity digest | port. */
  memcpy(buf, addr_bytes, addr_len);
  memcpy(buf + addr_len, entry->identity_digest, DIGEST_LEN);
  set_uint16(buf + addr_len + DIGEST_LEN, entry->port);

  return (unsigned int) siphash24g(buf,
                                   addr_len + DIGEST_LEN + sizeof(uint16_t));
}
/* Instantiate the or_connect_failure_ht hashtable over
 * or_connect_failure_entry_t, linked through its "node" member and using the
 * hash and equality helpers above. NOTE(review): 0.6 is presumably the load
 * factor expected by HT_GENERATE2 -- confirm against ht.h. */
HT_PROTOTYPE(or_connect_failure_ht, or_connect_failure_entry_t, node,
or_connect_failure_ht_hash, or_connect_failure_ht_eq)
HT_GENERATE2(or_connect_failure_ht, or_connect_failure_entry_t, node,
or_connect_failure_ht_hash, or_connect_failure_ht_eq,
0.6, tor_reallocarray_, tor_free_)
/* Fill in the key fields of <b>ocf</b> from the given identity_digest, addr
 * and port. The digest and the address are only copied when non-NULL; the
 * port is always stored. All fields are optional except ocf, which must not
 * be NULL. */
static void
or_connect_failure_init(const char *identity_digest, const tor_addr_t *addr,
                        uint16_t port, or_connect_failure_entry_t *ocf)
{
  tor_assert(ocf);

  ocf->port = port;
  if (addr != NULL) {
    tor_addr_copy(&ocf->addr, addr);
  }
  if (identity_digest != NULL) {
    memcpy(ocf->identity_digest, identity_digest,
           sizeof(ocf->identity_digest));
  }
}
/* Return a newly allocated, zeroed connection failure entry whose key is the
 * identity digest, base connection address and port of <b>or_conn</b>. The
 * failure timestamp is left at zero for the caller to set. This can't fail.
 *
 * The key must be built from the base connection address, exactly as
 * or_connect_failure_find() does. Using real_addr here would store the entry
 * under a key that lookups never use whenever the two addresses differ: the
 * failure would then never be honoured and every new failure would insert a
 * duplicate entry. */
static or_connect_failure_entry_t *
or_connect_failure_new(const or_connection_t *or_conn)
{
  or_connect_failure_entry_t *ocf = tor_malloc_zero(sizeof(*ocf));

  or_connect_failure_init(or_conn->identity_digest, &TO_CONN(or_conn)->addr,
                          TO_CONN(or_conn)->port, ocf);
  return ocf;
}
/* Look up the connection failure entry matching the identity digest, base
 * connection address and port of <b>or_conn</b>. Return the entry, or NULL
 * if there is none. */
static or_connect_failure_entry_t *
or_connect_failure_find(const or_connection_t *or_conn)
{
  or_connect_failure_entry_t key;

  tor_assert(or_conn);

  /* Only the key fields of the temporary are filled in. */
  or_connect_failure_init(or_conn->identity_digest, &TO_CONN(or_conn)->addr,
                          TO_CONN(or_conn)->port, &key);

  return HT_FIND(or_connect_failure_ht, &or_connect_failures_map, &key);
}
/* Record in the connection failure cache that connecting with <b>or_conn</b>
 * just failed. An entry is created on the first failure for a destination;
 * in all cases its timestamp is set to the current approximate time. */
static void
note_or_connect_failed(const or_connection_t *or_conn)
{
  or_connect_failure_entry_t *entry;

  tor_assert(or_conn);

  entry = or_connect_failure_find(or_conn);
  if (!entry) {
    /* No entry found for this destination: add one to the map. */
    entry = or_connect_failure_new(or_conn);
    HT_INSERT(or_connect_failure_ht, &or_connect_failures_map, entry);
  }

  entry->last_failed_connect_ts = approx_time();
}
/* Clean up the connection failure cache: remove and free every entry whose
 * last failure time is at or before the given cutoff. */
static void
or_connect_failure_map_cleanup(time_t cutoff)
{
  or_connect_failure_entry_t **iter;

  iter = HT_START(or_connect_failure_ht, &or_connect_failures_map);
  while (iter != NULL) {
    or_connect_failure_entry_t *victim = *iter;

    if (victim->last_failed_connect_ts > cutoff) {
      /* Still fresh: keep it and move on. */
      iter = HT_NEXT(or_connect_failure_ht, &or_connect_failures_map, iter);
      continue;
    }

    /* Expired: unlink it while advancing the iterator, then free it. */
    iter = HT_NEXT_RMV(or_connect_failure_ht, &or_connect_failures_map, iter);
    tor_free(victim);
  }
}
/* Return true iff the given OR connection can connect to its destination that
 * is the triplet identity_digest, address and port.
 *
 * Return 1 if we have no recorded failure for that destination, or if the
 * last recorded failure is at least OR_CONNECT_FAILURE_LIFETIME seconds old.
 * Return 0 if we failed to connect to it more recently than that.
 *
 * As a side effect, this opportunistically cleans up the failure cache, at
 * most once every OR_CONNECT_FAILURE_CLEANUP_INTERVAL seconds.
 *
 * The or_conn MUST have gone through connection_or_check_canonicity() so the
 * base address is properly set to what we know or don't know. */
static int
should_connect_to_relay(const or_connection_t *or_conn)
{
  time_t now, cutoff;
  const or_connect_failure_entry_t *ocf;

  tor_assert(or_conn);

  now = approx_time();
  cutoff = now - OR_CONNECT_FAILURE_LIFETIME;

  /* Opportunistically try to cleanup the failure cache. We do that at regular
   * interval so it doesn't grow too big. */
  if (or_connect_failure_map_next_cleanup_ts <= now) {
    or_connect_failure_map_cleanup(cutoff);
    or_connect_failure_map_next_cleanup_ts =
      now + OR_CONNECT_FAILURE_CLEANUP_INTERVAL;
  }

  /* Look if we have failed previously to the same destination as this
   * OR connection. */
  ocf = or_connect_failure_find(or_conn);
  if (ocf == NULL) {
    /* We have never failed before so let it connect. This is decided without
     * looking at the cutoff: comparing a default timestamp of 0 against it
     * would wrongly refuse the connection whenever the cutoff is negative,
     * that is when the approximate time is smaller than the failure
     * lifetime. */
    return 1;
  }

  /* We did fail before: only connect again once the failure is old enough,
   * that is once its timestamp is at or below the cutoff. */
  return ocf->last_failed_connect_ts <= cutoff;
}
/** <b>conn</b> is in the 'connecting' state, and it failed to complete
* a TCP connection. Send notifications appropriately.
*
@ -1135,6 +1345,7 @@ connection_or_connect_failed(or_connection_t *conn,
control_event_or_conn_status(conn, OR_CONN_EVENT_FAILED, reason);
if (!authdir_mode_tests_reachability(get_options()))
control_event_bootstrap_prob_or(msg, reason, conn);
note_or_connect_failed(conn);
}
/** <b>conn</b> got an error in connection_handle_read_impl() or
@ -1225,6 +1436,19 @@ connection_or_connect, (const tor_addr_t *_addr, uint16_t port,
conn->chan = chan;
chan->conn = conn;
connection_or_init_conn_from_address(conn, &addr, port, id_digest, ed_id, 1);
/* We have a proper OR connection setup, now check if we can connect to it
* that is we haven't had a failure earlier. This is to avoid to try to
* constantly connect to relays that we think are not reachable. */
if (!should_connect_to_relay(conn)) {
log_info(LD_GENERAL, "Can't connect to identity %s at %s:%u because we "
"failed earlier. Refusing.",
hex_str(id_digest, DIGEST_LEN), fmt_addr(&TO_CONN(conn)->addr),
TO_CONN(conn)->port);
connection_free_(TO_CONN(conn));
return NULL;
}
connection_or_change_state(conn, OR_CONN_STATE_CONNECTING);
control_event_or_conn_status(conn, OR_CONN_EVENT_LAUNCHED, 0);