diff --git a/changes/bug23816 b/changes/bug23816 new file mode 100644 index 000000000..6139dec9e --- /dev/null +++ b/changes/bug23816 @@ -0,0 +1,6 @@ + o Minor bugfixes (directory client): + - On failure to download directory information, delay retry attempts + by a random amount based on the "decorrelated jitter" algorithm. + Our previous delay algorithm tended to produce extra-long delays too + easily. Fixes bug 23816; bugfix on 0.2.9.1-alpha. + diff --git a/src/or/directory.c b/src/or/directory.c index 7a1364bd7..b2e0a96a2 100644 --- a/src/or/directory.c +++ b/src/or/directory.c @@ -5421,54 +5421,68 @@ find_dl_min_and_max_delay(download_status_t *dls, const or_options_t *options, *max = INT_MAX; } -/** Advance one delay step. The algorithm is to use the previous delay to - * compute an increment, we construct a value uniformly at random between - * delay+1 and (delay*(DIR_DEFAULT_RANDOM_MULTIPLIER+1))+1 (or - * DIR_TEST_NET_RANDOM_MULTIPLIER in test networks). +/** As next_random_exponential_delay() below, but does not compute a random + * value. Instead, compute the range of values that + * next_random_exponential_delay() should use when computing its random value. + * Store the low bound into *low_bound_out, and the high bound into + * *high_bound_out. Guarantees that the low bound is strictly less + * than the high bound. */ +STATIC void +next_random_exponential_delay_range(int *low_bound_out, + int *high_bound_out, + int delay, + int base_delay) +{ + // This is the "decorrelated jitter" approach, from + // https://www.awsarchitectureblog.com/2015/03/backoff.html + // The formula is + // sleep = min(cap, random_between(base, sleep * 3)) + + const int delay_times_3 = delay < INT_MAX/3 ? delay * 3 : INT_MAX; + *low_bound_out = base_delay; + if (delay_times_3 > base_delay) { + *high_bound_out = delay_times_3; + } else { + *high_bound_out = base_delay+1; + } +} + +/** Advance one delay step. The algorithm will generate a random delay, + * such that each failure is possibly (random) longer than the ones before. + * * We then clamp that value to be no larger than max_delay, and return it. * + * The base_delay parameter is lowest possible delay time (can't be + * zero); the backoff_position parameter is the number of times we've + * generated a delay; and the delay argument is the most recently used + * delay. + * * Requires that delay is less than INT_MAX, and delay is in [0,max_delay]. */ STATIC int -next_random_exponential_delay(int delay, int max_delay) +next_random_exponential_delay(int delay, + int base_delay, + int max_delay) { /* Check preconditions */ if (BUG(max_delay < 0)) max_delay = 0; if (BUG(delay > max_delay)) delay = max_delay; - if (delay == INT_MAX) - return INT_MAX; /* prevent overflow */ if (BUG(delay < 0)) delay = 0; - /* How much are we willing to add to the delay? */ - int max_increment; - int multiplier = DIR_DEFAULT_RANDOM_MULTIPLIER; - if (get_options()->TestingTorNetwork) { - /* Decrease the multiplier in testing networks. This reduces the variance, - * so that bootstrap is more reliable. */ - multiplier = DIR_TEST_NET_RANDOM_MULTIPLIER; - } + if (base_delay < 1) + base_delay = 1; - if (delay && delay < (INT_MAX-1) / multiplier) { - max_increment = delay * multiplier; - } else if (delay) { - max_increment = INT_MAX-1; - } else { - max_increment = 1; - } + int low_bound=0, high_bound=max_delay; - if (BUG(max_increment < 1)) - max_increment = 1; + next_random_exponential_delay_range(&low_bound, &high_bound, + delay, base_delay); - /* the + 1 here is so that we always wait longer than last time. */ - int increment = crypto_rand_int(max_increment)+1; + int rand_delay = crypto_rand_int_range(low_bound, high_bound); - if (increment < max_delay - delay) - return delay + increment; - else - return max_delay; + return MIN(rand_delay, max_delay); } /** Find the current delay for dls based on schedule or min_delay/ @@ -5517,7 +5531,7 @@ download_status_schedule_get_delay(download_status_t *dls, while (dls->last_backoff_position < dls_schedule_position) { /* Do one increment step */ - delay = next_random_exponential_delay(delay, max_delay); + delay = next_random_exponential_delay(delay, min_delay, max_delay); /* Update our position */ ++(dls->last_backoff_position); } diff --git a/src/or/directory.h b/src/or/directory.h index 79984be32..d26d83537 100644 --- a/src/or/directory.h +++ b/src/or/directory.h @@ -226,7 +226,15 @@ STATIC const smartlist_t *find_dl_schedule(const download_status_t *dls, STATIC void find_dl_min_and_max_delay(download_status_t *dls, const or_options_t *options, int *min, int *max); -STATIC int next_random_exponential_delay(int delay, int max_delay); + +STATIC int next_random_exponential_delay(int delay, + int base_delay, + int max_delay); + +STATIC void next_random_exponential_delay_range(int *low_bound_out, + int *high_bound_out, + int delay, + int base_delay); STATIC int parse_hs_version_from_post(const char *url, const char *prefix, const char **end_pos); diff --git a/src/test/test_dir.c b/src/test/test_dir.c index 87b86c38b..ee4a9780b 100644 --- a/src/test/test_dir.c +++ b/src/test/test_dir.c @@ -4126,9 +4126,9 @@ download_status_random_backoff_helper(int min_delay, int max_delay) int increment = -1; int old_increment = -1; time_t current_time = time(NULL); - const int exponent = DIR_DEFAULT_RANDOM_MULTIPLIER + 1; /* Check the random backoff cases */ + int n_attempts = 0; do { increment = download_status_schedule_get_delay(&dls_random, NULL, @@ -4148,40 +4148,16 @@ download_status_random_backoff_helper(int min_delay, int max_delay) /* Test */ tt_int_op(increment, OP_GE, min_delay); tt_int_op(increment, OP_LE, max_delay); - if (dls_random.last_backoff_position == 0) { - /* regression tests for 17750 - * Always use the minimum delay for the first increment */ - tt_int_op(increment, OP_EQ, min_delay); - } else { - /* It's times like these I'd love a good saturating arithmetic - * implementation */ - int min_inc = INT_MAX; - if (old_increment <= INT_MAX - 1) { - min_inc = old_increment + 1; - } - - int max_inc = INT_MAX; - if (old_increment <= (INT_MAX - 1)/exponent) { - max_inc = (exponent * old_increment) + 1; - } - - /* Regression test for 20534 and friends: - * increment must always increase after the first */ - tt_int_op(increment, OP_GE, min_inc); - /* We at most quadruple, and always add one */ - tt_int_op(increment, OP_LE, max_inc); - } /* Advance */ - ++(dls_random.n_download_attempts); - ++(dls_random.n_download_failures); + if (dls_random.n_download_attempts < IMPOSSIBLE_TO_DOWNLOAD - 1) { + ++(dls_random.n_download_attempts); + ++(dls_random.n_download_failures); + } /* Try another maybe */ old_increment = increment; - if (increment >= max_delay) - current_time += increment; - - } while (increment < max_delay); + } while (increment < max_delay && ++n_attempts < 1000); done: return; @@ -4208,6 +4184,38 @@ test_dir_download_status_random_backoff(void *arg) download_status_random_backoff_helper(INT_MAX/2, INT_MAX); } +static void +test_dir_download_status_random_backoff_ranges(void *arg) +{ + (void)arg; + int lo, hi; + next_random_exponential_delay_range(&lo, &hi, 0, 10); + tt_int_op(lo, OP_EQ, 10); + tt_int_op(hi, OP_EQ, 11); + + next_random_exponential_delay_range(&lo, &hi, 6, 10); + tt_int_op(lo, OP_EQ, 10); + tt_int_op(hi, OP_EQ, 6*3); + + next_random_exponential_delay_range(&lo, &hi, 13, 10); + tt_int_op(lo, OP_EQ, 10); + tt_int_op(hi, OP_EQ, 13 * 3); + + next_random_exponential_delay_range(&lo, &hi, 37, 10); + tt_int_op(lo, OP_EQ, 10); + tt_int_op(hi, OP_EQ, 111); + + next_random_exponential_delay_range(&lo, &hi, 123, 10); + tt_int_op(lo, OP_EQ, 10); + tt_int_op(hi, OP_EQ, 369); + + next_random_exponential_delay_range(&lo, &hi, INT_MAX-5, 10); + tt_int_op(lo, OP_EQ, 10); + tt_int_op(hi, OP_EQ, INT_MAX); + done: + ; +} + static void test_dir_download_status_increment(void *arg) { @@ -6208,6 +6216,7 @@ struct testcase_t dir_tests[] = { DIR(packages, 0), DIR(download_status_schedule, 0), DIR(download_status_random_backoff, 0), + DIR(download_status_random_backoff_ranges, 0), DIR(download_status_increment, 0), DIR(authdir_type_to_string, 0), DIR(conn_purpose_to_string, 0),