From 5e395ba2c2bf041cacf85e6ad38cb39ea8ae7419 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Thu, 26 Apr 2018 10:18:39 -0400 Subject: [PATCH 1/3] Rewrite time-handling in circuitmux_ewma to use monotime_coarse This part of the code was the only part that used "cached getttimeofday" feature, which wasn't monotonic, which we updated at slight expense, and which I'd rather not maintain. --- changes/ticket25927.1 | 6 +++ src/or/circuitmux_ewma.c | 79 ++++++++++++++++++++++++++++---------- src/or/circuitmux_ewma.h | 7 ++++ src/or/main.c | 2 + src/test/test_channel.c | 2 + src/test/test_circuitmux.c | 40 +++++++++++++++++++ 6 files changed, 115 insertions(+), 21 deletions(-) create mode 100644 changes/ticket25927.1 diff --git a/changes/ticket25927.1 b/changes/ticket25927.1 new file mode 100644 index 000000000..84ac86e18 --- /dev/null +++ b/changes/ticket25927.1 @@ -0,0 +1,6 @@ + o Minor features (timekeeping, circuit scheduling): + - When keeping track of how busy each circuit have been recently on + a given connection, use coarse-grained monotonic timers rather than + gettimeofday(). This change should marginally increase accuracy + and performance. Implements part of ticket 25927. + diff --git a/src/or/circuitmux_ewma.c b/src/or/circuitmux_ewma.c index 4b80124a7..a8a645ca5 100644 --- a/src/or/circuitmux_ewma.c +++ b/src/or/circuitmux_ewma.c @@ -28,7 +28,7 @@ * **/ -#define TOR_CIRCUITMUX_EWMA_C_ +#define CIRCUITMUX_EWMA_PRIVATE #include "orconfig.h" @@ -169,8 +169,6 @@ TO_EWMA_POL_CIRC_DATA(circuitmux_policy_circ_data_t *pol) static void add_cell_ewma(ewma_policy_data_t *pol, cell_ewma_t *ewma); static int compare_cell_ewma_counts(const void *p1, const void *p2); -static unsigned cell_ewma_tick_from_timeval(const struct timeval *now, - double *remainder_out); static circuit_t * cell_ewma_to_circuit(cell_ewma_t *ewma); static inline double get_scale_factor(unsigned from_tick, unsigned to_tick); static cell_ewma_t * pop_first_cell_ewma(ewma_policy_data_t *pol); @@ -239,6 +237,13 @@ circuitmux_policy_t ewma_policy = { /*.cmp_cmux =*/ ewma_cmp_cmux }; +/** Have we initialized the ewma tick-counting logic? */ +static int ewma_ticks_initialized = 0; +/** At what monotime_coarse_t did the current tick begin? */ +static monotime_coarse_t start_of_current_tick; +/** What is the number of the current tick? */ +static unsigned current_tick_num; + /*** EWMA method implementations using the below EWMA helper functions ***/ /** Compute and return the current cell_ewma tick. */ @@ -421,8 +426,6 @@ ewma_notify_xmit_cells(circuitmux_t *cmux, ewma_policy_circ_data_t *cdata = NULL; unsigned int tick; double fractional_tick, ewma_increment; - /* The current (hi-res) time */ - struct timeval now_hires; cell_ewma_t *cell_ewma, *tmp; tor_assert(cmux); @@ -435,8 +438,7 @@ ewma_notify_xmit_cells(circuitmux_t *cmux, cdata = TO_EWMA_POL_CIRC_DATA(pol_circ_data); /* Rescale the EWMAs if needed */ - tor_gettimeofday_cached(&now_hires); - tick = cell_ewma_tick_from_timeval(&now_hires, &fractional_tick); + tick = cell_ewma_get_current_tick_and_fraction(&fractional_tick); if (tick != pol->active_circuit_pqueue_last_recalibrated) { scale_active_circuits(pol, tick); @@ -597,24 +599,48 @@ cell_ewma_to_circuit(cell_ewma_t *ewma) rescale. */ -/** Given a timeval now, compute the cell_ewma tick in which it occurs - * and the fraction of the tick that has elapsed between the start of the tick - * and now. Return the former and store the latter in - * *remainder_out. +/** + * Initialize the system that tells which ewma tick we are in. + */ +STATIC void +cell_ewma_initialize_ticks(void) +{ + if (ewma_ticks_initialized) + return; + monotime_coarse_get(&start_of_current_tick); + crypto_rand((char*)¤t_tick_num, sizeof(current_tick_num)); + ewma_ticks_initialized = 1; +} + +/** Compute the current cell_ewma tick and the fraction of the tick that has + * elapsed between the start of the tick and the current time. Return the + * former and store the latter in *remainder_out. * * These tick values are not meant to be shared between Tor instances, or used * for other purposes. */ - -static unsigned -cell_ewma_tick_from_timeval(const struct timeval *now, - double *remainder_out) +STATIC unsigned +cell_ewma_get_current_tick_and_fraction(double *remainder_out) { - unsigned res = (unsigned) (now->tv_sec / EWMA_TICK_LEN); - /* rem */ - double rem = (now->tv_sec % EWMA_TICK_LEN) + - ((double)(now->tv_usec)) / 1.0e6; - *remainder_out = rem / EWMA_TICK_LEN; - return res; + if (BUG(!ewma_ticks_initialized)) { + cell_ewma_initialize_ticks(); // LCOV_EXCL_LINE + } + monotime_coarse_t now; + monotime_coarse_get(&now); + // XXXX this does a division operation that can be slow on 32-bit + // XXXX systems. + int32_t msec_diff = + (int32_t)monotime_coarse_diff_msec(&start_of_current_tick, + &now); + if (msec_diff > (1000*EWMA_TICK_LEN)) { + unsigned ticks_difference = msec_diff / (1000*EWMA_TICK_LEN); + monotime_coarse_add_msec(&start_of_current_tick, + &start_of_current_tick, + ticks_difference * 1000 * EWMA_TICK_LEN); + current_tick_num += ticks_difference; + msec_diff %= 1000*EWMA_TICK_LEN; + } + *remainder_out = ((double)msec_diff) / (1.0e3 * EWMA_TICK_LEN); + return current_tick_num; } /* Default value for the CircuitPriorityHalflifeMsec consensus parameter in @@ -678,6 +704,8 @@ cmux_ewma_set_options(const or_options_t *options, double halflife; const char *source; + cell_ewma_initialize_ticks(); + /* Both options and consensus can be NULL. This assures us to either get a * valid configured value or the default one. */ halflife = get_circuit_priority_halflife(options, consensus, &source); @@ -788,3 +816,12 @@ pop_first_cell_ewma(ewma_policy_data_t *pol) offsetof(cell_ewma_t, heap_index)); } +/** + * Drop all resources held by circuitmux_ewma.c, and deinitialize the + * module. */ +void +circuitmux_ewma_free_all(void) +{ + ewma_ticks_initialized = 0; +} + diff --git a/src/or/circuitmux_ewma.h b/src/or/circuitmux_ewma.h index 2ef8c2586..f0c4c3609 100644 --- a/src/or/circuitmux_ewma.h +++ b/src/or/circuitmux_ewma.h @@ -19,5 +19,12 @@ extern circuitmux_policy_t ewma_policy; void cmux_ewma_set_options(const or_options_t *options, const networkstatus_t *consensus); +void circuitmux_ewma_free_all(void); + +#ifdef CIRCUITMUX_EWMA_PRIVATE +STATIC unsigned cell_ewma_get_current_tick_and_fraction(double *remainder_out); +STATIC void cell_ewma_initialize_ticks(void); +#endif + #endif /* !defined(TOR_CIRCUITMUX_EWMA_H) */ diff --git a/src/or/main.c b/src/or/main.c index ffe407329..ee6a2459a 100644 --- a/src/or/main.c +++ b/src/or/main.c @@ -59,6 +59,7 @@ #include "circuitbuild.h" #include "circuitlist.h" #include "circuituse.h" +#include "circuitmux_ewma.h" #include "command.h" #include "compress.h" #include "config.h" @@ -3488,6 +3489,7 @@ tor_free_all(int postfork) consdiffmgr_free_all(); hs_free_all(); dos_free_all(); + circuitmux_ewma_free_all(); if (!postfork) { config_free_all(); or_state_free_all(); diff --git a/src/test/test_channel.c b/src/test/test_channel.c index 812ec6c1a..7d5018ef5 100644 --- a/src/test/test_channel.c +++ b/src/test/test_channel.c @@ -544,6 +544,8 @@ test_channel_outbound_cell(void *arg) (void) arg; + cmux_ewma_set_options(NULL,NULL); + /* The channel will be freed so we need to hijack this so the scheduler * doesn't get confused. */ MOCK(scheduler_release_channel, scheduler_release_channel_mock); diff --git a/src/test/test_circuitmux.c b/src/test/test_circuitmux.c index 75b7a0ea4..14c759870 100644 --- a/src/test/test_circuitmux.c +++ b/src/test/test_circuitmux.c @@ -3,6 +3,7 @@ #define TOR_CHANNEL_INTERNAL_ #define CIRCUITMUX_PRIVATE +#define CIRCUITMUX_EWMA_PRIVATE #define RELAY_PRIVATE #include "or.h" #include "channel.h" @@ -79,8 +80,47 @@ test_cmux_destroy_cell_queue(void *arg) tor_free(dc); } +static void +test_cmux_compute_ticks(void *arg) +{ + const int64_t NS_PER_S = 1000 * 1000 * 1000; + const int64_t START_NS = U64_LITERAL(1217709000)*NS_PER_S; + int64_t now; + double rem; + unsigned tick; + (void)arg; + circuitmux_ewma_free_all(); + monotime_enable_test_mocking(); + + monotime_coarse_set_mock_time_nsec(START_NS); + cell_ewma_initialize_ticks(); + const unsigned tick_zero = cell_ewma_get_current_tick_and_fraction(&rem); + tt_double_op(rem, OP_GT, -1e-9); + tt_double_op(rem, OP_LT, 1e-9); + + /* 1.5 second later and we should still be in the same tick. */ + now = START_NS + NS_PER_S + NS_PER_S/2; + monotime_coarse_set_mock_time_nsec(now); + tick = cell_ewma_get_current_tick_and_fraction(&rem); + tt_uint_op(tick, OP_EQ, tick_zero); + tt_double_op(rem, OP_GT, .149999999); + tt_double_op(rem, OP_LT, .150000001); + + /* 25 second later and we should be in another tick. */ + now = START_NS + NS_PER_S * 25; + monotime_coarse_set_mock_time_nsec(now); + tick = cell_ewma_get_current_tick_and_fraction(&rem); + tt_uint_op(tick, OP_EQ, tick_zero + 2); + tt_double_op(rem, OP_GT, .499999999); + tt_double_op(rem, OP_LT, .500000001); + + done: + ; +} + struct testcase_t circuitmux_tests[] = { { "destroy_cell_queue", test_cmux_destroy_cell_queue, TT_FORK, NULL, NULL }, + { "compute_ticks", test_cmux_compute_ticks, TT_FORK, NULL, NULL }, END_OF_TESTCASES }; From 7cbc44eeb19831bc467f8e1b87062ed1c87934d5 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Thu, 26 Apr 2018 10:24:33 -0400 Subject: [PATCH 2/3] Remove the "cached gettimeofday" logic. Previously were using this value to have a cheap highish-resolution timer. But we were only using it in one place, and current dogma is to use monotime_coarse_t for this kind of thing. --- changes/ticket25927.2 | 5 ++++ src/common/compat_libevent.c | 47 +----------------------------------- src/common/compat_libevent.h | 3 --- src/or/connection.c | 2 -- 4 files changed, 6 insertions(+), 51 deletions(-) create mode 100644 changes/ticket25927.2 diff --git a/changes/ticket25927.2 b/changes/ticket25927.2 new file mode 100644 index 000000000..9acb4aaf6 --- /dev/null +++ b/changes/ticket25927.2 @@ -0,0 +1,5 @@ + o Code simplification and refactoring: + - Remove our previous logic for "cached gettimeofday()" -- our coarse + monotonic timers are fast enough for this purpose, and far less + error-prone. Implements part of ticket 25927. + diff --git a/src/common/compat_libevent.c b/src/common/compat_libevent.c index 9936c0aac..707efe389 100644 --- a/src/common/compat_libevent.c +++ b/src/common/compat_libevent.c @@ -494,51 +494,7 @@ tor_libevent_exit_loop_after_callback(struct event_base *base) event_base_loopbreak(base); } -#if defined(LIBEVENT_VERSION_NUMBER) && \ - LIBEVENT_VERSION_NUMBER >= V(2,1,1) && \ - !defined(TOR_UNIT_TESTS) -void -tor_gettimeofday_cached(struct timeval *tv) -{ - event_base_gettimeofday_cached(the_event_base, tv); -} -void -tor_gettimeofday_cache_clear(void) -{ - event_base_update_cache_time(the_event_base); -} -#else /* !(defined(LIBEVENT_VERSION_NUMBER) && ...) */ -/** Cache the current hi-res time; the cache gets reset when libevent - * calls us. */ -static struct timeval cached_time_hires = {0, 0}; - -/** Return a fairly recent view of the current time. */ -void -tor_gettimeofday_cached(struct timeval *tv) -{ - if (cached_time_hires.tv_sec == 0) { - tor_gettimeofday(&cached_time_hires); - } - *tv = cached_time_hires; -} - -/** Reset the cached view of the current time, so that the next time we try - * to learn it, we will get an up-to-date value. */ -void -tor_gettimeofday_cache_clear(void) -{ - cached_time_hires.tv_sec = 0; -} - -#ifdef TOR_UNIT_TESTS -/** For testing: force-update the cached time to a given value. */ -void -tor_gettimeofday_cache_set(const struct timeval *tv) -{ - tor_assert(tv); - memcpy(&cached_time_hires, tv, sizeof(*tv)); -} - +#if defined(TOR_UNIT_TESTS) /** For testing: called post-fork to make libevent reinitialize * kernel structures. */ void @@ -548,5 +504,4 @@ tor_libevent_postfork(void) tor_assert(r == 0); } #endif /* defined(TOR_UNIT_TESTS) */ -#endif /* defined(LIBEVENT_VERSION_NUMBER) && ... */ diff --git a/src/common/compat_libevent.h b/src/common/compat_libevent.h index 29c6ad375..e2747860a 100644 --- a/src/common/compat_libevent.h +++ b/src/common/compat_libevent.h @@ -68,10 +68,7 @@ void tor_libevent_free_all(void); int tor_init_libevent_rng(void); -void tor_gettimeofday_cached(struct timeval *tv); -void tor_gettimeofday_cache_clear(void); #ifdef TOR_UNIT_TESTS -void tor_gettimeofday_cache_set(const struct timeval *tv); void tor_libevent_postfork(void); #endif diff --git a/src/or/connection.c b/src/or/connection.c index de0f0485b..c2673ade1 100644 --- a/src/or/connection.c +++ b/src/or/connection.c @@ -3440,7 +3440,6 @@ connection_handle_read(connection_t *conn) { int res; - tor_gettimeofday_cache_clear(); res = connection_handle_read_impl(conn); return res; } @@ -3983,7 +3982,6 @@ int connection_handle_write(connection_t *conn, int force) { int res; - tor_gettimeofday_cache_clear(); conn->in_connection_handle_write = 1; res = connection_handle_write_impl(conn, force); conn->in_connection_handle_write = 0; From 9abf541f7f70068b6f7567481739ca6374f1fd57 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Thu, 26 Apr 2018 11:17:48 -0400 Subject: [PATCH 3/3] Add a function to compute millisecond time difference quickly. Our main function, though accurate on all platforms, can be very slow on 32-bit hosts. This one is faster on all 32-bit hosts, and accurate everywhere except apple, where it will typically be off by 1%. But since 32-bit apple is a relic anyway, I think we should be fine. --- src/common/compat_time.c | 50 ++++++++++++++++++++++++++++++++++++++++ src/common/compat_time.h | 27 ++++++++++++++++++++++ src/or/circuitmux_ewma.c | 7 ++---- src/test/test_util.c | 5 ++++ 4 files changed, 84 insertions(+), 5 deletions(-) diff --git a/src/common/compat_time.c b/src/common/compat_time.c index 966216768..b3723f533 100644 --- a/src/common/compat_time.c +++ b/src/common/compat_time.c @@ -279,6 +279,7 @@ monotime_reset_ratchets_for_testing(void) * nanoseconds. */ static struct mach_timebase_info mach_time_info; +static struct mach_timebase_info mach_time_info_msec_cvt; static int monotime_shift = 0; static void @@ -296,6 +297,14 @@ monotime_init_internal(void) // requires that tor_log2(0) == 0. monotime_shift = tor_log2(ms_per_tick); } + { + // For converting ticks to milliseconds in a 32-bit-friendly way, we + // will first right-shift by 20, and then multiply by 20/19, since + // (1<<20) * 19/20 is about 1e6. We precompute a new numerate and + // denominator here to avoid multiple multiplies. + mach_time_info_msec_cvt.numer = mach_time_info.numer * 20; + mach_time_info_msec_cvt.denom = mach_time_info.denom * 19; + } } /** @@ -345,6 +354,22 @@ monotime_diff_nsec(const monotime_t *start, return diff_nsec; } +int32_t +monotime_coarse_diff_msec32_(const monotime_coarse_t *start, + const monotime_coarse_t *end) +{ + if (BUG(mach_time_info.denom == 0)) { + monotime_init(); + } + const int64_t diff_ticks = end->abstime_ - start->abstime_; + + /* We already require in di_ops.c that right-shift performs a sign-extend. */ + const int32_t diff_microticks = (int32_t)(diff_ticks >> 20); + + return (diff_microticks * mach_time_info_msec_cvt.numer) / + mach_time_info_msec_cvt.denom; +} + uint32_t monotime_coarse_to_stamp(const monotime_coarse_t *t) { @@ -443,6 +468,15 @@ monotime_diff_nsec(const monotime_t *start, return diff_nsec; } +int32_t +monotime_coarse_diff_msec32_(const monotime_coarse_t *start, + const monotime_coarse_t *end) +{ + const int32_t diff_sec = (int32_t)(end->ts_.tv_sec - start->ts_.tv_sec); + const int32_t diff_nsec = (int32_t)(end->ts_.tv_nsec - start->ts_.tv_nsec); + return diff_sec * 1000 + diff_nsec / ONE_MILLION; +} + /* This value is ONE_BILLION >> 20. */ static const uint32_t STAMP_TICKS_PER_SECOND = 953; @@ -592,6 +626,13 @@ monotime_coarse_diff_msec(const monotime_coarse_t *start, return diff_ticks; } +int32_t +monotime_coarse_diff_msec32_(const monotime_coarse_t *start, + const monotime_coarse_t *end) +{ + return (int32_t)monotime_coarse_diff_msec(start, end) +} + int64_t monotime_coarse_diff_usec(const monotime_coarse_t *start, const monotime_coarse_t *end) @@ -677,6 +718,15 @@ monotime_diff_nsec(const monotime_t *start, return (diff.tv_sec * ONE_BILLION + diff.tv_usec * 1000); } +int32_t +monotime_coarse_diff_msec32_(const monotime_coarse_t *start, + const monotime_coarse_t *end) +{ + struct timeval diff; + timersub(&end->tv_, &start->tv_, &diff); + return diff.tv_sec * 1000 + diff.tv_usec / 1000; +} + /* This value is ONE_MILLION >> 10. */ static const uint32_t STAMP_TICKS_PER_SECOND = 976; diff --git a/src/common/compat_time.h b/src/common/compat_time.h index 09dd6add3..57ab20ab1 100644 --- a/src/common/compat_time.h +++ b/src/common/compat_time.h @@ -173,6 +173,33 @@ void monotime_coarse_add_msec(monotime_coarse_t *out, #define monotime_coarse_add_msec monotime_add_msec #endif /* defined(MONOTIME_COARSE_TYPE_IS_DIFFERENT) */ +/** + * As monotime_coarse_diff_msec, but avoid 64-bit division. + * + * Requires that the difference fit into an int32_t; not for use with + * large time differences. + */ +int32_t monotime_coarse_diff_msec32_(const monotime_coarse_t *start, + const monotime_coarse_t *end); + +/** + * As monotime_coarse_diff_msec, but avoid 64-bit division if it is expensive. + * + * Requires that the difference fit into an int32_t; not for use with + * large time differences. + */ +static inline int32_t +monotime_coarse_diff_msec32(const monotime_coarse_t *start, + const monotime_coarse_t *end) +{ +#if SIZEOF_VOID_P == 8 + // on a 64-bit platform, let's assume 64/64 division is cheap. + return (int32_t) monotime_coarse_diff_msec(start, end); +#else + return monotime_coarse_diff_msec32_(start, end); +#endif +} + MOCK_DECL(void, tor_gettimeofday, (struct timeval *timeval)); #ifdef TOR_UNIT_TESTS diff --git a/src/or/circuitmux_ewma.c b/src/or/circuitmux_ewma.c index a8a645ca5..a360327f8 100644 --- a/src/or/circuitmux_ewma.c +++ b/src/or/circuitmux_ewma.c @@ -626,11 +626,8 @@ cell_ewma_get_current_tick_and_fraction(double *remainder_out) } monotime_coarse_t now; monotime_coarse_get(&now); - // XXXX this does a division operation that can be slow on 32-bit - // XXXX systems. - int32_t msec_diff = - (int32_t)monotime_coarse_diff_msec(&start_of_current_tick, - &now); + int32_t msec_diff = monotime_coarse_diff_msec32(&start_of_current_tick, + &now); if (msec_diff > (1000*EWMA_TICK_LEN)) { unsigned ticks_difference = msec_diff / (1000*EWMA_TICK_LEN); monotime_coarse_add_msec(&start_of_current_tick, diff --git a/src/test/test_util.c b/src/test/test_util.c index 350273bf4..9c028fd47 100644 --- a/src/test/test_util.c +++ b/src/test/test_util.c @@ -6035,6 +6035,9 @@ test_util_monotonic_time_add_msec(void *arg) monotime_coarse_add_msec(&ct2, &ct1, 1337); tt_i64_op(monotime_diff_msec(&t1, &t2), OP_EQ, 1337); tt_i64_op(monotime_coarse_diff_msec(&ct1, &ct2), OP_EQ, 1337); + // The 32-bit variant must be within 1% of the regular one. + tt_int_op(monotime_coarse_diff_msec32_(&ct1, &ct2), OP_GT, 1323); + tt_int_op(monotime_coarse_diff_msec32_(&ct1, &ct2), OP_LT, 1350); /* Add 1337 msec twice more; make sure that any second rollover issues * worked. */ @@ -6044,6 +6047,8 @@ test_util_monotonic_time_add_msec(void *arg) monotime_coarse_add_msec(&ct2, &ct2, 1337); tt_i64_op(monotime_diff_msec(&t1, &t2), OP_EQ, 1337*3); tt_i64_op(monotime_coarse_diff_msec(&ct1, &ct2), OP_EQ, 1337*3); + tt_int_op(monotime_coarse_diff_msec32_(&ct1, &ct2), OP_GT, 3970); + tt_int_op(monotime_coarse_diff_msec32_(&ct1, &ct2), OP_LT, 4051); done: ;