From c5a3e2ca44cfd3302a65ae247120fd4efb81a379 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Mon, 2 Apr 2018 15:02:53 -0400 Subject: [PATCH 1/6] Generic mechaism for "post-loop" callbacks We've been labeling some events as happening "outside the event loop", to avoid Libevent starvation. This patch provides a cleaner mechanism to avoid that starvation. For background, the problem here is that Libevent only scans for new events once it has run all its active callbacks. So if the callbacks keep activating new callbacks, they could potentially starve Libevent indefinitely and keep it from ever checking for timed, socket, or signal events. To solve this, we add the ability to label some events as "post-loop". The rule for a "post-loop" event is that any events _it_ activates can only be run after libevent has re-scanned for new events at least once. --- src/common/compat_libevent.c | 115 ++++++++++++++++++++++++++++++++--- src/common/compat_libevent.h | 3 + 2 files changed, 110 insertions(+), 8 deletions(-) diff --git a/src/common/compat_libevent.c b/src/common/compat_libevent.c index 0d0e4337b..9936c0aac 100644 --- a/src/common/compat_libevent.c +++ b/src/common/compat_libevent.c @@ -79,6 +79,43 @@ tor_event_free_(struct event *ev) /** Global event base for use by the main thread. */ static struct event_base *the_event_base = NULL; +/** + * @defgroup postloop post-loop event helpers + * + * If we're not careful, Libevent can susceptible to infinite event chains: + * one event can activate another, whose callback activates another, whose + * callback activates another, ad infinitum. While this is happening, + * Libevent won't be checking timeouts, socket-based events, signals, and so + * on. + * + * We solve this problem by marking some events as "post-loop". A post-loop + * event behaves like any ordinary event, but any events that _it_ activates + * cannot run until Libevent has checked for other events at least once. + * + * @{ */ + +/** + * An event that stops Libevent from running any more events on the current + * iteration of its loop, until it has re-checked for socket events, signal + * events, timeouts, etc. + */ +static struct event *rescan_mainloop_ev = NULL; + +/** + * Callback to implement rescan_mainloop_ev: it simply exits the mainloop, + * and relies on Tor to re-enter the mainloop since no error has occurred. + */ +static void +rescan_mainloop_cb(evutil_socket_t fd, short events, void *arg) +{ + (void)fd; + (void)events; + struct event_base *the_base = arg; + event_base_loopbreak(the_base); +} + +/** @} */ + /* This is what passes for version detection on OSX. We set * MACOSX_KQUEUE_IS_BROKEN to true iff we're on a version of OSX before * 10.4.0 (aka 1040). */ @@ -130,6 +167,15 @@ tor_libevent_initialize(tor_libevent_cfg *torcfg) /* LCOV_EXCL_STOP */ } + rescan_mainloop_ev = event_new(the_event_base, -1, 0, + rescan_mainloop_cb, the_event_base); + if (!rescan_mainloop_ev) { + /* LCOV_EXCL_START */ + log_err(LD_GENERAL, "Unable to create rescan event: cannot continue."); + exit(1); // exit ok: libevent is broken. + /* LCOV_EXCL_STOP */ + } + log_info(LD_GENERAL, "Initialized libevent version %s using method %s. Good.", event_get_version(), tor_libevent_get_method()); @@ -250,6 +296,52 @@ mainloop_event_cb(evutil_socket_t fd, short what, void *arg) mev->cb(mev, mev->userdata); } +/** + * As mainloop_event_cb, but implements a post-loop event. + */ +static void +mainloop_event_postloop_cb(evutil_socket_t fd, short what, void *arg) +{ + (void)fd; + (void)what; + + /* Note that if rescan_mainloop_ev is already activated, + * event_active() will do nothing: only the first post-loop event that + * happens each time through the event loop will cause it to be + * activated. + * + * Because event_active() puts events on a FIFO queue, every event + * that is made active _after_ rescan_mainloop_ev will get its + * callback run after rescan_mainloop_cb is called -- that is, on the + * next iteration of the loop. + */ + event_active(rescan_mainloop_ev, EV_READ, 1); + + mainloop_event_t *mev = arg; + mev->cb(mev, mev->userdata); +} + +/** + * Helper for mainloop_event_new() and mainloop_event_postloop_new(). + */ +static mainloop_event_t * +mainloop_event_new_impl(int postloop, + void (*cb)(mainloop_event_t *, void *), + void *userdata) +{ + tor_assert(cb); + + struct event_base *base = tor_libevent_get_base(); + mainloop_event_t *mev = tor_malloc_zero(sizeof(mainloop_event_t)); + mev->ev = tor_event_new(base, -1, 0, + postloop ? mainloop_event_postloop_cb : mainloop_event_cb, + mev); + tor_assert(mev->ev); + mev->cb = cb; + mev->userdata = userdata; + return mev; +} + /** * Create and return a new mainloop_event_t to run the function cb. * @@ -265,15 +357,21 @@ mainloop_event_t * mainloop_event_new(void (*cb)(mainloop_event_t *, void *), void *userdata) { - tor_assert(cb); + return mainloop_event_new_impl(0, cb, userdata); +} - struct event_base *base = tor_libevent_get_base(); - mainloop_event_t *mev = tor_malloc_zero(sizeof(mainloop_event_t)); - mev->ev = tor_event_new(base, -1, 0, mainloop_event_cb, mev); - tor_assert(mev->ev); - mev->cb = cb; - mev->userdata = userdata; - return mev; +/** + * As mainloop_event_new(), but create a post-loop event. + * + * A post-loop event behaves like any ordinary event, but any events + * that _it_ activates cannot run until Libevent has checked for other + * events at least once. + */ +mainloop_event_t * +mainloop_event_postloop_new(void (*cb)(mainloop_event_t *, void *), + void *userdata) +{ + return mainloop_event_new_impl(1, cb, userdata); } /** @@ -358,6 +456,7 @@ tor_init_libevent_rng(void) void tor_libevent_free_all(void) { + tor_event_free(rescan_mainloop_ev); if (the_event_base) event_base_free(the_event_base); the_event_base = NULL; diff --git a/src/common/compat_libevent.h b/src/common/compat_libevent.h index c8b037156..29c6ad375 100644 --- a/src/common/compat_libevent.h +++ b/src/common/compat_libevent.h @@ -37,6 +37,9 @@ void periodic_timer_free_(periodic_timer_t *); typedef struct mainloop_event_t mainloop_event_t; mainloop_event_t *mainloop_event_new(void (*cb)(mainloop_event_t *, void *), void *userdata); +mainloop_event_t * mainloop_event_postloop_new( + void (*cb)(mainloop_event_t *, void *), + void *userdata); void mainloop_event_activate(mainloop_event_t *event); int mainloop_event_schedule(mainloop_event_t *event, const struct timeval *delay); From 5719dfb48f87a54aeb5982ff03345303bc058ebb Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Mon, 2 Apr 2018 15:08:41 -0400 Subject: [PATCH 2/6] Move the "activate linked connections" logic to a postloop event. A linked connection_t is one that gets its I/O, not from the network, but from another connection_t. When such a connection has something to write, we want the corresponding connection to run its read callback ... but not immediately, to avoid infinite recursion and/or event loop starvation. Previously we handled this case by activating the read events outside the event loop. Now we use the "postloop event" logic. This lets us simplify do_main_loop_once() a little. --- src/or/main.c | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/src/or/main.c b/src/or/main.c index 4cc51de07..b499c1691 100644 --- a/src/or/main.c +++ b/src/or/main.c @@ -410,6 +410,27 @@ connection_unlink(connection_t *conn) connection_free(conn); } +/** + * Callback: used to activate read events for all linked connections, so + * libevent knows to call their read callbacks. This callback run as a + * postloop event, so that the events _it_ activates don't happen until + * Libevent has a chance to check for other events. + */ +static void +schedule_active_linked_connections_cb(mainloop_event_t *event, void *arg) +{ + (void)event; + (void)arg; + + /* All active linked conns should get their read events activated, + * so that libevent knows to run their callbacks. */ + SMARTLIST_FOREACH(active_linked_connection_lst, connection_t *, conn, + event_active(conn->read_event, EV_READ, 1)); +} + +/** Event that invokes schedule_active_linked_connections_cb. */ +static mainloop_event_t *schedule_active_linked_connections_event = NULL; + /** Initialize the global connection list, closeable connection list, * and active connection list. */ STATIC void @@ -803,10 +824,7 @@ connection_start_reading_from_linked_conn(connection_t *conn) if (!conn->active_on_link) { conn->active_on_link = 1; smartlist_add(active_linked_connection_lst, conn); - /* make sure that the event_base_loop() function exits at - * the end of its run through the current connections, so we can - * activate read events for linked connections. */ - tell_event_loop_to_run_external_code(); + mainloop_event_activate(schedule_active_linked_connections_event); } else { tor_assert(smartlist_contains(active_linked_connection_lst, conn)); } @@ -2579,6 +2597,11 @@ do_main_loop(void) initialize_periodic_events(); } + if (!schedule_active_linked_connections_event) { + schedule_active_linked_connections_event = + mainloop_event_postloop_new(schedule_active_linked_connections_cb, NULL); + } + /* initialize dns resolve map, spawn workers if needed */ if (dns_init() < 0) { if (get_options()->ServerDNSAllowBrokenConfig) @@ -2783,17 +2806,12 @@ run_main_loop_once(void) errno = 0; #endif - /* All active linked conns should get their read events activated, - * so that libevent knows to run their callbacks. */ - SMARTLIST_FOREACH(active_linked_connection_lst, connection_t *, conn, - event_active(conn->read_event, EV_READ, 1)); - if (get_options()->MainloopStats) { /* We always enforce that EVLOOP_ONCE is passed to event_base_loop() if we * are collecting main loop statistics. */ called_loop_once = 1; } else { - called_loop_once = smartlist_len(active_linked_connection_lst) ? 1 : 0; + called_loop_once = 0; } /* Make sure we know (about) what time it is. */ @@ -3496,6 +3514,7 @@ tor_free_all(int postfork) tor_event_free(shutdown_did_not_work_event); tor_event_free(initialize_periodic_events_event); mainloop_event_free(directory_all_unreachable_cb_event); + mainloop_event_free(schedule_active_linked_connections_event); #ifdef HAVE_SYSTEMD_209 periodic_timer_free(systemd_watchdog_timer); From 320bd2b3a50511643e5d1ca6cd16708f20fa7d68 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Mon, 2 Apr 2018 15:20:21 -0400 Subject: [PATCH 3/6] Move connection_ap_attach_pending(0) into a postloop event This is a second motivating case for our postloop event logic. --- src/or/connection_edge.c | 32 ++++++++++++++++++++++---------- src/or/main.c | 13 ------------- 2 files changed, 22 insertions(+), 23 deletions(-) diff --git a/src/or/connection_edge.c b/src/or/connection_edge.c index 948c8722b..955f942c5 100644 --- a/src/or/connection_edge.c +++ b/src/or/connection_edge.c @@ -611,6 +611,12 @@ static smartlist_t *pending_entry_connections = NULL; static int untried_pending_connections = 0; +/** + * Mainloop event to tell us to scan for pending connections that can + * be attached. + */ +static mainloop_event_t *attach_pending_entry_connections_ev = NULL; + /** Common code to connection_(ap|exit)_about_to_close. */ static void connection_edge_about_to_close(edge_connection_t *edge_conn) @@ -956,6 +962,14 @@ connection_ap_attach_pending(int retry) untried_pending_connections = 0; } +static void +attach_pending_entry_connections_cb(mainloop_event_t *ev, void *arg) +{ + (void)ev; + (void)arg; + connection_ap_attach_pending(0); +} + /** Mark entry_conn as needing to get attached to a circuit. * * And entry_conn must be in AP_CONN_STATE_CIRCUIT_WAIT, @@ -973,9 +987,13 @@ connection_ap_mark_as_pending_circuit_(entry_connection_t *entry_conn, if (conn->marked_for_close) return; - if (PREDICT_UNLIKELY(NULL == pending_entry_connections)) + if (PREDICT_UNLIKELY(NULL == pending_entry_connections)) { pending_entry_connections = smartlist_new(); - + } + if (PREDICT_UNLIKELY(NULL == attach_pending_entry_connections_ev)) { + attach_pending_entry_connections_ev = mainloop_event_postloop_new( + attach_pending_entry_connections_cb, NULL); + } if (PREDICT_UNLIKELY(smartlist_contains(pending_entry_connections, entry_conn))) { log_warn(LD_BUG, "What?? pending_entry_connections already contains %p! " @@ -999,14 +1017,7 @@ connection_ap_mark_as_pending_circuit_(entry_connection_t *entry_conn, untried_pending_connections = 1; smartlist_add(pending_entry_connections, entry_conn); - /* Work-around for bug 19969: we handle pending_entry_connections at - * the end of run_main_loop_once(), but in many cases that function will - * take a very long time, if ever, to finish its call to event_base_loop(). - * - * So the fix is to tell it right now that it ought to finish its loop at - * its next available opportunity. - */ - tell_event_loop_to_run_external_code(); + mainloop_event_activate(attach_pending_entry_connections_ev); } /** Mark entry_conn as no longer waiting for a circuit. */ @@ -4165,5 +4176,6 @@ connection_edge_free_all(void) untried_pending_connections = 0; smartlist_free(pending_entry_connections); pending_entry_connections = NULL; + mainloop_event_free(attach_pending_entry_connections_ev); } diff --git a/src/or/main.c b/src/or/main.c index b499c1691..257af8b18 100644 --- a/src/or/main.c +++ b/src/or/main.c @@ -2867,19 +2867,6 @@ run_main_loop_once(void) if (main_loop_should_exit) return 0; - /* And here is where we put callbacks that happen "every time the event loop - * runs." They must be very fast, or else the whole Tor process will get - * slowed down. - * - * Note that this gets called once per libevent loop, which will make it - * happen once per group of events that fire, or once per second. */ - - /* If there are any pending client connections, try attaching them to - * circuits (if we can.) This will be pretty fast if nothing new is - * pending. - */ - connection_ap_attach_pending(0); - return 1; } From 62f4d5a2659d2a786c92299fb29a6a6be44f8caf Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Mon, 2 Apr 2018 16:47:28 -0400 Subject: [PATCH 4/6] Add a unit test for post-loop events This test works by having two post-loop events activate one another in a tight loop. If the "post-loop" mechanism didn't work, this would be enough to starve all other events. --- src/test/test_compat_libevent.c | 60 +++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/src/test/test_compat_libevent.c b/src/test/test_compat_libevent.c index 376524c2d..85f69bd62 100644 --- a/src/test/test_compat_libevent.c +++ b/src/test/test_compat_libevent.c @@ -121,10 +121,70 @@ test_compat_libevent_header_version(void *ignored) (void)0; } +/* Test for postloop events */ + +/* Event callback to increment a counter. */ +static void +increment_int_counter_cb(periodic_timer_t *timer, void *arg) +{ + (void)timer; + int *ctr = arg; + ++*ctr; +} + +static int activated_counter = 0; + +/* Mainloop event callback to activate another mainloop event */ +static void +activate_event_cb(mainloop_event_t *ev, void *arg) +{ + (void)ev; + mainloop_event_t **other_event = arg; + mainloop_event_activate(*other_event); + ++activated_counter; +} + +static void +test_compat_libevent_postloop_events(void *arg) +{ + (void)arg; + mainloop_event_t *a = NULL, *b = NULL; + periodic_timer_t *timed = NULL; + + tor_libevent_postfork(); + + /* If postloop events don't work, then these events will activate one + * another ad infinitum and, and the periodic event will never occur. */ + b = mainloop_event_postloop_new(activate_event_cb, &a); + a = mainloop_event_postloop_new(activate_event_cb, &b); + + int counter = 0; + struct timeval fifty_ms = { 0, 10 * 1000 }; + timed = periodic_timer_new(tor_libevent_get_base(), &fifty_ms, + increment_int_counter_cb, &counter); + + mainloop_event_activate(a); + int r; + do { + r = tor_libevent_run_event_loop(tor_libevent_get_base(), 0); + if (r == -1) + break; + } while (counter < 5); + + tt_int_op(activated_counter, OP_GE, 2); + + done: + mainloop_event_free(a); + mainloop_event_free(b); + periodic_timer_free(timed); +} + struct testcase_t compat_libevent_tests[] = { { "logging_callback", test_compat_libevent_logging_callback, TT_FORK, NULL, NULL }, { "header_version", test_compat_libevent_header_version, 0, NULL, NULL }, + { "postloop_events", test_compat_libevent_postloop_events, + TT_FORK, NULL, NULL }, END_OF_TESTCASES }; From 2fe499eb3f4c4a15249f9f913a1cb4e59e94d03f Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Mon, 2 Apr 2018 16:48:54 -0400 Subject: [PATCH 5/6] Changes files for post-loop events (25374) --- changes/ticket25374 | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 changes/ticket25374 diff --git a/changes/ticket25374 b/changes/ticket25374 new file mode 100644 index 000000000..5fc59d158 --- /dev/null +++ b/changes/ticket25374 @@ -0,0 +1,6 @@ + o Code simplification and refactoring: + - Our main loop has been simplified so that all important operations + happen inside events. Previously, some operations had to happen + outside the event loop, to prevent infinite sequences of event + activations. Closes ticket 25374. + From 4c03af48806ac03e5754a7531ee9e915fc2f6c8c Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Fri, 13 Apr 2018 12:11:22 -0400 Subject: [PATCH 6/6] Remove tell_event_loop_to_run_external_code() per review (This function is no longer used.) --- src/or/main.c | 14 -------------- src/or/main.h | 1 - 2 files changed, 15 deletions(-) diff --git a/src/or/main.c b/src/or/main.c index 257af8b18..074746108 100644 --- a/src/or/main.c +++ b/src/or/main.c @@ -731,20 +731,6 @@ connection_should_read_from_linked_conn(connection_t *conn) return 0; } -/** If we called event_base_loop() and told it to never stop until it - * runs out of events, now we've changed our mind: tell it we want it to - * exit once the current round of callbacks is done, so that we can - * run external code, and then return to the main loop. */ -void -tell_event_loop_to_run_external_code(void) -{ - if (!called_loop_once) { - struct timeval tv = { 0, 0 }; - tor_libevent_exit_loop_after_delay(tor_libevent_get_base(), &tv); - called_loop_once = 1; /* hack to avoid adding more exit events */ - } -} - /** Event to run 'shutdown did not work callback'. */ static struct event *shutdown_did_not_work_event = NULL; diff --git a/src/or/main.h b/src/or/main.h index f01506fce..5ca435064 100644 --- a/src/or/main.h +++ b/src/or/main.h @@ -45,7 +45,6 @@ int connection_is_writing(connection_t *conn); MOCK_DECL(void,connection_stop_writing,(connection_t *conn)); MOCK_DECL(void,connection_start_writing,(connection_t *conn)); -void tell_event_loop_to_run_external_code(void); void tor_shutdown_event_loop_and_exit(int exitcode); int tor_event_loop_shutdown_is_pending(void);