Implement a real OOM-killer for too-long circuit queues.

This implements "algorithm 1" from my discussion of bug #9072: on OOM,
find the circuits with the longest queues, and kill them.  It's also a
fix for #9063 -- without the side-effects of bug #9072.

The memory bounds aren't perfect here, and you need to be sure to
allow some slack for the rest of Tor's usage.

This isn't a perfect fix; the rest of the solutions I describe on
codeable.
This commit is contained in:
Nick Mathewson 2013-06-16 09:55:44 -04:00
parent 2a95f31716
commit 2e1fe1fcf9
9 changed files with 172 additions and 2 deletions

15
changes/bug9063_redux Normal file
View File

@ -0,0 +1,15 @@
o Major bugfixes:
- When we have too much memory queued in circuits (according to a new
MaxMemInCellQueues option), close the circuits consuming the most
memory. This prevents us from running out of memory as a relay if
circuits fill up faster than they can be drained. Fixes
bug 9063; bugfix on the 54th commit of Tor. This bug is a further
fix beyond bug 6252, whose fix was merged into 0.2.3.21-rc.
Also fixes an earlier approach taken in 0.2.4.13-alpha, where we
tried to solve this issue simply by imposing an upper limit on the
number of queued cells for a single circuit. That approach proved to
be problematic, since there are ways to provoke clients to send a
number of cells in excess of any such reasonable limit.
Fixes bug 9072; bugfix on 0.2.4.13-alpha.

View File

@ -1475,6 +1475,15 @@ is non-zero):
localhost, RFC1918 addresses, and so on. This can create security issues;
you should probably leave it off. (Default: 0)
**MaxMemInCellQueues** __N__ **bytes**|**KB**|**MB**|**GB**::
This option configures a threshold above which Tor will assume that it
needs to stop queueing cells because it's about to run out of memory.
If it hits this threshold, it will begin killing circuits until it
has recovered at least 10% of this memory. Do not set this option too
low, or your relay may be unreliable under load. This option only
effects circuit queues, so the actual process size will be larger than
this. (Default: 8GB)
DIRECTORY SERVER OPTIONS
------------------------

View File

@ -22,6 +22,8 @@ void mp_pool_destroy(mp_pool_t *pool);
void mp_pool_assert_ok(mp_pool_t *pool);
void mp_pool_log_status(mp_pool_t *pool, int severity);
#define MP_POOL_ITEM_OVERHEAD (sizeof(void*))
#define MEMPOOL_STATS
#ifdef MEMPOOL_PRIVATE

View File

@ -1359,6 +1359,108 @@ _circuit_mark_for_close(circuit_t *circ, int reason, int line,
}
}
/** Given a marked circuit <b>circ</b>, aggressively free its cell queues to
* recover memory. */
static void
marked_circuit_free_cells(circuit_t *circ)
{
if (!circ->marked_for_close) {
log_warn(LD_BUG, "Called on non-marked circuit");
return;
}
cell_queue_clear(&circ->n_conn_cells);
if (! CIRCUIT_IS_ORIGIN(circ))
cell_queue_clear(& TO_OR_CIRCUIT(circ)->p_conn_cells);
}
/** Return the number of cells used by the circuit <b>c</b>'s cell queues. */
static size_t
n_cells_in_circ_queues(const circuit_t *c)
{
size_t n = c->n_conn_cells.n;
if (! CIRCUIT_IS_ORIGIN(c))
n += TO_OR_CIRCUIT((circuit_t*)c)->p_conn_cells.n;
return n;
}
/** helper to sort a list of circuit_q by total queue lengths, in descending
* order. */
static int
circuits_compare_by_queue_len_(const void **a_, const void **b_)
{
const circuit_t *a = *a_;
const circuit_t *b = *b_;
size_t a_n = n_cells_in_circ_queues(a);
size_t b_n = n_cells_in_circ_queues(b);
if (a_n < b_n)
return 1;
else if (a_n == b_n)
return 0;
else
return -1;
}
#define FRACTION_OF_CIRCS_TO_RETAIN_ON_OOM 0.90
/** We're out of memory for cells, having allocated <b>current_allocation</b>
* bytes' worth. Kill the 'worst' circuits until we're under
* FRACTION_OF_CIRCS_TO_RETAIN_ON_OOM of our maximum usage. */
void
circuits_handle_oom(size_t current_allocation)
{
/* Let's hope there's enough slack space for this allocation here... */
smartlist_t *circlist = smartlist_new();
circuit_t *circ;
size_t n_cells_removed=0, n_cells_to_remove;
int n_circuits_killed=0;
log_notice(LD_GENERAL, "We're low on memory. Killing circuits with "
"over-long queues. (This behavior is controlled by "
"MaxMemInCellQueues.)");
{
size_t mem_target = (size_t)(get_options()->MaxMemInCellQueues *
FRACTION_OF_CIRCS_TO_RETAIN_ON_OOM);
size_t mem_to_recover;
if (current_allocation <= mem_target)
return;
mem_to_recover = current_allocation - mem_target;
n_cells_to_remove = CEIL_DIV(mem_to_recover, packed_cell_mem_cost());
}
/* This algorithm itself assumes that you've got enough memory slack
* to actually run it. */
for (circ = global_circuitlist; circ; circ = circ->next)
smartlist_add(circlist, circ);
/* This is O(n log n); there are faster algorithms we could use instead.
* Let's hope this doesn't happen enough to be in the critical path. */
smartlist_sort(circlist, circuits_compare_by_queue_len_);
/* Okay, now the worst circuits are at the front of the list. Let's mark
* them, and reclaim their storage aggressively. */
SMARTLIST_FOREACH_BEGIN(circlist, circuit_t *, circ) {
size_t n = n_cells_in_circ_queues(circ);
if (! circ->marked_for_close) {
circuit_mark_for_close(circ, END_CIRC_REASON_RESOURCELIMIT);
}
marked_circuit_free_cells(circ);
++n_circuits_killed;
n_cells_removed += n;
if (n_cells_removed >= n_cells_to_remove)
break;
} SMARTLIST_FOREACH_END(circ);
clean_cell_pool(); /* In case this helps. */
log_notice(LD_GENERAL, "Removed "U64_FORMAT" bytes by killing %d circuits.",
U64_PRINTF_ARG(n_cells_removed * packed_cell_mem_cost()),
n_circuits_killed);
smartlist_free(circlist);
}
/** Verify that cpath layer <b>cp</b> has all of its invariants
* correct. Trigger an assert if anything is invalid.
*/

View File

@ -57,6 +57,7 @@ int circuit_count_pending_on_or_conn(or_connection_t *or_conn);
void assert_cpath_layer_ok(const crypt_path_t *cp);
void assert_circuit_ok(const circuit_t *c);
void circuit_free_all(void);
void circuits_handle_oom(size_t current_allocation);
#endif

View File

@ -343,6 +343,7 @@ static config_var_t _option_vars[] = {
V(MaxAdvertisedBandwidth, MEMUNIT, "1 GB"),
V(MaxCircuitDirtiness, INTERVAL, "10 minutes"),
V(MaxClientCircuitsPending, UINT, "32"),
V(MaxMemInCellQueues, MEMUNIT, "8 GB"),
V(MaxOnionsPending, UINT, "100"),
OBSOLETE("MonthlyAccountingStart"),
V(MyFamily, STRING, NULL),
@ -3668,6 +3669,12 @@ options_validate(or_options_t *old_options, or_options_t *options,
log_warn(LD_CONFIG, "EntryNodes is set, but UseEntryGuards is disabled. "
"EntryNodes will be ignored.");
if (options->MaxMemInCellQueues < (500 << 20)) {
log_warn(LD_CONFIG, "MaxMemInCellQueues must be at least 500 MB for now. "
"Ideally, have it as large as you can afford.");
options->MaxMemInCellQueues = (500 << 20);
}
options->_AllowInvalid = 0;
if (options->AllowInvalidNodes) {
SMARTLIST_FOREACH_BEGIN(options->AllowInvalidNodes, const char *, cp) {

View File

@ -3064,6 +3064,10 @@ typedef struct {
config_line_t *DirPort_lines;
config_line_t *DNSPort_lines; /**< Ports to listen on for DNS requests. */
uint64_t MaxMemInCellQueues; /**< If we have more memory than this allocated
* for circuit cell queues, run the OOM handler
*/
/** @name port booleans
*
* Derived booleans: True iff there is a non-listener port on an AF_INET or

View File

@ -1799,7 +1799,7 @@ circuit_consider_sending_sendme(circuit_t *circ, crypt_path_t *layer_hint)
#endif
/** The total number of cells we have allocated from the memory pool. */
static int total_cells_allocated = 0;
static size_t total_cells_allocated = 0;
/** A memory pool to allocate packed_cell_t objects. */
static mp_pool_t *cell_pool = NULL;
@ -1871,7 +1871,7 @@ dump_cell_pool_usage(int severity)
++n_circs;
}
log(severity, LD_MM, "%d cells allocated on %d circuits. %d cells leaked.",
n_cells, n_circs, total_cells_allocated - n_cells);
n_cells, n_circs, (int)total_cells_allocated - n_cells);
mp_pool_log_status(cell_pool, severity);
}
@ -1978,6 +1978,29 @@ cell_queue_pop(cell_queue_t *queue)
return cell;
}
/** Return the total number of bytes used for each packed_cell in a queue.
* Approximate. */
size_t
packed_cell_mem_cost(void)
{
return sizeof(packed_cell_t) + MP_POOL_ITEM_OVERHEAD +
get_options()->CellStatistics ?
(sizeof(insertion_time_elem_t)+MP_POOL_ITEM_OVERHEAD) : 0;
}
/** Check whether we've got too much space used for cells. If so,
* call the OOM handler and return 1. Otherwise, return 0. */
static int
cell_queues_check_size(void)
{
size_t alloc = total_cells_allocated * packed_cell_mem_cost();
if (alloc >= get_options()->MaxMemInCellQueues) {
circuits_handle_oom(alloc);
return 1;
}
return 0;
}
/** Return a pointer to the "next_active_on_{n,p}_conn" pointer of <b>circ</b>,
* depending on whether <b>conn</b> matches n_conn or p_conn. */
static INLINE circuit_t **
@ -2574,6 +2597,12 @@ append_cell_to_circuit_queue(circuit_t *circ, or_connection_t *orconn,
cell_queue_append_packed_copy(queue, cell);
if (PREDICT_UNLIKELY(cell_queues_check_size())) {
/* We ran the OOM handler */
if (circ->marked_for_close)
return;
}
/* If we have too many cells on the circuit, we should stop reading from
* the edge streams for a while. */
if (!streams_blocked && queue->n >= CELL_QUEUE_HIGHWATER_SIZE)

View File

@ -40,6 +40,7 @@ void init_cell_pool(void);
void free_cell_pool(void);
void clean_cell_pool(void);
void dump_cell_pool_usage(int severity);
size_t packed_cell_mem_cost(void);
void cell_queue_clear(cell_queue_t *queue);
void cell_queue_append(cell_queue_t *queue, packed_cell_t *cell);