Merge branch 'time_based_onionqueue_v2' of ssh://git-rw.torproject.org/nickm/tor

Andrea Shepard 2013-01-24 08:10:12 -08:00
commit dfbd19df41
8 changed files with 302 additions and 81 deletions

changes/timed_onionqueue (new file, 11 lines added)

@ -0,0 +1,11 @@
o Minor features (relay):
- Instead of limiting the number of queued onionskins to a configured,
hard-to-configure number, we limit the size of the queue based on how
many we expect to be able to process in a given amount of time. We
estimate the time it will take to process an onionskin based on average
processing time of previous onionskins. Closes ticket 7291. You'll
never have to configure MaxOnionsPending again.
- We compute the overhead from passing onionskins back and forth to
cpuworkers, and report it when dumping statistics in response to
SIGUSR1.
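
In rough terms, the limit implemented below in have_room_for_onionskin() works out to:

    estimated drain time (msec) = sum over handshake types of
        (queued onionskins of that type * average measured usec per onionskin) / (number of CPUs * 1000)

A new onionskin is refused once this estimate exceeds MaxOnionQueueDelay, except that a queue of fewer than 50 entries always has room for one more.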


@ -1376,9 +1376,9 @@ is non-zero):
If set, and we are an exit node, allow clients to use us for IPv6
traffic. (Default: 0)
**MaxOnionsPending** __NUM__::
If you have more than this number of onionskins queued for decrypt, reject
new ones. (Default: 100)
**MaxOnionQueueDelay** __NUM__ [**msec**|**second**]::
If we have more onionskins queued for processing than we can process in
this amount of time, reject new ones. (Default: 1750 msec)
**MyFamily** __node__,__node__,__...__::
Declare that this Tor server is controlled or administered by a group or
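
An illustrative torrc line using the new MaxOnionQueueDelay option documented above (the value shown is only an example, not a recommendation):

    MaxOnionQueueDelay 2500 msec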


@ -296,7 +296,8 @@ static config_var_t option_vars_[] = {
V(MaxAdvertisedBandwidth, MEMUNIT, "1 GB"),
V(MaxCircuitDirtiness, INTERVAL, "10 minutes"),
V(MaxClientCircuitsPending, UINT, "32"),
V(MaxOnionsPending, UINT, "100"),
OBSOLETE("MaxOnionsPending"),
V(MaxOnionQueueDelay, MSEC_INTERVAL, "1750 msec"),
OBSOLETE("MonthlyAccountingStart"),
V(MyFamily, STRING, NULL),
V(NewCircuitPeriod, INTERVAL, "30 seconds"),


@ -98,6 +98,11 @@ typedef struct cpuworker_request_t {
/** Task code. Must be one of CPUWORKER_TASK_* */
uint8_t task;
/** Flag: Are we timing this request? */
unsigned timed : 1;
/** If we're timing this request, when was it sent to the cpuworker? */
struct timeval started_at;
/** A create cell for the cpuworker to process. */
create_cell_t create_cell;
@ -113,6 +118,17 @@ typedef struct cpuworker_reply_t {
/** True iff we got a successful request. */
uint8_t success;
/** Are we timing this request? */
unsigned int timed : 1;
/** What handshake type was the request? (Used for timing) */
uint16_t handshake_type;
/** When did we send the request to the cpuworker? */
struct timeval started_at;
/** Once the cpuworker received the request, how many microseconds did it
* take? (This shouldn't overflow; 4 billion microseconds is over an hour,
* and we'll never have an onion handshake that takes so long.) */
uint32_t n_usec;
/** Output of processing a create cell
*
* @{
@ -163,6 +179,110 @@ connection_cpu_reached_eof(connection_t *conn)
return 0;
}
/** Indexed by handshake type: how many onionskins have we processed and
* counted of that type? */
static uint64_t onionskins_n_processed[MAX_ONION_HANDSHAKE_TYPE+1];
/** Indexed by handshake type, corresponding to the onionskins counted in
* onionskins_n_processed: how many microseconds have we spent in cpuworkers
* processing that kind of onionskin? */
static uint64_t onionskins_usec_internal[MAX_ONION_HANDSHAKE_TYPE+1];
/** Indexed by handshake type, corresponding to onionskins counted in
* onionskins_n_processed: how many microseconds have we spent waiting for
* cpuworkers to give us answers for that kind of onionskin?
*/
static uint64_t onionskins_usec_roundtrip[MAX_ONION_HANDSHAKE_TYPE+1];
/** If any onionskin takes longer than this, we clip it to this
* time. (microseconds) */
#define MAX_BELIEVABLE_ONIONSKIN_DELAY (2*1000*1000)
/** Return true iff we'd like to measure a handshake of type
* <b>onionskin_type</b>. */
static int
should_time_request(uint16_t onionskin_type)
{
/* If we've never heard of this type, we shouldn't even be here. */
if (onionskin_type > MAX_ONION_HANDSHAKE_TYPE)
return 0;
/* Measure the first N handshakes of each type, to ensure we have a
* sample */
if (onionskins_n_processed[onionskin_type] < 4096)
return 1;
/** Otherwise, measure with P=1/128. We avoid doing this for every
* handshake, since the measurement itself can take a little time. */
return tor_weak_random() < (TOR_RAND_MAX/128);
}
/** Return an estimate of how many microseconds we will need for a single
* cpuworker to process <b>n_requests</b> onionskins of type
* <b>onionskin_type</b>. */
uint64_t
estimated_usec_for_onionskins(uint32_t n_requests, uint16_t onionskin_type)
{
if (onionskin_type > MAX_ONION_HANDSHAKE_TYPE) /* should be impossible */
return 1000 * n_requests;
if (PREDICT_UNLIKELY(onionskins_n_processed[onionskin_type] < 100)) {
/* Until we have 100 data points, just assume everything takes 1 msec. */
return 1000 * n_requests;
} else {
/* This can't overflow: we'll never have more than 500000 onionskins
* measured in onionskin_usec_internal, and they won't take anything near
* 1 sec each, and we won't have anything like 1 million queued
* onionskins. But that's 5e5 * 1e6 * 1e6, which is still less than
* UINT64_MAX. */
return (onionskins_usec_internal[onionskin_type] * n_requests) /
onionskins_n_processed[onionskin_type];
}
}
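/* Worked example (illustrative numbers only, not taken from any real
 * measurement): if 200 ntor onionskins have been timed at a total of 50000
 * usec of cpuworker time, the average is 250 usec each, so 80 queued ntor
 * requests would be estimated at 50000 * 80 / 200 = 20000 usec for a single
 * cpuworker. */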
/** Compute the absolute and relative overhead of using the cpuworker
* framework for onionskins of type <b>onionskin_type</b>.*/
static int
get_overhead_for_onionskins(uint32_t *usec_out, double *frac_out,
uint16_t onionskin_type)
{
uint64_t overhead;
*usec_out = 0;
*frac_out = 0.0;
if (onionskin_type > MAX_ONION_HANDSHAKE_TYPE) /* should be impossible */
return -1;
if (onionskins_n_processed[onionskin_type] == 0 ||
onionskins_usec_internal[onionskin_type] == 0 ||
onionskins_usec_roundtrip[onionskin_type] == 0)
return -1;
overhead = onionskins_usec_roundtrip[onionskin_type] -
onionskins_usec_internal[onionskin_type];
*usec_out = (uint32_t)(overhead / onionskins_n_processed[onionskin_type]);
*frac_out = U64_TO_DBL(overhead) / onionskins_usec_internal[onionskin_type];
return 0;
}
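/* Illustrative example (made-up totals): with 1000 onionskins of a given
 * type measured, 300000 usec of roundtrip time, and 250000 usec of internal
 * cpuworker time, the absolute overhead is (300000 - 250000) / 1000 = 50
 * usec per onionskin, and the relative overhead is 50000 / 250000 = 20%. */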
/** If we've measured overhead for onionskins of type <b>onionskin_type</b>,
* log it. */
void
cpuworker_log_onionskin_overhead(int severity, int onionskin_type,
const char *onionskin_type_name)
{
uint32_t overhead;
double relative_overhead;
int r;
r = get_overhead_for_onionskins(&overhead, &relative_overhead,
onionskin_type);
if (!overhead || r<0)
return;
log_fn(severity, LD_OR,
"%s onionskins have averaged %u usec overhead (%.2f%%) in "
"cpuworker code ",
onionskin_type_name, (unsigned)overhead, relative_overhead*100);
}
/** Called when we get data from a cpuworker. If the answer is not complete,
* wait for a complete answer. If the answer is complete,
* process it as appropriate.
@ -190,6 +310,30 @@ connection_cpu_process_inbuf(connection_t *conn)
connection_fetch_from_buf((void*)&rpl,sizeof(cpuworker_reply_t),conn);
tor_assert(rpl.magic == CPUWORKER_REPLY_MAGIC);
if (rpl.timed && rpl.success &&
rpl.handshake_type <= MAX_ONION_HANDSHAKE_TYPE) {
/* Time how long this request took. The handshake_type check should be
needless, but let's leave it in to be safe. */
struct timeval tv_end, tv_diff;
int64_t usec_roundtrip;
tor_gettimeofday(&tv_end);
timersub(&tv_end, &rpl.started_at, &tv_diff);
usec_roundtrip = ((int64_t)tv_diff.tv_sec)*1000000 + tv_diff.tv_usec;
if (usec_roundtrip >= 0 &&
usec_roundtrip < MAX_BELIEVABLE_ONIONSKIN_DELAY) {
++onionskins_n_processed[rpl.handshake_type];
onionskins_usec_internal[rpl.handshake_type] += rpl.n_usec;
onionskins_usec_roundtrip[rpl.handshake_type] += usec_roundtrip;
if (onionskins_n_processed[rpl.handshake_type] >= 500000) {
/* Scale down every 500000 handshakes. On a busy server, that's
* less impressive than it sounds. */
onionskins_n_processed[rpl.handshake_type] /= 2;
onionskins_usec_internal[rpl.handshake_type] /= 2;
onionskins_usec_roundtrip[rpl.handshake_type] /= 2;
}
}
}
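/* Dividing the count and both usec totals by the same factor leaves the
 * per-onionskin averages unchanged at the moment of scaling, while letting
 * newer samples gradually outweigh older ones; it also keeps the totals
 * well below the overflow bound discussed in
 * estimated_usec_for_onionskins(). */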
/* parse out the circ it was talking about */
tag_unpack(rpl.tag, &chan_id, &circ_id);
circ = NULL;
@ -289,7 +433,13 @@ cpuworker_main(void *data)
if (req.task == CPUWORKER_TASK_ONION) {
const create_cell_t *cc = &req.create_cell;
created_cell_t *cell_out = &rpl.created_cell;
struct timeval tv_start, tv_end;
int n;
rpl.timed = req.timed;
rpl.started_at = req.started_at;
rpl.handshake_type = cc->handshake_type;
if (req.timed)
tor_gettimeofday(&tv_start);
n = onion_skin_server_handshake(cc->handshake_type,
cc->onionskin, cc->handshake_len,
&onion_keys,
@ -321,6 +471,17 @@ cpuworker_main(void *data)
rpl.success = 1;
}
rpl.magic = CPUWORKER_REPLY_MAGIC;
if (req.timed) {
struct timeval tv_diff;
int64_t usec;
tor_gettimeofday(&tv_end);
timersub(&tv_end, &tv_start, &tv_diff);
usec = ((int64_t)tv_diff.tv_sec)*1000000 + tv_diff.tv_usec;
if (usec < 0 || usec > MAX_BELIEVABLE_ONIONSKIN_DELAY)
rpl.n_usec = MAX_BELIEVABLE_ONIONSKIN_DELAY;
else
rpl.n_usec = (uint32_t) usec;
}
if (write_all(fd, (void*)&rpl, sizeof(rpl), 1) != sizeof(rpl)) {
log_err(LD_BUG,"writing response buf failed. Exiting.");
goto end;
@ -478,6 +639,7 @@ assign_onionskin_to_cpuworker(connection_t *cpuworker,
cpuworker_request_t req;
time_t now = approx_time();
static time_t last_culled_cpuworkers = 0;
int should_time;
/* Checking for wedged cpuworkers requires a linear search over all
* connections, so let's do it only once a minute.
@ -512,16 +674,18 @@ assign_onionskin_to_cpuworker(connection_t *cpuworker,
return -1;
}
should_time = should_time_request(onionskin->handshake_type);
memset(&req, 0, sizeof(req));
req.magic = CPUWORKER_REQUEST_MAGIC;
tag_pack(req.tag, circ->p_chan->global_identifier,
circ->p_circ_id);
req.timed = should_time;
cpuworker->state = CPUWORKER_STATE_BUSY_ONION;
/* touch the lastwritten timestamp, since that's how we check to
* see how long it's been since we asked the question, and sometimes
* we check before the first call to connection_handle_write(). */
cpuworker->timestamp_lastwritten = time(NULL);
cpuworker->timestamp_lastwritten = now;
num_cpuworkers_busy++;
req.task = CPUWORKER_TASK_ONION;
@ -529,6 +693,9 @@ assign_onionskin_to_cpuworker(connection_t *cpuworker,
tor_free(onionskin);
if (should_time)
tor_gettimeofday(&req.started_at);
connection_write_to_buf((void*)&req, sizeof(req), cpuworker);
memwipe(&req, 0, sizeof(req));
}


@ -22,5 +22,10 @@ int assign_onionskin_to_cpuworker(connection_t *cpuworker,
or_circuit_t *circ,
struct create_cell_t *onionskin);
uint64_t estimated_usec_for_onionskins(uint32_t n_requests,
uint16_t onionskin_type);
void cpuworker_log_onionskin_overhead(int severity, int onionskin_type,
const char *onionskin_type_name);
#endif


@ -2202,6 +2202,9 @@ dumpstats(int severity)
100*(U64_TO_DBL(stats_n_data_bytes_received) /
U64_TO_DBL(stats_n_data_cells_received*RELAY_PAYLOAD_SIZE)) );
cpuworker_log_onionskin_overhead(severity, ONION_HANDSHAKE_TYPE_TAP, "TAP");
cpuworker_log_onionskin_overhead(severity, ONION_HANDSHAKE_TYPE_NTOR,"ntor");
if (now - time_of_process_start >= 0)
elapsed = now - time_of_process_start;
else


@ -13,6 +13,7 @@
#include "or.h"
#include "circuitlist.h"
#include "config.h"
#include "cpuworker.h"
#include "onion.h"
#include "onion_fast.h"
#include "onion_ntor.h"
@ -20,29 +21,71 @@
#include "relay.h"
#include "rephist.h"
#include "router.h"
#include "tor_queue.h"
/** Type for a linked list of circuits that are waiting for a free CPU worker
* to process a waiting onion handshake. */
typedef struct onion_queue_t {
TAILQ_ENTRY(onion_queue_t) next;
or_circuit_t *circ;
create_cell_t *onionskin;
time_t when_added;
struct onion_queue_t *next;
} onion_queue_t;
/** 5 seconds on the onion queue til we just send back a destroy */
#define ONIONQUEUE_WAIT_CUTOFF 5
/** First and last elements in the linked list of circuits waiting for CPU
* workers, or NULL if the list is empty.
* @{ */
static onion_queue_t *ol_list=NULL;
static onion_queue_t *ol_tail=NULL;
/**@}*/
/** Length of ol_list */
static int ol_length=0;
/** Queue of circuits waiting for CPU workers, or NULL if the list is empty.*/
TAILQ_HEAD(onion_queue_head_t, onion_queue_t) ol_list =
TAILQ_HEAD_INITIALIZER(ol_list);
/* XXXX Check lengths vs MAX_ONIONSKIN_{CHALLENGE,REPLY}_LEN */
/** Number of entries of each type currently in ol_list. */
static int ol_entries[MAX_ONION_HANDSHAKE_TYPE+1];
static void onion_queue_entry_remove(onion_queue_t *victim);
/* XXXX024 Check lengths vs MAX_ONIONSKIN_{CHALLENGE,REPLY}_LEN.
*
* (By which I think I meant, "make sure that no
* X_ONIONSKIN_CHALLENGE/REPLY_LEN is greater than
* MAX_ONIONSKIN_CHALLENGE/REPLY_LEN." Also, make sure that we can pass
* over-large values via EXTEND2/EXTENDED2, for future-compatibility.) */
/** Return true iff we have room to queue another onionskin of type
* <b>type</b>. */
static int
have_room_for_onionskin(uint16_t type)
{
const or_options_t *options = get_options();
int num_cpus;
uint64_t tap_usec, ntor_usec;
/* If we've got fewer than 50 entries, we always have room for one more. */
if (ol_entries[ONION_HANDSHAKE_TYPE_TAP] +
ol_entries[ONION_HANDSHAKE_TYPE_NTOR] < 50)
return 1;
num_cpus = get_num_cpus(options);
/* Compute how many microseconds we'd expect to need to clear all
* onionskins in the current queue. */
tap_usec = estimated_usec_for_onionskins(
ol_entries[ONION_HANDSHAKE_TYPE_TAP],
ONION_HANDSHAKE_TYPE_TAP) / num_cpus;
ntor_usec = estimated_usec_for_onionskins(
ol_entries[ONION_HANDSHAKE_TYPE_NTOR],
ONION_HANDSHAKE_TYPE_NTOR) / num_cpus;
/* See whether that exceeds MaxOnionQueueDelay. If so, we can't queue
* this. */
if ((tap_usec + ntor_usec) / 1000 > (uint64_t)options->MaxOnionQueueDelay)
return 0;
#ifdef CURVE25519_ENABLED
/* If we support the ntor handshake, then don't let TAP handshakes use
* more than 2/3 of the space on the queue. */
if (type == ONION_HANDSHAKE_TYPE_TAP &&
tap_usec / 1000 > (uint64_t)options->MaxOnionQueueDelay * 2 / 3)
return 0;
#endif
return 1;
}
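/* Worked example with the defaults (numbers are illustrative): on a 2-core
 * relay whose TAP onionskins have averaged 3000 usec each, 1500 queued TAP
 * requests are estimated at 1500 * 3000 / 2 = 2250000 usec = 2250 msec of
 * work, which exceeds the default 1750 msec MaxOnionQueueDelay, so the new
 * onionskin is refused. When ntor support is compiled in, TAP alone is also
 * capped at 2/3 of the budget, about 1166 msec with the default. */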
/** Add <b>circ</b> to the end of ol_list and return 0, except
* if ol_list is too long, in which case do nothing and return -1.
@ -58,19 +101,7 @@ onion_pending_add(or_circuit_t *circ, create_cell_t *onionskin)
tmp->onionskin = onionskin;
tmp->when_added = now;
if (!ol_tail) {
tor_assert(!ol_list);
tor_assert(!ol_length);
ol_list = tmp;
ol_tail = tmp;
ol_length++;
return 0;
}
tor_assert(ol_list);
tor_assert(!ol_tail->next);
if (ol_length >= get_options()->MaxOnionsPending) {
if (!have_room_for_onionskin(onionskin->handshake_type)) {
#define WARN_TOO_MANY_CIRC_CREATIONS_INTERVAL (60)
static ratelim_t last_warned =
RATELIM_INIT(WARN_TOO_MANY_CIRC_CREATIONS_INTERVAL);
@ -87,13 +118,19 @@ onion_pending_add(or_circuit_t *circ, create_cell_t *onionskin)
return -1;
}
ol_length++;
ol_tail->next = tmp;
ol_tail = tmp;
while ((int)(now - ol_list->when_added) >= ONIONQUEUE_WAIT_CUTOFF) {
/* cull elderly requests. */
circ = ol_list->circ;
onion_pending_remove(ol_list->circ);
++ol_entries[onionskin->handshake_type];
circ->onionqueue_entry = tmp;
TAILQ_INSERT_TAIL(&ol_list, tmp, next);
/* cull elderly requests. */
while (1) {
onion_queue_t *head = TAILQ_FIRST(&ol_list);
if (now - head->when_added < (time_t)ONIONQUEUE_WAIT_CUTOFF)
break;
circ = head->circ;
circ->onionqueue_entry = NULL;
onion_queue_entry_remove(head);
log_info(LD_CIRC,
"Circuit create request is too old; canceling due to overload.");
circuit_mark_for_close(TO_CIRCUIT(circ), END_CIRC_REASON_RESOURCELIMIT);
@ -108,17 +145,21 @@ or_circuit_t *
onion_next_task(create_cell_t **onionskin_out)
{
or_circuit_t *circ;
onion_queue_t *head = TAILQ_FIRST(&ol_list);
if (!ol_list)
if (!head)
return NULL; /* no onions pending, we're done */
tor_assert(ol_list->circ);
tor_assert(ol_list->circ->p_chan); /* make sure it's still valid */
tor_assert(ol_length > 0);
circ = ol_list->circ;
*onionskin_out = ol_list->onionskin;
ol_list->onionskin = NULL; /* prevent free. */
onion_pending_remove(ol_list->circ);
tor_assert(head->circ);
tor_assert(head->circ->p_chan); /* make sure it's still valid */
circ = head->circ;
if (head->onionskin &&
head->onionskin->handshake_type <= MAX_ONION_HANDSHAKE_TYPE)
--ol_entries[head->onionskin->handshake_type];
*onionskin_out = head->onionskin;
head->onionskin = NULL; /* prevent free. */
circ->onionqueue_entry = NULL;
onion_queue_entry_remove(head);
return circ;
}
@ -128,37 +169,29 @@ onion_next_task(create_cell_t **onionskin_out)
void
onion_pending_remove(or_circuit_t *circ)
{
onion_queue_t *tmpo, *victim;
onion_queue_t *victim;
if (!ol_list)
return; /* nothing here. */
if (!circ)
return;
/* first check to see if it's the first entry */
tmpo = ol_list;
if (tmpo->circ == circ) {
/* it's the first one. remove it from the list. */
ol_list = tmpo->next;
if (!ol_list)
ol_tail = NULL;
ol_length--;
victim = tmpo;
} else { /* we need to hunt through the rest of the list */
for ( ;tmpo->next && tmpo->next->circ != circ; tmpo=tmpo->next) ;
if (!tmpo->next) {
log_debug(LD_GENERAL,
"circ (p_circ_id %d) not in list, probably at cpuworker.",
circ->p_circ_id);
return;
}
/* now we know tmpo->next->circ == circ */
victim = tmpo->next;
tmpo->next = victim->next;
if (ol_tail == victim)
ol_tail = tmpo;
ol_length--;
}
victim = circ->onionqueue_entry;
if (victim)
onion_queue_entry_remove(victim);
}
/* now victim points to the element that needs to be removed */
/** Remove a queue entry <b>victim</b> from the queue, unlinking it from
* its circuit and freeing it and any structures it owns.*/
static void
onion_queue_entry_remove(onion_queue_t *victim)
{
TAILQ_REMOVE(&ol_list, victim, next);
if (victim->circ)
victim->circ->onionqueue_entry = NULL;
if (victim->onionskin &&
victim->onionskin->handshake_type <= MAX_ONION_HANDSHAKE_TYPE)
--ol_entries[victim->onionskin->handshake_type];
tor_free(victim->onionskin);
tor_free(victim);
@ -168,14 +201,11 @@ onion_pending_remove(or_circuit_t *circ)
void
clear_pending_onions(void)
{
while (ol_list) {
onion_queue_t *victim = ol_list;
ol_list = victim->next;
tor_free(victim->onionskin);
tor_free(victim);
onion_queue_t *victim;
while ((victim = TAILQ_FIRST(&ol_list))) {
onion_queue_entry_remove(victim);
}
ol_list = ol_tail = NULL;
ol_length = 0;
memset(ol_entries, 0, sizeof(ol_entries));
}
/* ============================================================ */


@ -2584,6 +2584,7 @@ struct ntor_handshake_state_t;
#define ONION_HANDSHAKE_TYPE_TAP 0x0000
#define ONION_HANDSHAKE_TYPE_FAST 0x0001
#define ONION_HANDSHAKE_TYPE_NTOR 0x0002
#define MAX_ONION_HANDSHAKE_TYPE 0x0002
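/* Note that the per-type counter arrays in cpuworker.c and onion.c are sized
 * with MAX_ONION_HANDSHAKE_TYPE, so this value must be bumped whenever a new
 * handshake type constant is added above. */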
typedef struct {
uint16_t tag;
union {
@ -3000,6 +3001,8 @@ typedef struct origin_circuit_t {
} origin_circuit_t;
struct onion_queue_t;
/** An or_circuit_t holds information needed to implement a circuit at an
* OR. */
typedef struct or_circuit_t {
@ -3013,6 +3016,9 @@ typedef struct or_circuit_t {
* cells to p_chan. NULL if we have no cells pending, or if we're not
* linked to an OR connection. */
struct circuit_t *prev_active_on_p_chan;
/** Pointer to an entry on the onion queue, if this circuit is waiting for a
* chance to give an onionskin to a cpuworker. Used only in onion.c */
struct onion_queue_t *onionqueue_entry;
/** The circuit_id used in the previous (backward) hop of this circuit. */
circid_t p_circ_id;
@ -3496,9 +3502,7 @@ typedef struct {
* and try a new circuit if the stream has been
* waiting for this many seconds. If zero, use
* our default internal timeout schedule. */
int MaxOnionsPending; /**< How many circuit CREATE requests do we allow
* to wait simultaneously before we start dropping
* them? */
int MaxOnionQueueDelay; /**<DOCDOC*/
int NewCircuitPeriod; /**< How long do we use a circuit before building
* a new one? */
int MaxCircuitDirtiness; /**< Never use circs that were first used more than