Change how directories that are configured with --enable-geoip-stats write geoip stats to disk.

- Write geoip stats to disk every 24 hours, not every hour.
- Remove configuration options and define reasonable defaults.
- Clear history of client requests every 24 hours (which wasn't done at
  all before).
This commit is contained in:
Karsten Loesing 2009-05-27 17:52:46 +02:00
parent bf7e5d6553
commit 54c97c9133
6 changed files with 45 additions and 28 deletions

View File

@ -6,6 +6,8 @@ Changes in version 0.2.2.1-alpha - 2009-??-??
- The memarea code now uses a sentinel value at the end of each area
to make sure nothing writes beyond the end of an area. This might
help debug some conceivable causes of bug 930.
- Directories that are configured with the --enable-geoip-stats flag
now write their GeoIP stats to disk exactly every 24 hours.
o Deprecated and removed features:
- The controller no longer accepts the old obsolete "addr-mappings/"

View File

@ -187,10 +187,10 @@ static config_var_t _option_vars[] = {
V(DirPortFrontPage, FILENAME, NULL),
OBSOLETE("DirPostPeriod"),
#ifdef ENABLE_GEOIP_STATS
V(DirRecordUsageByCountry, BOOL, "0"),
V(DirRecordUsageGranularity, UINT, "4"),
V(DirRecordUsageRetainIPs, INTERVAL, "14 days"),
V(DirRecordUsageSaveInterval, INTERVAL, "6 hours"),
OBSOLETE("DirRecordUsageByCountry"),
OBSOLETE("DirRecordUsageGranularity"),
OBSOLETE("DirRecordUsageRetainIPs"),
OBSOLETE("DirRecordUsageSaveInterval"),
#endif
VAR("DirServer", LINELIST, DirServers, NULL),
V(DNSPort, UINT, "0"),

View File

@ -12,6 +12,7 @@
#include "ht.h"
static void clear_geoip_db(void);
static void dump_geoip_stats(void);
/** An entry from the GeoIP file: maps an IP range to a country. */
typedef struct geoip_entry_t {
@ -21,9 +22,9 @@ typedef struct geoip_entry_t {
} geoip_entry_t;
/** For how many periods should we remember per-country request history? */
#define REQUEST_HIST_LEN 3
#define REQUEST_HIST_LEN 1
/** How long are the periods for which we should remember request history? */
#define REQUEST_HIST_PERIOD (8*60*60)
#define REQUEST_HIST_PERIOD (24*60*60)
/** A per-country record for GeoIP request history. */
typedef struct geoip_country_t {
@ -313,8 +314,7 @@ geoip_note_client_seen(geoip_client_action_t action,
#ifndef ENABLE_GEOIP_STATS
return;
#else
if (options->BridgeRelay || options->BridgeAuthoritativeDir ||
!options->DirRecordUsageByCountry)
if (options->BridgeRelay || options->BridgeAuthoritativeDir)
return;
#endif
}
@ -327,6 +327,14 @@ geoip_note_client_seen(geoip_client_action_t action,
current_request_period_starts = now;
break;
}
/* Also discard all items in the client history that are too old.
* (This only works here because bridge and directory stats are
* independent. Otherwise, we'd only want to discard those items
* with action GEOIP_CLIENT_NETWORKSTATUS{_V2}.) */
geoip_remove_old_clients(current_request_period_starts);
/* Before rotating, write the current stats to disk. */
dump_geoip_stats();
/* Now rotate request period */
SMARTLIST_FOREACH(geoip_countries, geoip_country_t *, c, {
memmove(&c->n_v2_ns_requests[0], &c->n_v2_ns_requests[1],
sizeof(uint32_t)*(REQUEST_HIST_LEN-1));
@ -458,9 +466,13 @@ char *
geoip_get_client_history(time_t now, geoip_client_action_t action)
{
char *result = NULL;
unsigned min_observation_time = GEOIP_MIN_OBSERVATION_TIME;
#ifdef ENABLE_GEOIP_STATS
min_observation_time = DIR_RECORD_USAGE_MIN_OBSERVATION_TIME;
#endif
if (!geoip_is_loaded())
return NULL;
if (client_history_starts < (now - GEOIP_MIN_OBSERVATION_TIME)) {
if (client_history_starts < (now - min_observation_time)) {
char buf[32];
smartlist_t *chunks = NULL;
smartlist_t *entries = NULL;
@ -471,8 +483,7 @@ geoip_get_client_history(time_t now, geoip_client_action_t action)
unsigned total = 0;
unsigned granularity = IP_GRANULARITY;
#ifdef ENABLE_GEOIP_STATS
if (get_options()->DirRecordUsageByCountry)
granularity = get_options()->DirRecordUsageGranularity;
granularity = DIR_RECORD_USAGE_GRANULARITY;
#endif
HT_FOREACH(ent, clientmap, &client_history) {
int country;
@ -538,12 +549,13 @@ geoip_get_request_history(time_t now, geoip_client_action_t action)
smartlist_t *entries, *strings;
char *result;
unsigned granularity = IP_GRANULARITY;
unsigned min_observation_time = GEOIP_MIN_OBSERVATION_TIME;
#ifdef ENABLE_GEOIP_STATS
if (get_options()->DirRecordUsageByCountry)
granularity = get_options()->DirRecordUsageGranularity;
granularity = DIR_RECORD_USAGE_GRANULARITY;
min_observation_time = DIR_RECORD_USAGE_MIN_OBSERVATION_TIME;
#endif
if (client_history_starts >= (now - GEOIP_MIN_OBSERVATION_TIME))
if (client_history_starts >= (now - min_observation_time))
return NULL;
if (action != GEOIP_CLIENT_NETWORKSTATUS &&
action != GEOIP_CLIENT_NETWORKSTATUS_V2)
@ -584,7 +596,7 @@ geoip_get_request_history(time_t now, geoip_client_action_t action)
}
/** Store all our geoip statistics into $DATADIR/geoip-stats. */
void
static void
dump_geoip_stats(void)
{
#ifdef ENABLE_GEOIP_STATS
@ -601,7 +613,7 @@ dump_geoip_stats(void)
data_v3 = geoip_get_client_history(now, GEOIP_CLIENT_NETWORKSTATUS);
format_iso_time(since, geoip_get_history_start());
format_iso_time(written, now);
out = start_writing_to_stdio_file(filename, OPEN_FLAGS_REPLACE,
out = start_writing_to_stdio_file(filename, OPEN_FLAGS_APPEND,
0600, &open_file);
if (!out)
goto done;

View File

@ -807,7 +807,6 @@ run_scheduled_events(time_t now)
static time_t time_to_clean_caches = 0;
static time_t time_to_recheck_bandwidth = 0;
static time_t time_to_check_for_expired_networkstatus = 0;
static time_t time_to_dump_geoip_stats = 0;
static time_t time_to_retry_dns_init = 0;
or_options_t *options = get_options();
int i;
@ -935,13 +934,6 @@ run_scheduled_events(time_t now)
time_to_check_for_expired_networkstatus = now + CHECK_EXPIRED_NS_INTERVAL;
}
if (time_to_dump_geoip_stats < now) {
#define DUMP_GEOIP_STATS_INTERVAL (60*60);
if (time_to_dump_geoip_stats)
dump_geoip_stats();
time_to_dump_geoip_stats = now + DUMP_GEOIP_STATS_INTERVAL;
}
/* Remove old information from rephist and the rend cache. */
if (time_to_clean_caches < now) {
rep_history_clean(now - options->RephistTrackTime);

View File

@ -2528,7 +2528,7 @@ typedef struct {
* the bridge authority guess which countries have blocked access to us. */
int BridgeRecordUsageByCountry;
#ifdef ENABLE_GEOIP_STATS
#if 0
/** If true, and Tor is built with GEOIP_STATS support, and we're a
* directory, record how many directory requests we get from each country. */
int DirRecordUsageByCountry;
@ -3583,6 +3583,15 @@ int dnsserv_launch_request(const char *name, int is_reverse);
/********************************* geoip.c **************************/
/** Round all GeoIP results to the next multiple of this value, to avoid
* leaking information. */
#define DIR_RECORD_USAGE_GRANULARITY 8
/** Time interval: Purge client IPs from the geoip client history once
 * they are older than this. */
#define DIR_RECORD_USAGE_RETAIN_IPS (24*60*60)
/** How long do we have to have observed per-country request history before
* we are willing to talk about it? */
#define DIR_RECORD_USAGE_MIN_OBSERVATION_TIME (24*60*60)
#ifdef GEOIP_PRIVATE
int geoip_parse_entry(const char *line);
#endif
@ -3614,7 +3623,6 @@ char *geoip_get_request_history(time_t now, geoip_client_action_t action);
int getinfo_helper_geoip(control_connection_t *control_conn,
const char *question, char **answer);
void geoip_free_all(void);
void dump_geoip_stats(void);
/********************************* hibernate.c **********************/

View File

@ -1915,10 +1915,13 @@ extrainfo_get_client_geoip_summary(time_t now)
static time_t last_purged_at = 0;
int geoip_purge_interval = 48*60*60;
#ifdef ENABLE_GEOIP_STATS
if (get_options()->DirRecordUsageByCountry)
geoip_purge_interval = get_options()->DirRecordUsageRetainIPs;
geoip_purge_interval = DIR_RECORD_USAGE_RETAIN_IPS;
#endif
if (now > last_purged_at+geoip_purge_interval) {
/* (Note that this also discards items in the client history with
* action GEOIP_CLIENT_NETWORKSTATUS{_V2}, which doesn't matter
* because bridge and directory stats are independent. Keep in mind
* for future extensions, though.) */
geoip_remove_old_clients(now-geoip_purge_interval);
last_purged_at = now;
}