Add GeoIP database for IPv6 addresses

This commit is contained in:
nils 2012-02-29 17:04:45 -08:00 committed by Linus Nordberg
parent 167363403b
commit abb886014e
8 changed files with 10020 additions and 42 deletions

View File

@ -1458,7 +1458,10 @@ is non-zero):
does on behalf of clients. (Default: 1)
**GeoIPFile** __filename__::
A filename containing GeoIP data, for use with BridgeRecordUsageByCountry.
A filename containing IPv4 GeoIP data, for use with BridgeRecordUsageByCountry.
**GeoIPv6File** __filename__::
A filename containing IPv6 GeoIP data, for use with BridgeRecordUsageByCountry.
**CellStatistics** **0**|**1**::
When this option is enabled, Tor writes statistics on the mean time that

9752
src/config/geoip6 Normal file

File diff suppressed because it is too large Load Diff

View File

@ -243,9 +243,12 @@ static config_var_t _option_vars[] = {
V(FetchV2Networkstatus, BOOL, "0"),
#ifdef _WIN32
V(GeoIPFile, FILENAME, "<default>"),
V(GeoIPv6File, FILENAME, "<default>"),
#else
V(GeoIPFile, FILENAME,
SHARE_DATADIR PATH_SEPARATOR "tor" PATH_SEPARATOR "geoip"),
V(GeoIPv6File, FILENAME,
SHARE_DATADIR PATH_SEPARATOR "tor" PATH_SEPARATOR "geoip6"),
#endif
OBSOLETE("GiveGuardFlagTo_CVE_2011_2768_VulnerableRelays"),
OBSOLETE("Group"),
@ -1524,9 +1527,28 @@ options_act(const or_options_t *old_options)
tor_asprintf(&actual_fname, "%s\\geoip", conf_root);
}
#endif
geoip_load_file(actual_fname, options);
geoip_load_file(AF_INET, actual_fname, options);
tor_free(actual_fname);
}
/* And maybe load geoip ipv6 file */
if (options->GeoIPv6File &&
((!old_options || !opt_streq(old_options->GeoIPv6File, options->GeoIPv6File))
|| !geoip_is_loaded())) {
/* XXXX Don't use this "<default>" junk; make our filename options
* understand prefixes somehow. -NM */
/* XXXX023 Reload GeoIPFile on SIGHUP. -NM */
char *actual_fname = tor_strdup(options->GeoIPv6File);
#ifdef _WIN32
if (!strcmp(actual_fname, "<default>")) {
const char *conf_root = get_windows_conf_root();
tor_free(actual_fname);
tor_asprintf(&actual_fname, "%s\\geoip6", conf_root);
}
#endif
geoip_load_file(AF_INET6, actual_fname, options);
tor_free(actual_fname);
}
if (options->CellStatistics || options->DirReqStatistics ||
options->EntryStatistics || options->ExitPortStatistics ||

View File

@ -28,6 +28,13 @@ typedef struct geoip_ipv4_entry_t {
intptr_t country; /**< An index into geoip_countries */
} geoip_ipv4_entry_t;
/** An entry from the GeoIP IPv6 file: maps an inclusive range of IPv6
 * addresses to a country.  Both bounds are kept as raw struct in6_addr
 * values exactly as filled in by tor_inet_pton(AF_INET6, ...), and ranges
 * are ordered bytewise with memcmp().
 * NOTE(review): that is presumably network byte order, not host order as
 * for the IPv4 entries -- confirm against tor_inet_pton(). */
typedef struct geoip_ipv6_entry_t {
/** The lowest IP in the range (raw address bytes). */
struct in6_addr ip_low;
/** The highest IP in the range (raw address bytes). */
struct in6_addr ip_high;
intptr_t country; /**< An index into geoip_countries */
} geoip_ipv6_entry_t;
/** A per-country record for GeoIP request history. */
typedef struct geoip_country_t {
char countrycode[3];
@ -42,7 +49,7 @@ static smartlist_t *geoip_countries = NULL;
* not found. */
static strmap_t *country_idxplus1_by_lc_code = NULL;
/** Lists of all known geoip_ipv4_entry_t and geoip_ipv6_entry_t; each list
 * is sorted by ip_low after its file has been loaded. */
static smartlist_t *geoip_ipv4_entries = NULL;
static smartlist_t *geoip_ipv4_entries = NULL, *geoip_ipv6_entries = NULL;
/** SHA1 digest of the GeoIP file to include in extra-info descriptors. */
static char geoip_digest[DIGEST_LEN];
@ -159,6 +166,116 @@ _geoip_ipv4_compare_key_to_entry(const void *_key, const void **_member)
return 0;
}
/** Record one range in the GeoIP IPv6 table: every address from <b>low</b>
 * through <b>high</b>, inclusive, is mapped to the 2-letter country code
 * <b>country</b>.  A range whose <b>high</b> compares below its <b>low</b>
 * is dropped silently.  A country code that has not been seen before is
 * appended to geoip_countries and registered in
 * country_idxplus1_by_lc_code.
 */
static void
geoip_ipv6_add_entry(struct in6_addr low, struct in6_addr high,
                     const char *country)
{
  intptr_t country_idx;
  geoip_ipv6_entry_t *entry;
  geoip_country_t *known;
  void *stored;

  /* Ignore inverted ranges. */
  if (memcmp(&high, &low, sizeof(struct in6_addr)) < 0)
    return;

  stored = strmap_get_lc(country_idxplus1_by_lc_code, country);
  if (stored) {
    /* The map holds index+1, so a NULL lookup result means "absent". */
    country_idx = ((uintptr_t)stored)-1;
  } else {
    geoip_country_t *fresh = tor_malloc_zero(sizeof(geoip_country_t));
    strlcpy(fresh->countrycode, country, sizeof(fresh->countrycode));
    tor_strlower(fresh->countrycode);
    smartlist_add(geoip_countries, fresh);
    country_idx = smartlist_len(geoip_countries) - 1;
    strmap_set_lc(country_idxplus1_by_lc_code, country,
                  (void*)(country_idx+1));
  }

  /* Sanity check: the index we settled on must name this country. */
  known = smartlist_get(geoip_countries, country_idx);
  tor_assert(!strcasecmp(known->countrycode, country));

  entry = tor_malloc_zero(sizeof(geoip_ipv6_entry_t));
  entry->country = country_idx;
  entry->ip_low = low;
  entry->ip_high = high;
  smartlist_add(geoip_ipv6_entries, entry);
}
/** Add an entry to the GeoIP IPv6 table, parsing it from <b>line</b>.
 *
 * The expected form is "LOW,HIGH,CC": two textual IPv6 addresses followed by
 * a two-letter country code, separated by commas and optionally terminated
 * by a newline.  Leading whitespace is skipped, and a line whose first
 * non-space character is '#' is treated as a comment and ignored.
 *
 * Creates the country list and the IPv6 entry list on first use.
 *
 * Return 0 if the line was a comment or was parsed successfully (note that
 * geoip_ipv6_add_entry() silently drops a range whose high end is below its
 * low end); log a warning and return -1 if the line cannot be parsed. */
/*private*/ int
geoip_ipv6_parse_entry(const char *line)
{
  char buf[512];
  char *low_str, *high_str, *country;
  struct in6_addr low, high;
  char *strtok_state;

  if (!geoip_countries)
    init_geoip_countries();
  if (!geoip_ipv6_entries)
    geoip_ipv6_entries = smartlist_new();

  while (TOR_ISSPACE(*line))
    ++line;
  if (*line == '#')
    return 0;

  /* Copy only after skipping leading whitespace: the tokens are cut from
   * buf, and an address token that still carried the whitespace would be
   * rejected by tor_inet_pton(). */
  strlcpy(buf, line, sizeof(buf));

  low_str = tor_strtok_r(buf, ",", &strtok_state);
  if (!low_str)
    goto fail;
  high_str = tor_strtok_r(NULL, ",", &strtok_state);
  if (!high_str)
    goto fail;
  country = tor_strtok_r(NULL, "\n", &strtok_state);
  if (!country)
    goto fail;
  if (strlen(country) != 2)
    goto fail;

  if (tor_inet_pton(AF_INET6, low_str, &low) <= 0)
    goto fail;
  if (tor_inet_pton(AF_INET6, high_str, &high) <= 0)
    goto fail;

  geoip_ipv6_add_entry(low, high, country);
  return 0;

 fail:
  log_warn(LD_GENERAL, "Unable to parse line from GEOIP IPV6 file: %s",
           escaped(line));
  return -1;
}
/** Sorting helper for lists of geoip_ipv6_entry_t: order two entries by a
 * bytewise comparison of their ip_low fields.  The result is whatever
 * memcmp() yields: negative, zero, or positive. */
static int
_geoip_ipv6_compare_entries(const void **_a, const void **_b)
{
  const geoip_ipv6_entry_t *lhs = *_a;
  const geoip_ipv6_entry_t *rhs = *_b;
  const int order = memcmp(&lhs->ip_low, &rhs->ip_low,
                           sizeof(struct in6_addr));
  return order;
}
/** bsearch helper: compare an IPv6 address (<b>_key</b>, a pointer to a
 * struct in6_addr) against the range held by a geoip_ipv6_entry_t
 * (<b>_member</b>).  Return -1 if the address lies below the entry's ip_low,
 * 1 if it lies above the entry's ip_high, and 0 if it falls inside the
 * range.  All comparisons are bytewise, via memcmp(). */
static int
_geoip_ipv6_compare_key_to_entry(const void *_key, const void **_member)
{
  /* _key is handed through unchanged by the search routine; keep it const
   * rather than casting the qualifier away. */
  const struct in6_addr *addr = _key;
  const geoip_ipv6_entry_t *entry = *_member;

  if (memcmp(addr, &entry->ip_low, sizeof(struct in6_addr)) < 0)
    return -1;
  if (memcmp(addr, &entry->ip_high, sizeof(struct in6_addr)) > 0)
    return 1;
  return 0;
}
/** Return 1 if we should collect geoip stats on bridge users, and
* include them in our extrainfo descriptor. Else return 0. */
int
@ -199,13 +316,15 @@ init_geoip_countries(void)
* with '#' (comments).
*/
int
geoip_load_file(const char *filename, const or_options_t *options)
geoip_load_file(sa_family_t family, const char *filename, const or_options_t *options)
{
FILE *f;
const char *msg = "";
int severity = options_need_geoip_info(options, &msg) ? LOG_WARN : LOG_INFO;
crypto_digest_t *geoip_digest_env = NULL;
clear_geoip_db();
tor_assert(family == AF_INET || family == AF_INET6);
if (!(f = tor_fopen_cloexec(filename, "r"))) {
log_fn(severity, LD_GENERAL, "Failed to open GEOIP file %s. %s",
filename, msg);
@ -213,25 +332,41 @@ geoip_load_file(const char *filename, const or_options_t *options)
}
if (!geoip_countries)
init_geoip_countries();
if (geoip_ipv4_entries) {
SMARTLIST_FOREACH(geoip_ipv4_entries, geoip_ipv4_entry_t *, e, tor_free(e));
smartlist_free(geoip_ipv4_entries);
if (family == AF_INET) {
if (geoip_ipv4_entries) {
SMARTLIST_FOREACH(geoip_ipv4_entries, geoip_ipv4_entry_t *, e, tor_free(e));
smartlist_free(geoip_ipv4_entries);
}
geoip_ipv4_entries = smartlist_new();
} else { /* AF_INET6 */
if (geoip_ipv6_entries) {
SMARTLIST_FOREACH(geoip_ipv6_entries, geoip_ipv6_entry_t *, e, tor_free(e));
smartlist_free(geoip_ipv6_entries);
}
geoip_ipv6_entries = smartlist_new();
}
geoip_ipv4_entries = smartlist_new();
geoip_digest_env = crypto_digest_new();
log_notice(LD_GENERAL, "Parsing GEOIP file %s.", filename);
log_notice(LD_GENERAL, "Parsing GEOIP %s file %s.", (family == AF_INET) ? "ipv4" : "ipv6", filename);
while (!feof(f)) {
char buf[512];
if (fgets(buf, (int)sizeof(buf), f) == NULL)
break;
crypto_digest_add_bytes(geoip_digest_env, buf, strlen(buf));
/* FFFF track full country name. */
geoip_ipv4_parse_entry(buf);
if (family == AF_INET)
geoip_ipv4_parse_entry(buf);
else /* AF_INET6 */
geoip_ipv6_parse_entry(buf);
}
/*XXXX abort and return -1 if no entries/illformed?*/
fclose(f);
smartlist_sort(geoip_ipv4_entries, _geoip_ipv4_compare_entries);
if (family == AF_INET)
smartlist_sort(geoip_ipv4_entries, _geoip_ipv4_compare_entries);
else /* AF_INET6 */
smartlist_sort(geoip_ipv6_entries, _geoip_ipv6_compare_entries);
/* Okay, now we need to maybe change our mind about what is in which
* country. */
@ -261,6 +396,18 @@ geoip_get_country_by_ipv4(uint32_t ipaddr)
return ent ? (int)ent->country : 0;
}
/** Look up <b>addr</b> in the GeoIP IPv6 table.  Return -1 if no IPv6 table
 * exists, 0 if no stored range contains the address, and otherwise the
 * country index recorded in the matching entry. */
int
geoip_get_country_by_ipv6(const struct in6_addr *addr)
{
  const geoip_ipv6_entry_t *match;

  if (geoip_ipv6_entries == NULL)
    return -1;

  match = smartlist_bsearch(geoip_ipv6_entries, addr,
                            _geoip_ipv6_compare_key_to_entry);
  if (match == NULL)
    return 0;
  return (int)match->country;
}
/** Given an IP address, return a number representing the country to which
* that address belongs, -1 for "No geoip information available", or 0 for
* the 'unknown country'. The return value will always be less than
@ -269,11 +416,13 @@ geoip_get_country_by_ipv4(uint32_t ipaddr)
int
geoip_get_country_by_addr(const tor_addr_t *addr)
{
if (tor_addr_family(addr) != AF_INET) {
/*XXXX IP6 support ipv6 geoip.*/
if (tor_addr_family(addr) == AF_INET) {
return geoip_get_country_by_ipv4(tor_addr_to_ipv4h(addr));
} else if (tor_addr_family(addr) == AF_INET6) {
return geoip_get_country_by_ipv6(tor_addr_to_in6(addr));
} else {
return -1;
}
return geoip_get_country_by_ipv4(tor_addr_to_ipv4h(addr));
}
/** Return the number of countries recognized by the GeoIP database. */
@ -301,7 +450,7 @@ geoip_get_country_name(country_t num)
int
geoip_is_loaded(void)
{
return geoip_countries != NULL && geoip_ipv4_entries != NULL;
return geoip_countries != NULL && (geoip_ipv4_entries != NULL || geoip_ipv6_entries != NULL);
}
/** Return the hex-encoded SHA1 digest of the loaded GeoIP file. The
@ -818,7 +967,8 @@ geoip_get_dirreq_history(geoip_client_action_t action,
* bridge, directory server, or entry guard. The entry format is cc=num
* where num is the number of IPs we've seen connecting from that country,
* and cc is a lowercased country code. Returns NULL if we don't want
* to export geoip data yet. */
* to export geoip data yet. This counts both IPv4 and IPv6 clients
* since they're in the same clientmap list. */
char *
geoip_get_client_history(geoip_client_action_t action)
{
@ -1469,9 +1619,14 @@ clear_geoip_db(void)
SMARTLIST_FOREACH(geoip_ipv4_entries, geoip_ipv4_entry_t *, ent, tor_free(ent));
smartlist_free(geoip_ipv4_entries);
}
if (geoip_ipv6_entries) {
SMARTLIST_FOREACH(geoip_ipv6_entries, geoip_ipv6_entry_t *, ent, tor_free(ent));
smartlist_free(geoip_ipv6_entries);
}
geoip_countries = NULL;
country_idxplus1_by_lc_code = NULL;
geoip_ipv4_entries = NULL;
geoip_ipv6_entries = NULL;
}
/** Release all storage held in this file. */

View File

@ -14,10 +14,12 @@
#ifdef GEOIP_PRIVATE
int geoip_ipv4_parse_entry(const char *line);
int geoip_ipv6_parse_entry(const char *line);
int geoip_get_country_by_ipv4(uint32_t ipaddr);
int geoip_get_country_by_ipv6(const struct in6_addr *addr);
#endif
int should_record_bridge_info(const or_options_t *options);
int geoip_load_file(const char *filename, const or_options_t *options);
int geoip_get_country_by_ipv4(uint32_t ipaddr);
int geoip_load_file(sa_family_t family, const char *filename, const or_options_t *options);
int geoip_get_country_by_addr(const tor_addr_t *addr);
int geoip_get_n_countries(void);
const char *geoip_get_country_name(country_t num);

View File

@ -3657,8 +3657,9 @@ typedef struct {
* the bridge authority guess which countries have blocked access to us. */
int BridgeRecordUsageByCountry;
/** Optionally, a file with GeoIP data. */
/** Optionally, IPv4 and IPv6 GeoIP data */
char *GeoIPFile;
char *GeoIPv6File;
/** If true, SIGHUP should reload the torrc. Sometimes controllers want
* to make this false. */

View File

@ -319,9 +319,13 @@ addr_is_in_cc_list(uint32_t addr, const smartlist_t *cc_list)
{
country_t country;
const char *name;
tor_addr_t tar;
if (!cc_list)
return 0;
country = geoip_get_country_by_ipv4(addr);
/* XXX: ipv4 specific */
tor_addr_from_ipv4h(&tar, addr);
country = geoip_get_country_by_addr(&tar);
name = geoip_get_country_name(country);
return smartlist_string_isin_case(cc_list, name);
}

View File

@ -1455,6 +1455,7 @@ test_geoip(void)
"entry-stats-end 2010-08-12 13:27:30 (86400 s)\n"
"entry-ips \n";
tor_addr_t addr;
struct in6_addr in6;
/* Populate the DB a bit. Add these in order, since we can't do the final
* 'sort' step. These aren't very good IP addresses, but they're perfectly
@ -1466,38 +1467,73 @@ test_geoip(void)
test_eq(0, geoip_ipv4_parse_entry("\"150\",\"190\",\"XY\""));
test_eq(0, geoip_ipv4_parse_entry("\"200\",\"250\",\"AB\""));
/* Populate the IPv6 DB equivalently with fake IPs in the same range */
test_eq(0, geoip_ipv6_parse_entry("::a,::32,AB"));
test_eq(0, geoip_ipv6_parse_entry("::34,::5a,XY"));
test_eq(0, geoip_ipv6_parse_entry("::5f,::64,AB"));
test_eq(0, geoip_ipv6_parse_entry("::69,::8c,ZZ"));
test_eq(0, geoip_ipv6_parse_entry("::96,::be,XY"));
test_eq(0, geoip_ipv6_parse_entry("::c8,::fa,AB"));
/* We should have 4 countries: ??, ab, xy, zz. */
test_eq(4, geoip_get_n_countries());
memset(&in6, 0, sizeof(in6));
/* Make sure that country ID actually works. */
#define NAMEFOR(x) geoip_get_country_name(geoip_get_country_by_ipv4(x))
test_streq("??", NAMEFOR(3));
#define SET_TEST_IPV6(i) in6.s6_addr32[3] = htonl((uint32_t) i)
#define CHECK_COUNTRY(country, val) do { \
/* test ipv4 country lookup */ \
test_streq(country, geoip_get_country_name(geoip_get_country_by_ipv4(val))); \
/* test ipv6 country lookup */ \
SET_TEST_IPV6(val); \
test_streq(country, geoip_get_country_name(geoip_get_country_by_ipv6(&in6))); \
} while(0)
CHECK_COUNTRY("??", 3);
CHECK_COUNTRY("ab", 32);
CHECK_COUNTRY("??", 5);
CHECK_COUNTRY("??", 51);
CHECK_COUNTRY("xy", 150);
CHECK_COUNTRY("xy", 190);
CHECK_COUNTRY("??", 2000);
test_eq(0, geoip_get_country_by_ipv4(3));
test_streq("ab", NAMEFOR(32));
test_streq("??", NAMEFOR(5));
test_streq("??", NAMEFOR(51));
test_streq("xy", NAMEFOR(150));
test_streq("xy", NAMEFOR(190));
test_streq("??", NAMEFOR(2000));
#undef NAMEFOR
SET_TEST_IPV6(3);
test_eq(0, geoip_get_country_by_ipv6(&in6));
#undef CHECK_COUNTRY
/* Record odd numbered fake-IPs using ipv6, even numbered fake-IPs
* using ipv4. Since our fake geoip database is the same between
* ipv4 and ipv6, we should get the same result no matter which
* address family we pick for each IP. */
#define SET_TEST_ADDRESS(i) do { \
if ((i) & 1) { \
SET_TEST_IPV6(i); \
tor_addr_from_in6(&addr, &in6); \
} else { \
tor_addr_from_ipv4h(&addr, (uint32_t) i); \
} \
} while(0)
get_options_mutable()->BridgeRelay = 1;
get_options_mutable()->BridgeRecordUsageByCountry = 1;
/* Put 9 observations in AB... */
for (i=32; i < 40; ++i) {
tor_addr_from_ipv4h(&addr, (uint32_t) i);
SET_TEST_ADDRESS(i);
geoip_note_client_seen(GEOIP_CLIENT_CONNECT, &addr, now-7200);
}
tor_addr_from_ipv4h(&addr, (uint32_t) 225);
SET_TEST_ADDRESS(i);
geoip_note_client_seen(GEOIP_CLIENT_CONNECT, &addr, now-7200);
/* and 3 observations in XY, several times. */
for (j=0; j < 10; ++j)
for (i=52; i < 55; ++i) {
tor_addr_from_ipv4h(&addr, (uint32_t) i);
SET_TEST_ADDRESS(i);
geoip_note_client_seen(GEOIP_CLIENT_CONNECT, &addr, now-3600);
}
/* and 17 observations in ZZ... */
for (i=110; i < 127; ++i) {
tor_addr_from_ipv4h(&addr, (uint32_t) i);
SET_TEST_ADDRESS(i);
geoip_note_client_seen(GEOIP_CLIENT_CONNECT, &addr, now);
}
s = geoip_get_client_history(GEOIP_CLIENT_CONNECT);
@ -1538,7 +1574,7 @@ test_geoip(void)
/* Start testing dirreq statistics by making sure that we don't collect
* dirreq stats without initializing them. */
tor_addr_from_ipv4h(&addr, (uint32_t) 100);
SET_TEST_ADDRESS(100);
geoip_note_client_seen(GEOIP_CLIENT_NETWORKSTATUS, &addr, now);
s = geoip_format_dirreq_stats(now + 86400);
test_assert(!s);
@ -1546,7 +1582,7 @@ test_geoip(void)
/* Initialize stats, note one connecting client, and generate the
* dirreq-stats history string. */
geoip_dirreq_stats_init(now);
tor_addr_from_ipv4h(&addr, (uint32_t) 100);
SET_TEST_ADDRESS(100);
geoip_note_client_seen(GEOIP_CLIENT_NETWORKSTATUS, &addr, now);
s = geoip_format_dirreq_stats(now + 86400);
test_streq(dirreq_stats_1, s);
@ -1555,7 +1591,7 @@ test_geoip(void)
/* Stop collecting stats, add another connecting client, and ensure we
* don't generate a history string. */
geoip_dirreq_stats_term();
tor_addr_from_ipv4h(&addr, (uint32_t) 101);
SET_TEST_ADDRESS(101);
geoip_note_client_seen(GEOIP_CLIENT_NETWORKSTATUS, &addr, now);
s = geoip_format_dirreq_stats(now + 86400);
test_assert(!s);
@ -1563,7 +1599,7 @@ test_geoip(void)
/* Re-start stats, add a connecting client, reset stats, and make sure
* that we get an all empty history string. */
geoip_dirreq_stats_init(now);
tor_addr_from_ipv4h(&addr, (uint32_t) 100);
SET_TEST_ADDRESS(100);
geoip_note_client_seen(GEOIP_CLIENT_NETWORKSTATUS, &addr, now);
geoip_reset_dirreq_stats(now);
s = geoip_format_dirreq_stats(now + 86400);
@ -1591,7 +1627,7 @@ test_geoip(void)
/* Start testing entry statistics by making sure that we don't collect
* anything without initializing entry stats. */
tor_addr_from_ipv4h(&addr, (uint32_t) 100);
SET_TEST_ADDRESS(100);
geoip_note_client_seen(GEOIP_CLIENT_CONNECT, &addr, now);
s = geoip_format_entry_stats(now + 86400);
test_assert(!s);
@ -1599,7 +1635,7 @@ test_geoip(void)
/* Initialize stats, note one connecting client, and generate the
* entry-stats history string. */
geoip_entry_stats_init(now);
tor_addr_from_ipv4h(&addr, (uint32_t) 100);
SET_TEST_ADDRESS(100);
geoip_note_client_seen(GEOIP_CLIENT_CONNECT, &addr, now);
s = geoip_format_entry_stats(now + 86400);
test_streq(entry_stats_1, s);
@ -1608,7 +1644,7 @@ test_geoip(void)
/* Stop collecting stats, add another connecting client, and ensure we
* don't generate a history string. */
geoip_entry_stats_term();
tor_addr_from_ipv4h(&addr, (uint32_t) 101);
SET_TEST_ADDRESS(101);
geoip_note_client_seen(GEOIP_CLIENT_CONNECT, &addr, now);
s = geoip_format_entry_stats(now + 86400);
test_assert(!s);
@ -1616,13 +1652,16 @@ test_geoip(void)
/* Re-start stats, add a connecting client, reset stats, and make sure
* that we get an all empty history string. */
geoip_entry_stats_init(now);
tor_addr_from_ipv4h(&addr, (uint32_t) 100);
SET_TEST_ADDRESS(100);
geoip_note_client_seen(GEOIP_CLIENT_CONNECT, &addr, now);
geoip_reset_entry_stats(now);
s = geoip_format_entry_stats(now + 86400);
test_streq(entry_stats_2, s);
tor_free(s);
#undef SET_TEST_ADDRESS
#undef SET_TEST_IPV6
/* Stop collecting entry statistics. */
geoip_entry_stats_term();
get_options_mutable()->EntryStatistics = 0;