tor/src/ext/ht.h

621 lines
34 KiB
C

/* Copyright (c) 2002, Christopher Clark.
* Copyright (c) 2005-2006, Nick Mathewson.
* Copyright (c) 2007-2017, The Tor Project, Inc. */
/* See license at end. */
/* Based on ideas by Christopher Clark and interfaces from Niels Provos. */
/*
These macros provide an intrustive implementation for a typesafe chaining
hash table, loosely based on the BSD tree.h and queue.h macros. Here's
how to use them.
First, pick a the structure that you'll be storing in the hashtable. Let's
say that's "struct dinosaur". To this structure, you add an HT_ENTRY()
member, as such:
struct dinosaur {
HT_ENTRY(dinosaur) node; // The name inside the () must match the
// struct.
// These are just fields from the dinosaur structure...
long dinosaur_id;
char *name;
long age;
int is_ornithischian;
int is_herbivorous;
};
You can declare the hashtable itself as:
HT_HEAD(dinosaur_ht, dinosaur);
This declares a new 'struct dinosaur_ht' type.
Now you need to declare two functions to help implement the hashtable: one
compares two dinosaurs for equality, and one computes the hash of a
dinosaur. Let's say that two dinosaurs are equal if they have the same ID
and name.
int
dinosaurs_equal(const struct dinosaur *d1, const struct dinosaur *d2)
{
return d1->dinosaur_id == d2->dinosaur_id &&
0 == strcmp(d1->name, d2->name);
}
unsigned
dinosaur_hash(const struct dinosaur *d)
{
// This is a very bad hash function. Use siphash24g instead.
return (d->dinosaur_id + d->name[0] ) * 1337 + d->name[1] * 1337;
}
Now you'll need to declare the functions that manipulate the hash table.
To do this, you put this declaration either in a header file, or inside
a regular module, depending on what visibility you want.
HT_PROTOTYPE(dinosaur_ht, // The name of the hashtable struct
dinosaur, // The name of the element struct,
node, // The name of HT_ENTRY member
dinosaur_hash, dinosaurs_equal);
Later, inside a C function, you use this macro to declare the hashtable
functions.
HT_GENERATE2(dinosaur_ht, dinosaur, node, dinosaur_hash, dinosaurs_equal,
0.6, tor_reallocarray, tor_free_);
Note the use of tor_free_, not tor_free. The 0.6 is magic.
Now you can use the hashtable! You can initialize one with
struct dinosaur_ht my_dinos = HT_INITIALIZER();
Or create one in core with
struct dinosaur_ht *dinos = tor_malloc(sizeof(dinosaur_ht));
HT_INIT(dinosaur_ht, dinos);
To the hashtable, you use the HT_FOO(dinosaur_ht, ...) macros. For
example, to put new_dino into dinos, you say:
HT_REPLACE(dinosaur_ht, dinos, new_dino);
If you're searching for an element, you need to use a dummy 'key' element in
the search. For example.
struct dinosaur dino_key;
dino_key.dinosaur_id = 12345;
dino_key.name = tor_strdup("Atrociraptor");
struct dinosaur *found = HT_FIND(dinosaurs_ht, dinos, &dino_key);
Have fun with your hash table!
*/
#ifndef HT_H_INCLUDED_
#define HT_H_INCLUDED_
#define HT_HEAD(name, type) \
struct name { \
/* The hash table itself. */ \
struct type **hth_table; \
/* How long is the hash table? */ \
unsigned hth_table_length; \
/* How many elements does the table contain? */ \
unsigned hth_n_entries; \
/* How many elements will we allow in the table before resizing it? */ \
unsigned hth_load_limit; \
/* Position of hth_table_length in the primes table. */ \
int hth_prime_idx; \
}
#define HT_INITIALIZER() \
{ NULL, 0, 0, 0, -1 }
#ifdef HT_NO_CACHE_HASH_VALUES
#define HT_ENTRY(type) \
struct { \
struct type *hte_next; \
}
#else
#define HT_ENTRY(type) \
struct { \
struct type *hte_next; \
unsigned hte_hash; \
}
#endif
/* || 0 is for -Wparentheses-equality (-Wall?) appeasement under clang */
#define HT_EMPTY(head) \
(((head)->hth_n_entries == 0) || 0)
/* How many elements in 'head'? */
#define HT_SIZE(head) \
((head)->hth_n_entries)
/* Return memory usage for a hashtable (not counting the entries themselves) */
#define HT_MEM_USAGE(head) \
(sizeof(*head) + (head)->hth_table_length * sizeof(void*))
#define HT_FIND(name, head, elm) name##_HT_FIND((head), (elm))
#define HT_INSERT(name, head, elm) name##_HT_INSERT((head), (elm))
#define HT_REPLACE(name, head, elm) name##_HT_REPLACE((head), (elm))
#define HT_REMOVE(name, head, elm) name##_HT_REMOVE((head), (elm))
#define HT_START(name, head) name##_HT_START(head)
#define HT_NEXT(name, head, elm) name##_HT_NEXT((head), (elm))
#define HT_NEXT_RMV(name, head, elm) name##_HT_NEXT_RMV((head), (elm))
#define HT_CLEAR(name, head) name##_HT_CLEAR(head)
#define HT_INIT(name, head) name##_HT_INIT(head)
#define HT_REP_IS_BAD_(name, head) name##_HT_REP_IS_BAD_(head)
#define HT_FOREACH_FN(name, head, fn, data) \
name##_HT_FOREACH_FN((head), (fn), (data))
/* Helper: */
static inline unsigned
ht_improve_hash(unsigned h)
{
/* Aim to protect against poor hash functions by adding logic here
* - logic taken from java 1.4 hashtable source */
h += ~(h << 9);
h ^= ((h >> 14) | (h << 18)); /* >>> */
h += (h << 4);
h ^= ((h >> 10) | (h << 22)); /* >>> */
return h;
}
#if 0
/** Basic string hash function, from Java standard String.hashCode(). */
static inline unsigned
ht_string_hash(const char *s)
{
unsigned h = 0;
int m = 1;
while (*s) {
h += ((signed char)*s++)*m;
m = (m<<5)-1; /* m *= 31 */
}
return h;
}
#endif
#if 0
/** Basic string hash function, from Python's str.__hash__() */
static inline unsigned
ht_string_hash(const char *s)
{
unsigned h;
const unsigned char *cp = (const unsigned char *)s;
h = *cp << 7;
while (*cp) {
h = (1000003*h) ^ *cp++;
}
/* This conversion truncates the length of the string, but that's ok. */
h ^= (unsigned)(cp-(const unsigned char*)s);
return h;
}
#endif
#ifndef HT_NO_CACHE_HASH_VALUES
#define HT_SET_HASH_(elm, field, hashfn) \
do { (elm)->field.hte_hash = hashfn(elm); } while (0)
#define HT_SET_HASHVAL_(elm, field, val) \
do { (elm)->field.hte_hash = (val); } while (0)
#define HT_ELT_HASH_(elm, field, hashfn) \
((elm)->field.hte_hash)
#else
#define HT_SET_HASH_(elm, field, hashfn) \
((void)0)
#define HT_ELT_HASH_(elm, field, hashfn) \
(hashfn(elm))
#define HT_SET_HASHVAL_(elm, field, val) \
((void)0)
#endif
#define HT_BUCKET_NUM_(head, field, elm, hashfn) \
(HT_ELT_HASH_(elm,field,hashfn) % head->hth_table_length)
/* Helper: alias for the bucket containing 'elm'. */
#define HT_BUCKET_(head, field, elm, hashfn) \
((head)->hth_table[HT_BUCKET_NUM_(head, field, elm, hashfn)])
#define HT_FOREACH(x, name, head) \
for ((x) = HT_START(name, head); \
(x) != NULL; \
(x) = HT_NEXT(name, head, x))
#ifndef HT_NDEBUG
#define HT_ASSERT_(x) tor_assert(x)
#else
#define HT_ASSERT_(x) (void)0
#endif
#define HT_PROTOTYPE(name, type, field, hashfn, eqfn) \
int name##_HT_GROW(struct name *ht, unsigned min_capacity); \
void name##_HT_CLEAR(struct name *ht); \
int name##_HT_REP_IS_BAD_(const struct name *ht); \
static inline void \
name##_HT_INIT(struct name *head) { \
head->hth_table_length = 0; \
head->hth_table = NULL; \
head->hth_n_entries = 0; \
head->hth_load_limit = 0; \
head->hth_prime_idx = -1; \
} \
/* Helper: returns a pointer to the right location in the table \
* 'head' to find or insert the element 'elm'. */ \
static inline struct type ** \
name##_HT_FIND_P_(struct name *head, struct type *elm) \
{ \
struct type **p; \
if (!head->hth_table) \
return NULL; \
p = &HT_BUCKET_(head, field, elm, hashfn); \
while (*p) { \
if (eqfn(*p, elm)) \
return p; \
p = &(*p)->field.hte_next; \
} \
return p; \
} \
/* Return a pointer to the element in the table 'head' matching 'elm', \
* or NULL if no such element exists */ \
ATTR_UNUSED static inline struct type * \
name##_HT_FIND(const struct name *head, struct type *elm) \
{ \
struct type **p; \
struct name *h = (struct name *) head; \
HT_SET_HASH_(elm, field, hashfn); \
p = name##_HT_FIND_P_(h, elm); \
return p ? *p : NULL; \
} \
/* Insert the element 'elm' into the table 'head'. Do not call this \
* function if the table might already contain a matching element. */ \
ATTR_UNUSED static inline void \
name##_HT_INSERT(struct name *head, struct type *elm) \
{ \
struct type **p; \
if (!head->hth_table || head->hth_n_entries >= head->hth_load_limit) \
name##_HT_GROW(head, head->hth_n_entries+1); \
++head->hth_n_entries; \
HT_SET_HASH_(elm, field, hashfn); \
p = &HT_BUCKET_(head, field, elm, hashfn); \
elm->field.hte_next = *p; \
*p = elm; \
} \
/* Insert the element 'elm' into the table 'head'. If there already \
* a matching element in the table, replace that element and return \
* it. */ \
ATTR_UNUSED static inline struct type * \
name##_HT_REPLACE(struct name *head, struct type *elm) \
{ \
struct type **p, *r; \
if (!head->hth_table || head->hth_n_entries >= head->hth_load_limit) \
name##_HT_GROW(head, head->hth_n_entries+1); \
HT_SET_HASH_(elm, field, hashfn); \
p = name##_HT_FIND_P_(head, elm); \
HT_ASSERT_(p != NULL); /* this holds because we called HT_GROW */ \
r = *p; \
*p = elm; \
if (r && (r!=elm)) { \
elm->field.hte_next = r->field.hte_next; \
r->field.hte_next = NULL; \
return r; \
} else { \
++head->hth_n_entries; \
return NULL; \
} \
} \
/* Remove any element matching 'elm' from the table 'head'. If such \
* an element is found, return it; otherwise return NULL. */ \
ATTR_UNUSED static inline struct type * \
name##_HT_REMOVE(struct name *head, struct type *elm) \
{ \
struct type **p, *r; \
HT_SET_HASH_(elm, field, hashfn); \
p = name##_HT_FIND_P_(head,elm); \
if (!p || !*p) \
return NULL; \
r = *p; \
*p = r->field.hte_next; \
r->field.hte_next = NULL; \
--head->hth_n_entries; \
return r; \
} \
/* Invoke the function 'fn' on every element of the table 'head', \
* using 'data' as its second argument. If the function returns \
* nonzero, remove the most recently examined element before invoking \
* the function again. */ \
ATTR_UNUSED static inline void \
name##_HT_FOREACH_FN(struct name *head, \
int (*fn)(struct type *, void *), \
void *data) \
{ \
unsigned idx; \
struct type **p, **nextp, *next; \
if (!head->hth_table) \
return; \
for (idx=0; idx < head->hth_table_length; ++idx) { \
p = &head->hth_table[idx]; \
while (*p) { \
nextp = &(*p)->field.hte_next; \
next = *nextp; \
if (fn(*p, data)) { \
--head->hth_n_entries; \
*p = next; \
} else { \
p = nextp; \
} \
} \
} \
} \
/* Return a pointer to the first element in the table 'head', under \
* an arbitrary order. This order is stable under remove operations, \
* but not under others. If the table is empty, return NULL. */ \
ATTR_UNUSED static inline struct type ** \
name##_HT_START(struct name *head) \
{ \
unsigned b = 0; \
while (b < head->hth_table_length) { \
if (head->hth_table[b]) { \
HT_ASSERT_(b == \
HT_BUCKET_NUM_(head,field,head->hth_table[b],hashfn)); \
return &head->hth_table[b]; \
} \
++b; \
} \
return NULL; \
} \
/* Return the next element in 'head' after 'elm', under the arbitrary \
* order used by HT_START. If there are no more elements, return \
* NULL. If 'elm' is to be removed from the table, you must call \
* this function for the next value before you remove it. \
*/ \
ATTR_UNUSED static inline struct type ** \
name##_HT_NEXT(struct name *head, struct type **elm) \
{ \
if ((*elm)->field.hte_next) { \
HT_ASSERT_(HT_BUCKET_NUM_(head,field,*elm,hashfn) == \
HT_BUCKET_NUM_(head,field,(*elm)->field.hte_next,hashfn)); \
return &(*elm)->field.hte_next; \
} else { \
unsigned b = HT_BUCKET_NUM_(head,field,*elm,hashfn)+1; \
while (b < head->hth_table_length) { \
if (head->hth_table[b]) { \
HT_ASSERT_(b == \
HT_BUCKET_NUM_(head,field,head->hth_table[b],hashfn)); \
return &head->hth_table[b]; \
} \
++b; \
} \
return NULL; \
} \
} \
ATTR_UNUSED static inline struct type ** \
name##_HT_NEXT_RMV(struct name *head, struct type **elm) \
{ \
unsigned h = HT_ELT_HASH_(*elm, field, hashfn); \
*elm = (*elm)->field.hte_next; \
--head->hth_n_entries; \
if (*elm) { \
return elm; \
} else { \
unsigned b = (h % head->hth_table_length)+1; \
while (b < head->hth_table_length) { \
if (head->hth_table[b]) \
return &head->hth_table[b]; \
++b; \
} \
return NULL; \
} \
}
#define HT_GENERATE2(name, type, field, hashfn, eqfn, load, reallocarrayfn, \
freefn) \
/* Primes that aren't too far from powers of two. We stop at */ \
/* P=402653189 because P*sizeof(void*) is less than SSIZE_MAX */ \
/* even on a 32-bit platform. */ \
static unsigned name##_PRIMES[] = { \
53, 97, 193, 389, \
769, 1543, 3079, 6151, \
12289, 24593, 49157, 98317, \
196613, 393241, 786433, 1572869, \
3145739, 6291469, 12582917, 25165843, \
50331653, 100663319, 201326611, 402653189 \
}; \
static unsigned name##_N_PRIMES = \
(unsigned)(sizeof(name##_PRIMES)/sizeof(name##_PRIMES[0])); \
/* Expand the internal table of 'head' until it is large enough to \
* hold 'size' elements. Return 0 on success, -1 on allocation \
* failure. */ \
int \
name##_HT_GROW(struct name *head, unsigned size) \
{ \
unsigned new_len, new_load_limit; \
int prime_idx; \
struct type **new_table; \
if (head->hth_prime_idx == (int)name##_N_PRIMES - 1) \
return 0; \
if (head->hth_load_limit > size) \
return 0; \
prime_idx = head->hth_prime_idx; \
do { \
new_len = name##_PRIMES[++prime_idx]; \
new_load_limit = (unsigned)(load*new_len); \
} while (new_load_limit <= size && \
prime_idx < (int)name##_N_PRIMES); \
if ((new_table = reallocarrayfn(NULL, new_len, sizeof(struct type*)))) { \
unsigned b; \
memset(new_table, 0, new_len*sizeof(struct type*)); \
for (b = 0; b < head->hth_table_length; ++b) { \
struct type *elm, *next; \
unsigned b2; \
elm = head->hth_table[b]; \
while (elm) { \
next = elm->field.hte_next; \
b2 = HT_ELT_HASH_(elm, field, hashfn) % new_len; \
elm->field.hte_next = new_table[b2]; \
new_table[b2] = elm; \
elm = next; \
} \
} \
if (head->hth_table) \
freefn(head->hth_table); \
head->hth_table = new_table; \
} else { \
unsigned b, b2; \
new_table = reallocarrayfn(head->hth_table, new_len, sizeof(struct type*)); \
if (!new_table) return -1; \
memset(new_table + head->hth_table_length, 0, \
(new_len - head->hth_table_length)*sizeof(struct type*)); \
for (b=0; b < head->hth_table_length; ++b) { \
struct type *e, **pE; \
for (pE = &new_table[b], e = *pE; e != NULL; e = *pE) { \
b2 = HT_ELT_HASH_(e, field, hashfn) % new_len; \
if (b2 == b) { \
pE = &e->field.hte_next; \
} else { \
*pE = e->field.hte_next; \
e->field.hte_next = new_table[b2]; \
new_table[b2] = e; \
} \
} \
} \
head->hth_table = new_table; \
} \
head->hth_table_length = new_len; \
head->hth_prime_idx = prime_idx; \
head->hth_load_limit = new_load_limit; \
return 0; \
} \
/* Free all storage held by 'head'. Does not free 'head' itself, or \
* individual elements. */ \
void \
name##_HT_CLEAR(struct name *head) \
{ \
if (head->hth_table) \
freefn(head->hth_table); \
head->hth_table_length = 0; \
name##_HT_INIT(head); \
} \
/* Debugging helper: return false iff the representation of 'head' is \
* internally consistent. */ \
int \
name##_HT_REP_IS_BAD_(const struct name *head) \
{ \
unsigned n, i; \
struct type *elm; \
if (!head->hth_table_length) { \
if (!head->hth_table && !head->hth_n_entries && \
!head->hth_load_limit && head->hth_prime_idx == -1) \
return 0; \
else \
return 1; \
} \
if (!head->hth_table || head->hth_prime_idx < 0 || \
!head->hth_load_limit) \
return 2; \
if (head->hth_n_entries > head->hth_load_limit) \
return 3; \
if (head->hth_table_length != name##_PRIMES[head->hth_prime_idx]) \
return 4; \
if (head->hth_load_limit != (unsigned)(load*head->hth_table_length)) \
return 5; \
for (n = i = 0; i < head->hth_table_length; ++i) { \
for (elm = head->hth_table[i]; elm; elm = elm->field.hte_next) { \
if (HT_ELT_HASH_(elm, field, hashfn) != hashfn(elm)) \
return 1000 + i; \
if (HT_BUCKET_NUM_(head,field,elm,hashfn) != i) \
return 10000 + i; \
++n; \
} \
} \
if (n != head->hth_n_entries) \
return 6; \
return 0; \
}
#define HT_GENERATE(name, type, field, hashfn, eqfn, load, mallocfn, \
reallocfn, freefn) \
static void * \
name##_reallocarray(void *arg, size_t a, size_t b) \
{ \
if ((b) && (a) > SIZE_MAX / (b)) \
return NULL; \
if (arg) \
return reallocfn((arg),(a)*(b)); \
else \
return mallocfn((a)*(b)); \
} \
HT_GENERATE2(name, type, field, hashfn, eqfn, load, \
name##_reallocarray, freefn)
/** Implements an over-optimized "find and insert if absent" block;
* not meant for direct usage by typical code, or usage outside the critical
* path.*/
#define HT_FIND_OR_INSERT_(name, field, hashfn, head, eltype, elm, var, y, n) \
{ \
struct name *var##_head_ = head; \
struct eltype **var; \
if (!var##_head_->hth_table || \
var##_head_->hth_n_entries >= var##_head_->hth_load_limit) \
name##_HT_GROW(var##_head_, var##_head_->hth_n_entries+1); \
HT_SET_HASH_((elm), field, hashfn); \
var = name##_HT_FIND_P_(var##_head_, (elm)); \
HT_ASSERT_(var); /* Holds because we called HT_GROW */ \
if (*var) { \
y; \
} else { \
n; \
} \
}
#define HT_FOI_INSERT_(field, head, elm, newent, var) \
{ \
HT_SET_HASHVAL_(newent, field, (elm)->field.hte_hash); \
newent->field.hte_next = NULL; \
*var = newent; \
++((head)->hth_n_entries); \
}
/*
* Copyright 2005, Nick Mathewson. Implementation logic is adapted from code
* by Christopher Clark, retrofit to allow drop-in memory management, and to
* use the same interface as Niels Provos's tree.h. This is probably still
* a derived work, so the original license below still applies.
*
* Copyright (c) 2002, Christopher Clark
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name of the original author; nor the names of any contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
* OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#endif