HashMap
的數(shù)據(jù)結(jié)構(gòu)
-
HashMap
的數(shù)據(jù)結(jié)構(gòu)很簡單糙箍,就是一個根節(jié)點腾誉、一個迭代器還有一個析構(gòu)函數(shù) -
HashMap
比較復(fù)雜的地方在于其節(jié)點swHashMap_node
的UT_hash_handle
數(shù)據(jù)成員,該數(shù)據(jù)成員是C
語言hash
庫uthash
胀莹,HashMap
大部分功能依賴于這個uthash
醋闭。 -
swHashMap_node
中key_int
是鍵值的長度涯冠,key_str
是具體的鍵值乃正,data
是value
數(shù)據(jù)
typedef void (*swHashMap_dtor)(void *data);
typedef struct
{
struct swHashMap_node *root;
struct swHashMap_node *iterator;
swHashMap_dtor dtor;
} swHashMap;
typedef struct swHashMap_node
{
uint64_t key_int;
char *key_str;
void *data;
UT_hash_handle hh;
} swHashMap_node;
HashMap
由于 HashMap
是在底層 uthash
哈希表的基礎(chǔ)上構(gòu)建的瘦癌,如果想要詳細了解其原理大家可以先看看下一節(jié)內(nèi)容后再閱讀本小節(jié)恋捆。
HashMap
的初始化
-
HashMap
的初始化主要是對底層uthash
哈希表進行內(nèi)存的分配栅螟、初始化 -
uthash
哈希表的初始化包括tbl
幅疼、buckets
的初始化凹嘲,成員變量的具體意義可以參考下一節(jié)內(nèi)容
swHashMap* swHashMap_new(uint32_t bucket_num, swHashMap_dtor dtor)
{
swHashMap *hmap = sw_malloc(sizeof(swHashMap));
if (!hmap)
{
swWarn("malloc[1] failed.");
return NULL;
}
swHashMap_node *root = sw_malloc(sizeof(swHashMap_node));
if (!root)
{
swWarn("malloc[2] failed.");
sw_free(hmap);
return NULL;
}
bzero(hmap, sizeof(swHashMap));
hmap->root = root;
bzero(root, sizeof(swHashMap_node));
root->hh.tbl = (UT_hash_table*) sw_malloc(sizeof(UT_hash_table));
if (!(root->hh.tbl))
{
swWarn("malloc for table failed.");
sw_free(hmap);
return NULL;
}
memset(root->hh.tbl, 0, sizeof(UT_hash_table));
root->hh.tbl->tail = &(root->hh);
root->hh.tbl->num_buckets = SW_HASHMAP_INIT_BUCKET_N;
root->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2;
root->hh.tbl->hho = (char*) (&root->hh) - (char*) root;
root->hh.tbl->buckets = (UT_hash_bucket*) sw_malloc(SW_HASHMAP_INIT_BUCKET_N * sizeof(struct UT_hash_bucket));
if (!root->hh.tbl->buckets)
{
swWarn("malloc for buckets failed.");
sw_free(hmap);
return NULL;
}
memset(root->hh.tbl->buckets, 0, SW_HASHMAP_INIT_BUCKET_N * sizeof(struct UT_hash_bucket));
root->hh.tbl->signature = HASH_SIGNATURE;
hmap->dtor = dtor;
return hmap;
}
HashMap
的新元素添加
- 首先需要新建一個
swHashMap_node
呜魄,為key_str
悔叽、key_int
與data
- 將新建的
swHashMap_node
添加到哈希表中 - 為
UT_hash_handler
的prev
、next
爵嗅、key
娇澎、keylen
、hashv
睹晒、tbl
成員變量賦值趟庄,將新的UT_hash_handler
放入雙向鏈表的尾部,更新tbl
的tail
成員 - 利用
HASH_ADD_TO_BKT
函數(shù)將UT_hash_handler
插入到哈希桶中
int swHashMap_add(swHashMap* hmap, char *key, uint16_t key_len, void *data)
{
swHashMap_node *node = (swHashMap_node*) sw_malloc(sizeof(swHashMap_node));
if (node == NULL)
{
swWarn("malloc failed.");
return SW_ERR;
}
bzero(node, sizeof(swHashMap_node));
swHashMap_node *root = hmap->root;
node->key_str = sw_strndup(key, key_len);
node->key_int = key_len;
node->data = data;
return swHashMap_node_add(root, node);
}
static sw_inline int swHashMap_node_add(swHashMap_node *root, swHashMap_node *add)
{
unsigned _ha_bkt;
add->hh.next = NULL;
add->hh.key = add->key_str;
add->hh.keylen = add->key_int;
root->hh.tbl->tail->next = add;
add->hh.prev = ELMT_FROM_HH(root->hh.tbl, root->hh.tbl->tail);
root->hh.tbl->tail = &(add->hh);
root->hh.tbl->num_items++;
add->hh.tbl = root->hh.tbl;
add->hh.hashv = swoole_hash_jenkins(add->key_str, add->key_int);
_ha_bkt = add->hh.hashv & (root->hh.tbl->num_buckets - 1);
HASH_ADD_TO_BKT(root->hh.tbl->buckets[_ha_bkt], &add->hh);
return SW_OK;
}
swHashMap_add_int
添加 int 類型元素
-
swHashMap_add_int
直接調(diào)用HASH_ADD_INT
更新整個哈希表册招,比起swHashMap_add
函數(shù)岔激,沒有了復(fù)雜的uthash
數(shù)據(jù)結(jié)構(gòu)的更新
int swHashMap_add_int(swHashMap *hmap, uint64_t key, void *data)
{
swHashMap_node *node = (swHashMap_node*) sw_malloc(sizeof(swHashMap_node));
swHashMap_node *root = hmap->root;
if (node == NULL)
{
swWarn("malloc failed");
return SW_ERR;
}
node->key_int = key;
node->data = data;
node->key_str = NULL;
HASH_ADD_INT(root, key_int, node);
return SW_OK;
}
swHashMap_find
查找元素
- 首先先通過哈希鍵計算哈希值,找出哈希桶的索引
-
HASH_FIND_IN_BKT
會根據(jù)哈希桶來查找具體的元素
void* swHashMap_find(swHashMap* hmap, char *key, uint16_t key_len)
{
swHashMap_node *root = hmap->root;
swHashMap_node *ret = swHashMap_node_find(root, key, key_len);
if (ret == NULL)
{
return NULL;
}
return ret->data;
}
static sw_inline swHashMap_node *swHashMap_node_find(swHashMap_node *root, char *key_str, uint16_t key_len)
{
swHashMap_node *out;
unsigned bucket, hash;
out = NULL;
if (root)
{
hash = swoole_hash_jenkins(key_str, key_len);
bucket = hash & (root->hh.tbl->num_buckets - 1);
HASH_FIND_IN_BKT(root->hh.tbl, hh, (root)->hh.tbl->buckets[bucket], key_str, key_len, out);
}
return out;
}
swHashMap_find_int
函數(shù)
-
swHashMap_find_int
函數(shù)直接調(diào)用HASH_FIND_INT
查找
void* swHashMap_find_int(swHashMap* hmap, uint64_t key)
{
swHashMap_node *ret = NULL;
swHashMap_node *root = hmap->root;
HASH_FIND_INT(root, &key, ret);
if (ret == NULL)
{
return NULL;
}
return ret->data;
}
swHashMap_each
遍歷
-
swHashMap_each
利用迭代器不斷獲取下一個元素
void* swHashMap_each(swHashMap* hmap, char **key)
{
swHashMap_node *node = swHashMap_node_each(hmap);
if (node)
{
*key = node->key_str;
return node->data;
}
else
{
return NULL;
}
}
static sw_inline swHashMap_node* swHashMap_node_each(swHashMap* hmap)
{
swHashMap_node *iterator = hmap->iterator;
swHashMap_node *tmp;
if (hmap->root->hh.tbl->num_items == 0)
{
return NULL;
}
if (iterator == NULL)
{
iterator = hmap->root;
}
tmp = iterator->hh.next;
if (tmp)
{
hmap->iterator = tmp;
return tmp;
}
else
{
hmap->iterator = NULL;
return NULL;
}
}
swHashMap_count
函數(shù)
uint32_t swHashMap_count(swHashMap* hmap)
{
if (hmap == NULL)
{
return 0;
}
return HASH_COUNT(hmap->root);
}
swHashMap_del
刪除元素
- 刪除元素首先需要
swHashMap_node_delete
函數(shù)來重構(gòu)哈希表是掰,然后調(diào)用swHashMap_node_free
釋放內(nèi)存
int swHashMap_del(swHashMap* hmap, char *key, uint16_t key_len)
{
swHashMap_node *root = hmap->root;
swHashMap_node *node = swHashMap_node_find(root, key, key_len);
if (node == NULL)
{
return SW_ERR;
}
swHashMap_node_delete(root, node);
swHashMap_node_free(hmap, node);
return SW_OK;
}
static sw_inline void swHashMap_node_free(swHashMap *hmap, swHashMap_node *node)
{
swHashMap_node_dtor(hmap, node);
sw_free(node->key_str);
sw_free(node);
}
- 刪除重構(gòu)哈希表流程較為復(fù)雜虑鼎,步驟和
HASH_DELETE
函數(shù)邏輯一致,詳細可以看下一節(jié)
static int swHashMap_node_delete(swHashMap_node *root, swHashMap_node *del_node)
{
unsigned bucket;
struct UT_hash_handle *_hd_hh_del;
if ((del_node->hh.prev == NULL) && (del_node->hh.next == NULL))
{
sw_free(root->hh.tbl->buckets);
sw_free(root->hh.tbl);
}
else
{
_hd_hh_del = &(del_node->hh);
if (del_node == ELMT_FROM_HH(root->hh.tbl, root->hh.tbl->tail))
{
root->hh.tbl->tail = (UT_hash_handle*) ((ptrdiff_t) (del_node->hh.prev) + root->hh.tbl->hho);
}
if (del_node->hh.prev)
{
((UT_hash_handle*) ((ptrdiff_t) (del_node->hh.prev) + root->hh.tbl->hho))->next = del_node->hh.next;
}
else
{
DECLTYPE_ASSIGN(root, del_node->hh.next);
}
if (_hd_hh_del->next)
{
((UT_hash_handle*) ((ptrdiff_t) _hd_hh_del->next + root->hh.tbl->hho))->prev = _hd_hh_del->prev;
}
HASH_TO_BKT(_hd_hh_del->hashv, root->hh.tbl->num_buckets, bucket);
HASH_DEL_IN_BKT(hh, root->hh.tbl->buckets[bucket], _hd_hh_del);
root->hh.tbl->num_items--;
}
return SW_OK;
}
swHashMap_del_int
函數(shù)
-
swHashMap_del_int
函數(shù)沒有復(fù)雜邏輯键痛,直接調(diào)用了HASH_DEL
這個第三方庫
int swHashMap_del_int(swHashMap *hmap, uint64_t key)
{
swHashMap_node *ret = NULL;
swHashMap_node *root = hmap->root;
HASH_FIND_INT(root, &key, ret);
if (ret == NULL)
{
return SW_ERR;
}
HASH_DEL(root, ret);
swHashMap_node_free(hmap, ret);
return SW_OK;
}
swHashMap_free
銷毀哈希表
- 銷毀哈希表需要循環(huán)所有的哈希節(jié)點元素炫彩,逐個刪除
-
HASH_ITER
用于循環(huán)所有的哈希節(jié)點元素
void swHashMap_free(swHashMap* hmap)
{
swHashMap_node *find, *tmp = NULL;
swHashMap_node *root = hmap->root;
HASH_ITER(hh, root, find, tmp)
{
if (find == root) continue;
swHashMap_node_delete(root, find);
swHashMap_node_free(hmap, find);
}
sw_free(hmap->root->hh.tbl->buckets);
sw_free(hmap->root->hh.tbl);
sw_free(hmap->root);
sw_free(hmap);
}
uthash
哈希表
uthash
是使用開鏈法實現(xiàn)的哈希表,其代碼均是宏函數(shù)編寫絮短,首先我們先看看這個哈希表的數(shù)據(jù)結(jié)構(gòu):
-
uthash
由三種數(shù)據(jù)結(jié)構(gòu)構(gòu)成:UT_hash_table
江兢、UT_hash_bucket
、UT_hash_handle
image
UT_hash_table
-
UT_hash_table
是整個哈希表的核心丁频,UT_hash_bucket
是根據(jù)哈希值排列的數(shù)組杉允,UT_hash_handle
是開鏈法中哈希沖突的鏈表
image
-
從上圖可以清楚的看出來
UT_hash_table
的數(shù)據(jù)結(jié)構(gòu):-
buckets
是哈希桶數(shù)組的首地址邑贴;num_buckets
是哈希桶的數(shù)量;log2_num_buckets
是log2(num_buckets)
的值 -
tail
是哈希鏈表的最后那個元素地址;num_items
是哈希鏈表的元素個數(shù) -
hho
:成員變量UT_hash_handle
相對于用戶結(jié)構(gòu)體首部的位置 -
ideal_chain_maxlen
:在理想情況下叔磷,即所有的元素剛好平坦到每個buckets
指向的鏈表中拢驾,任何兩個鏈表的數(shù)目相差不超過1時,一個鏈表中能夠容納的元素數(shù)目改基,實際上就等于num_items / num_buckets + (num_items % num_buckets == 0 ? 0 : 1)
繁疤; -
nonideal_items
:實際上buckets
的數(shù)目超過ideal_chain_maxlen
的鏈表數(shù); -
noexpand
:當這個值為1時秕狰,永遠不會對buckets
的大小進行擴充 -
ineff_expands
:當某個buckets
的鏈表過長時稠腊,需要對buckets
指向的數(shù)組的大小進行擴充,然后對整個鏈表重新分配各自的哈希桶鸣哀;擴張后如果nonideal_items
仍然大于num_items
的一半時架忌,也就是說明當前哈希表嚴重不平衡,哈希沖突很嚴重诺舔,這個時候說明當前的鍵值有問題鳖昌,或者哈希算法有問題,并不是擴充buckets
數(shù)組能夠解決的低飒。這個時候许昨,就會遞增ineff_expands
的值,當ineff_expands
大于 1 的時候褥赊,就會設(shè)置noexpand
設(shè)置為 1糕档,永遠不會擴充buckets
的大小。 -
bloom_bv
:指向一個uint8_t
類型的數(shù)組拌喉,用來標記buckets
中每個鏈表是否為空速那,可以優(yōu)化查找的速度,因為這個數(shù)組中每個元素是一個字節(jié)尿背,所以每個元素可以標記8個鏈表端仰,例如要判斷bucket[1]->hh_head
是否為空,只要判斷(bloom_bv[0] & 2)
是否為0即可; -
bloom_nbits
:bloom_bv
指向的數(shù)組大小為 (1 <<bloom_nbits
)田藐。
-
typedef struct UT_hash_table {
UT_hash_bucket *buckets;
unsigned num_buckets, log2_num_buckets;
unsigned num_items;
struct UT_hash_handle *tail;
ptrdiff_t hho;
unsigned ideal_chain_maxlen;
unsigned nonideal_items;
unsigned ineff_expands, noexpand;
uint32_t signature; /* used only to test bloom exists in external analysis */
#ifdef HASH_BLOOM
uint32_t bloom_sig; /* used only to test bloom exists in external analysis */
uint8_t *bloom_bv;
char bloom_nbits;
#endif
} UT_hash_table;
UT_hash_handle
-
UT_hash_handle
是存儲數(shù)據(jù)的真正地方荔烧,也是哈希表的最小結(jié)構(gòu)單元,如下圖汽久,不同于一般的開鏈法鹤竭,只有在哈希沖突的時候才會將兩個元素用鏈表連接起來,uthash
哈希表將所有UT_hash_handle
元素構(gòu)成了兩種雙向鏈表-
prev
景醇、next
構(gòu)成的雙向鏈表將所有UT_hash_handle
元素都連接到了一起臀稚,這個是為了能夠快速的訪問所有的數(shù)據(jù), -
hh_prev
三痰、hh_next
將所有哈希沖突的吧寺、哈希值相同的元素歸并到了一起
-
-
key
窜管、keylen
是存儲的鍵值與長度,hashv
是鍵值的哈希值 -
tbl
是上一小節(jié)的UT_hash_table
typedef struct UT_hash_handle {
struct UT_hash_table *tbl;
void *prev; /* prev element in app order */
void *next; /* next element in app order */
struct UT_hash_handle *hh_prev; /* previous hh in bucket order */
struct UT_hash_handle *hh_next; /* next hh in bucket order */
void *key; /* ptr to enclosing struct's key */
unsigned keylen; /* enclosing struct's key len */
unsigned hashv; /* result of hash-fcn(key) */
} UT_hash_handle;
image
UT_hash_bucket
- 哈希桶是哈希表非常重要的成員稚机,位于同一個哈希桶內(nèi)的
UT_hash_handle
元素擁有相同的哈希值hashv
微峰,不過這種概率很小。 - 由于
buckets
指向的數(shù)組可能比較惺闱(初始值為32,這個值一定是2的指數(shù)次方)颜凯,所以會先對UT_hash_handle
元素 中的hashv
進行一次按位與操作(idx = (hashv & (num_buckets-1)))
谋币,然后被插入到buckets[idx]->hh_head
指向的雙向鏈表中 -
count
:hh_head
指向的鏈表中的元素數(shù)目; -
expand_mult
:當count
的值大于(expand_mult+1)*10
時症概,則對buckets
指向的數(shù)組的大小進行擴充蕾额;在擴充之后expand_mult
被設(shè)定為count / ideal_chain_maxlen
。
typedef struct UT_hash_bucket {
struct UT_hash_handle *hh_head;
unsigned count;
unsigned expand_mult;
} UT_hash_bucket;
ELMT_FROM_HH
函數(shù)
我們之前說 UT_hash_handle
元素構(gòu)成了兩套雙向鏈表彼城,prev
诅蝶、next
構(gòu)成了其中一套,但是確切地說 prev
募壕、next
指向的地址并不是 UT_hash_handle
的地址调炬,而是它的上一層。例如我們之前說的:
typedef struct swHashMap_node
{
uint64_t key_int;
char *key_str;
void *data;
UT_hash_handle hh;
} swHashMap_node;
prev
舱馅、next
指向的地址實際是 swHashMap_node
的地址缰泡,這個 swHashMap_node
與 UT_hash_handle
之間還有用戶自定義的 header
數(shù)據(jù),這個數(shù)據(jù)的大小就是 UT_hash_table
的 hho
成員變量的值代嗤。
ELMT_FROM_HH
就是通過 UT_hash_handle
的地址反算 swHashMap_node
地址的函數(shù):
#define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho)))
HASH_TO_BKT
函數(shù)
-
HASH_TO_BKT
函數(shù)根據(jù)哈希值計算哈希桶的索引值棘钞,因為哈希值會很大,必然要轉(zhuǎn)為哈希桶數(shù)組的index
#define HASH_TO_BKT( hashv, num_bkts, bkt ) \
do { \
bkt = ((hashv) & ((num_bkts) - 1)); \
} while(0)
HASH_MAKE_TABLE
函數(shù)
-
HASH_MAKE_TABLE
函數(shù)用于創(chuàng)建UT_hash_table
#define HASH_MAKE_TABLE(hh,head) \
do { \
(head)->hh.tbl = (UT_hash_table*)uthash_malloc( \
sizeof(UT_hash_table)); \
if (!((head)->hh.tbl)) { uthash_fatal( "out of memory"); } \
memset((head)->hh.tbl, 0, sizeof(UT_hash_table)); \
(head)->hh.tbl->tail = &((head)->hh); \
(head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \
(head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \
(head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \
(head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \
HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \
if (! (head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); } \
memset((head)->hh.tbl->buckets, 0, \
HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \
HASH_BLOOM_MAKE((head)->hh.tbl); \
(head)->hh.tbl->signature = HASH_SIGNATURE; \
} while(0)
HASH_ADD_TO_BKT
函數(shù)
-
HASH_ADD_TO_BKT
函數(shù)用于向UT_hash_bucket
中添加新的UT_hash_handle
元素 -
head
是通過哈希已經(jīng)計算好的哈希桶干毅,addhh
是要新添加的UT_hash_handle
元素 - 新添加的元素會替換哈希桶的
hh_head
- 如果當前哈希桶中的
UT_hash_handle
元素數(shù)量過多宜猜,就會考慮擴充UT_hash_bucket
的數(shù)量,并且重新分配
/* add an item to a bucket */
#define HASH_ADD_TO_BKT(head,addhh) \
do { \
head.count++; \
(addhh)->hh_next = head.hh_head; \
(addhh)->hh_prev = NULL; \
if (head.hh_head) { (head).hh_head->hh_prev = (addhh); } \
(head).hh_head=addhh; \
if (head.count >= ((head.expand_mult+1) * HASH_BKT_CAPACITY_THRESH) \
&& (addhh)->tbl->noexpand != 1) { \
HASH_EXPAND_BUCKETS((addhh)->tbl); \
} \
} while(0)
HASH_EXPAND_BUCKETS
函數(shù)
-
HASH_EXPAND_BUCKETS
函數(shù)用于擴充哈希桶的數(shù)量 - 每次擴充都會增長一倍硝逢,并且重新計算
ideal_chain_maxlen
- 遍歷所有的
UT_hash_handle
元素姨拥,并且根據(jù)他們的hashv
重新計算它們歸屬的哈希桶的索引,并將其放入新的哈希桶中 - 更新
UT_hash_table
的num_buckets
趴捅、log2_num_buckets
- 重新計算
nonideal_items
值垫毙,如果大于元素的一半,說明哈希沖突仍然嚴重拱绑,哈希桶的擴容并不能解決問題综芥,那么就將ineff_expands
遞增,必要的時候禁止哈希桶的擴容
#define HASH_EXPAND_BUCKETS(tbl) \
do { \
unsigned _he_bkt; \
unsigned _he_bkt_i; \
struct UT_hash_handle *_he_thh, *_he_hh_nxt; \
UT_hash_bucket *_he_new_buckets, *_he_newbkt; \
_he_new_buckets = (UT_hash_bucket*)uthash_malloc( \
2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \
if (!_he_new_buckets) { uthash_fatal( "out of memory"); } \
memset(_he_new_buckets, 0, \
2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \
tbl->ideal_chain_maxlen = \
(tbl->num_items >> (tbl->log2_num_buckets+1)) + \
((tbl->num_items & ((tbl->num_buckets*2)-1)) ? 1 : 0); \
tbl->nonideal_items = 0; \
for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++) \
{ \
_he_thh = tbl->buckets[ _he_bkt_i ].hh_head; \
while (_he_thh) { \
_he_hh_nxt = _he_thh->hh_next; \
HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2, _he_bkt); \
_he_newbkt = &(_he_new_buckets[ _he_bkt ]); \
if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) { \
tbl->nonideal_items++; \
_he_newbkt->expand_mult = _he_newbkt->count / \
tbl->ideal_chain_maxlen; \
} \
_he_thh->hh_prev = NULL; \
_he_thh->hh_next = _he_newbkt->hh_head; \
if (_he_newbkt->hh_head) _he_newbkt->hh_head->hh_prev = \
_he_thh; \
_he_newbkt->hh_head = _he_thh; \
_he_thh = _he_hh_nxt; \
} \
} \
uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
tbl->num_buckets *= 2; \
tbl->log2_num_buckets++; \
tbl->buckets = _he_new_buckets; \
tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ? \
(tbl->ineff_expands+1) : 0; \
if (tbl->ineff_expands > 1) { \
tbl->noexpand=1; \
uthash_noexpand_fyi(tbl); \
} \
uthash_expand_fyi(tbl); \
} while(0)
HASH_ADD_INT
函數(shù)
-
HASH_ADD_INT
函數(shù)是HASH_ADD_TO_BKT
的int
特例 - 首先判斷當前哈希表是否存在猎拨,如果不存在膀藐,那么就用
HASH_MAKE_TABLE
創(chuàng)建一個哈希表 - 如果哈希表存在屠阻,那么就將
UT_hash_handle
放入雙向鏈表表尾 - 利用
HASH_FCN
計算哈希值,并利用HASH_ADD_TO_BKT
將其放入對應(yīng)的哈希桶中 -
HASH_BLOOM_ADD
函數(shù)為bloom_bv
設(shè)置位额各,用于快速判斷當前hashv
值存在元素
#define HASH_ADD_INT(head,intfield,add) \
HASH_ADD(hh,head,intfield,sizeof(int),add)
#define HASH_ADD(hh,head,fieldname,keylen_in,add) \
HASH_ADD_KEYPTR(hh,head,&((add)->fieldname),keylen_in,add)
#define HASH_BLOOM_ADD(tbl,hashv) \
HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1)))
#define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8] |= (1U << ((idx)%8)))
#define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \
do { \
unsigned _ha_bkt; \
(add)->hh.next = NULL; \
(add)->hh.key = (char*)(keyptr); \
(add)->hh.keylen = (unsigned)(keylen_in); \
if (!(head)) { \
head = (add); \
(head)->hh.prev = NULL; \
HASH_MAKE_TABLE(hh,head); \
} else { \
(head)->hh.tbl->tail->next = (add); \
(add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \
(head)->hh.tbl->tail = &((add)->hh); \
} \
(head)->hh.tbl->num_items++; \
(add)->hh.tbl = (head)->hh.tbl; \
HASH_FCN(keyptr,keylen_in, (head)->hh.tbl->num_buckets, \
(add)->hh.hashv, _ha_bkt); \
HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt],&(add)->hh); \
HASH_BLOOM_ADD((head)->hh.tbl,(add)->hh.hashv); \
HASH_EMIT_KEY(hh,head,keyptr,keylen_in); \
HASH_FSCK(hh,head); \
} while(0)
HASH_FIND_IN_BKT
函數(shù)
-
HASH_FIND_IN_BKT
用于根據(jù)keyptr
在head
哈希桶中尋找UT_hash_handle
-
DECLTYPE_ASSIGN
用于轉(zhuǎn)化out
為用戶自定義的數(shù)據(jù)類型(也就是swHashMap_node
) - 不斷循環(huán)
hh_next
国觉、hh_pre
組成的雙向鏈表,找出與keyptr
相同的元素
#define HASH_KEYCMP(a,b,len) memcmp(a,b,len)
#define DECLTYPE(x) (__typeof(x))
#endif
#define DECLTYPE_ASSIGN(dst,src) \
do { \
(dst) = DECLTYPE(dst)(src); \
} while(0)
#endif
#define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out) \
do { \
if (head.hh_head) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,head.hh_head)); \
else out=NULL; \
while (out) { \
if ((out)->hh.keylen == keylen_in) { \
if ((HASH_KEYCMP((out)->hh.key,keyptr,keylen_in)) == 0) break; \
} \
if ((out)->hh.hh_next) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,(out)->hh.hh_next)); \
else out = NULL; \
} \
} while(0)
HASH_FIND_INT
函數(shù)
-
HASH_FIND_INT
函數(shù)是上一個函數(shù)的特殊化虾啦,專門查找int
類型的鍵值 -
HASH_FCN
實際上是Jenkins
哈希算法麻诀,用于計算哈希值 -
HASH_BLOOM_TEST
用于快速判斷哈希桶內(nèi)到底有沒有元素,如果沒有那么沒有必要進行下去
#define HASH_FIND_INT(head,findint,out) \
HASH_FIND(hh,head,findint,sizeof(int),out)
#define HASH_FCN HASH_JEN
#endif
#define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8] & (1U << ((idx)%8)))
#define HASH_BLOOM_TEST(tbl,hashv) \
HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1)))
#define HASH_FIND(hh,head,keyptr,keylen,out) \
do { \
unsigned _hf_bkt,_hf_hashv; \
out=NULL; \
if (head) { \
HASH_FCN(keyptr,keylen, (head)->hh.tbl->num_buckets, _hf_hashv, _hf_bkt); \
if (HASH_BLOOM_TEST((head)->hh.tbl, _hf_hashv)) { \
HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], \
keyptr,keylen,out); \
} \
} \
} while (0)
HASH_COUNT
函數(shù)
-
HASH_COUNT
函數(shù)用于計算所有元素的數(shù)量
#define HASH_COUNT(head) HASH_CNT(hh,head)
#define HASH_CNT(hh,head) ((head)?((head)->hh.tbl->num_items):0)
HASH_DEL_IN_BKT
函數(shù)
-
HASH_DEL_IN_BKT
函數(shù)用于刪除已知的哈希桶的某一個鏈表元素
#define HASH_DEL_IN_BKT(hh,head,hh_del) \
(head).count--; \
if ((head).hh_head == hh_del) { \
(head).hh_head = hh_del->hh_next; \
} \
if (hh_del->hh_prev) { \
hh_del->hh_prev->hh_next = hh_del->hh_next; \
} \
if (hh_del->hh_next) { \
hh_del->hh_next->hh_prev = hh_del->hh_prev; \
}
HASH_DEL
函數(shù)
-
HASH_DEL
函數(shù)也是刪除哈希表中的元素傲醉,但是不同于上一個小節(jié)HASH_DEL_IN_BKT
函數(shù)蝇闭,這個函數(shù)不需要知道元素落在了哪個哈希桶中 -
HASH_DEL
函數(shù)如果發(fā)現(xiàn)當前要刪除的是哈希表唯一的元素,這個函數(shù)還好進一步刪除整個哈希表硬毕,這一特性與HASH_ADD
對應(yīng) -
HASH_DEL
函數(shù)不僅更新了哈希桶的鏈表結(jié)構(gòu)呻引,還更新了UT_hash_handle
雙向鏈表結(jié)構(gòu)和UT_hash_table
的tail
成員變量 -
HASH_DEL
函數(shù)最后利用了HASH_DEL_IN_BKT
函數(shù)更新哈希桶的鏈表數(shù)據(jù)
#define HASH_DEL(head,delptr) \
HASH_DELETE(hh,head,delptr)
#define HASH_DELETE(hh,head,delptr) \
do { \
unsigned _hd_bkt; \
struct UT_hash_handle *_hd_hh_del; \
if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) ) { \
uthash_free((head)->hh.tbl->buckets, \
(head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
HASH_BLOOM_FREE((head)->hh.tbl); \
uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \
head = NULL; \
} else { \
_hd_hh_del = &((delptr)->hh); \
if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) { \
(head)->hh.tbl->tail = \
(UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) + \
(head)->hh.tbl->hho); \
} \
if ((delptr)->hh.prev) { \
((UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) + \
(head)->hh.tbl->hho))->next = (delptr)->hh.next; \
} else { \
DECLTYPE_ASSIGN(head,(delptr)->hh.next); \
} \
if (_hd_hh_del->next) { \
((UT_hash_handle*)((ptrdiff_t)_hd_hh_del->next + \
(head)->hh.tbl->hho))->prev = \
_hd_hh_del->prev; \
} \
HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \
HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \
(head)->hh.tbl->num_items--; \
} \
HASH_FSCK(hh,head); \
} while (0)
HASH_ITER
函數(shù)
-
HASH_ITER
函數(shù)用于循環(huán)所有的哈希表的元素
#define HASH_ITER(hh,head,el,tmp) \
for((el)=(head),(tmp)=DECLTYPE(el)((head)?(head)->hh.next:NULL); \
el; (el)=(tmp),(tmp)=DECLTYPE(el)((tmp)?(tmp)->hh.next:NULL))
#endif
哈希算法
-
swoole_hash_php
算法
static inline uint64_t swoole_hash_php(char *key, uint32_t len)
{
register ulong_t hash = 5381;
/* variant with the hash unrolled eight times */
for (; len >= 8; len -= 8)
{
hash = ((hash << 5) + hash) + *key++;
hash = ((hash << 5) + hash) + *key++;
hash = ((hash << 5) + hash) + *key++;
hash = ((hash << 5) + hash) + *key++;
hash = ((hash << 5) + hash) + *key++;
hash = ((hash << 5) + hash) + *key++;
hash = ((hash << 5) + hash) + *key++;
hash = ((hash << 5) + hash) + *key++;
}
switch (len)
{
case 7: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
/* no break */
case 6: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
/* no break */
case 5: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
/* no break */
case 4: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
/* no break */
case 3: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
/* no break */
case 2: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
/* no break */
case 1: hash = ((hash << 5) + hash) + *key++; break;
case 0: break;
default: break;
}
return hash;
}
-
swoole_hash_austin
算法
static inline uint32_t swoole_hash_austin(char *key, unsigned int keylen)
{
unsigned int h, k;
h = 0 ^ keylen;
while (keylen >= 4)
{
k = key[0];
k |= key[1] << 8;
k |= key[2] << 16;
k |= key[3] << 24;
k *= 0x5bd1e995;
k ^= k >> 24;
k *= 0x5bd1e995;
h *= 0x5bd1e995;
h ^= k;
key += 4;
keylen -= 4;
}
switch (keylen)
{
case 3:
h ^= key[2] << 16;
/* no break */
case 2:
h ^= key[1] << 8;
/* no break */
case 1:
h ^= key[0];
h *= 0x5bd1e995;
}
h ^= h >> 13;
h *= 0x5bd1e995;
h ^= h >> 15;
return h;
}