C语言数据结构之哈希表(HASHTABLE)的实现
哈希表的每个节点保存的数据格式为key : value,其中key为字符串,根据字符串内容采用不同方法(哈希函数)生成一个无符号整型哈希码,根据表的长度,采用取余法,将数据存入表单元,如果此表单元中已存在数据,则以此表单元为链表头,向链表追加数据,这样表的长度不是数据的总量,总量可以大于表的长度!!!
哈希表的数据结构如下所示:
-----
0 | O | -> NULL
----- ----- ----- -----
1 | O | -> | O | -> | O | -> | O | -> NULL
----- ----- ----- -----
. ... ... ... ....
. ... ... ... ....
. ... ... ... ....
----- -----
n | O | -> | O | -> NULL
----- -----
- 每个节点保存一个Entry数据格式为(key : value)
- 每个节点都是一个链表节点,有一个next指针指向下一个节点
- 生成哈希码函数 gen_hashcode
- 创建哈希表函数 hash_table_new
- 释放哈希表函数 hash_table_free
代码如下:
/* filename : ht.c */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* define datatype uint */
typedef unsigned int uint;
/* define function pointer HTFunc for HashTable */
typedef void (*HTFunc) (void *key, void *val);
/* define datatype Entry */
typedef struct _Entry Entry;
struct _Entry {
void *key;
void *val;
Entry *next;
};
/* define datatype HashTable */
typedef struct _HashTable HashTable;
struct _HashTable {
Entry **elts;
uint size;
uint length;
};
/* define gen_hashcode function to generate hash code */
static uint
gen_hashcode (const char *p)
{
unsigned int h = *p;
if (h)
for (p += 1; *p != '\0'; p++)
h = (h << 5) - h + *p;
return h;
}
/* create a new hash table */
HashTable *
hash_table_new (uint length)
{
HashTable *ht = (HashTable*) malloc (sizeof(HashTable));
ht->elts = (Entry**) malloc (length * sizeof(Entry*));
ht->length = length;
ht->size = 0;
for (int i = 0; i < ht->length; i++)
{
ht->elts[i] = (Entry*) malloc (sizeof(Entry));
ht->elts[i]->key = NULL;
ht->elts[i]->val = NULL;
ht->elts[i]->next = NULL;
}
return ht;
}
/* free the hash table */
void
hash_table_free (HashTable *ht)
{
for (uint i = 0; i < ht->length; i++)
{
Entry *tmp = ht->elts[i];
while (tmp != NULL)
{
Entry *node = tmp;
tmp = tmp->next;
free (node);
}
}
free (ht->elts);
free (ht);
}
/*------------------------------*/
/**/
void
test_hashtable (void)
{
HashTable *ht = hash_table_new (17);
hash_table_free (ht);
}
/**/
int
main (int argc, char *argv[])
{
test_hashtable ();
return 0;
}
/* -- end -- */
编译运行,检查内存,效果如下:
songvm@ubuntu:~/works/xdn/loo$ gcc ht.c -o ht
songvm@ubuntu:~/works/xdn/loo$ ./ht
songvm@ubuntu:~/works/xdn/loo$ valgrind --leak-check=yes ./ht
==2835== Memcheck, a memory error detector
==2835== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==2835== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
==2835== Command: ./ht
==2835==
==2835==
==2835== HEAP SUMMARY:
==2835== in use at exit: 0 bytes in 0 blocks
==2835== total heap usage: 19 allocs, 19 frees, 560 bytes allocated
==2835==
==2835== All heap blocks were freed -- no leaks are possible
==2835==
==2835== For counts of detected and suppressed errors, rerun with: -v
==2835== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
songvm@ubuntu:~/works/xdn/loo$
向哈希表推入数据 PUT 从哈希表中取出数据 GET
- 向哈希表推入数据函数 hash_table_put
- 从哈希表中取出数据函数 hash_table_get
代码如下:
/* put keyval into hash table */
void
hash_table_put (HashTable *ht, void *key, void *val)
{
uint hc = gen_hashcode (key);
uint idx = hc % ht->length;
Entry *tmp = ht->elts[idx];
if (tmp->key == NULL)
{
tmp->key = key;
tmp->val = val;
}
else
{
Entry *node = (Entry*) malloc (sizeof(Entry));
node->key = key;
node->val = val;
node->next = NULL;
while (tmp->next != NULL)
tmp = tmp->next;
tmp->next = node;
}
ht->size = ht->size + 1;
}
/* get val by key from hash table */
void *
hash_table_get (HashTable *ht, void *key)
{
uint hc = gen_hashcode (key);
uint idx = hc % ht->length;
Entry *tmp = ht->elts[idx];
if (tmp->key == NULL) return NULL;
if (0 == strcmp (tmp->key, key))
return tmp->val;
else
{
while (tmp->next != NULL)
{
tmp = tmp->next;
if (0 == strcmp (tmp->key, key))
return tmp->val;
}
}
return NULL;
}
- 测试函数,定义字符串数组,保存英文、中文名字一一对应
- 将字符串数组中的数据循环推入哈希表
- 根英文名字在哈希表中查找Entry,然后输出中文名字
- 输出哈希表相关信息
代码如下:
/**/
void
test_hashtable (void)
{
char *buff[] = {
"Angus", "安格斯", "Anthony", "安东尼", "Apollo", "阿波罗",
"Arnold", "阿诺德", "Arthur", "亚瑟", "August", "奥古斯特",
"Austin", "奥斯汀", "Ben", "本", "Benjamin", "本杰明",
"Bert", "伯特", "Benson", "本森", "Bill", "比尔",
"Billy", "比利", "Blake", "布莱克", "Bob", "鲍伯",
"Bobby", "鲍比", "Brad", "布拉德", "Brandon", "布兰登",
"Brant", "布兰特", "Brent", "布伦特", "Brian/Bryan", "布赖恩",
"Brown", "布朗", "Bruce", "布鲁斯", "Caleb", "迦勒",
"Cameron", "卡梅伦", "Carl", "卡尔", "Carlos", "卡洛斯",
"Cary", "凯里", "Caspar", "卡斯帕", "Cecil", "塞西",
"Charles", "查尔斯", "Cheney", "采尼" };
char *key;
void *val;
HashTable *ht = hash_table_new (17);
int len = sizeof(buff)/sizeof(char*);
for (int i = 0; i < len/2; i++)
hash_table_put (ht, buff[2*i], buff[2*i+1]);
key = "Bill";
printf ("HashTable get key [%s] : val [%s]\n",
key, (char*)(hash_table_get(ht, key)));
key = "Austin";
printf ("HashTable get key [%s] : val [%s]\n",
key, (char*)(hash_table_get(ht, key)));
key = "Chinese";
val = hash_table_get(ht, key);
printf ("HashTable get key [%s] : val [%s]\n",
key, (val == NULL) ? "NULL" : (char*)(val));
key = "Brent";
val = hash_table_get(ht, key);
printf ("HashTable get key [%s] : val [%s]\n",
key, (val == NULL) ? "NULL" : (char*)(val));
printf ("HashTable length is %d\n", ht->length);
printf ("HashTable size is %d\n", ht->size);
hash_table_free (ht);
}
编译运行,结果如下,达到预期:
songvm@ubuntu:~/works/xdn/loo$ gcc ht.c -o ht
songvm@ubuntu:~/works/xdn/loo$ ./ht
HashTable get key [Bill] : val [比尔]
HashTable get key [Austin] : val [奥斯汀]
HashTable get key [Chinese] : val [NULL]
HashTable get key [Brent] : val [布伦特]
HashTable length is 17
HashTable size is 32
songvm@ubuntu:~/works/xdn/loo$
遍历哈希表,逐项操作函数FOREACH,参数为函数指针HTFunc
- 定义整型变量count,统计哈希表中Entry的数量,输出count的值
- 对每个Entry节点调用HTFunc指向的函数,输出相关信息
代码如下:
/* foreach node run htfunc of hashtable */
void
hash_table_foreach (HashTable *ht, HTFunc htfunc)
{
int count = 0;
for (uint i = 0; i < ht->length; i++)
{
Entry *tmp = ht->elts[i];
if (tmp->key != NULL)
{
printf ("%2d, ", count);
htfunc (tmp->key, tmp->val);
count++;
while (tmp->next != NULL)
{
tmp = tmp->next;
printf ("%2d, ", count);
htfunc (tmp->key, tmp->val);
count++;
}
}
}
}
- 测试函数,遍历哈希表,输出KEYVAL
代码如下:
/* define function for HTFunc */
void
out_keyval (void *key, void *val)
{
printf ("Key : [%s], Value : [%s]\n", (char*)key, (char*)val);
}
/**/
void
test_hashtable_foreach (void)
{
char *buff[] = {
"Angus", "安格斯", "Anthony", "安东尼", "Apollo", "阿波罗",
"Arnold", "阿诺德", "Arthur", "亚瑟", "August", "奥古斯特",
"Austin", "奥斯汀", "Ben", "本", "Benjamin", "本杰明",
"Bert", "伯特", "Benson", "本森", "Bill", "比尔",
"Billy", "比利", "Blake", "布莱克", "Bob", "鲍伯",
"Bobby", "鲍比", "Brad", "布拉德", "Brandon", "布兰登",
"Brant", "布兰特", "Brent", "布伦特", "Brian/Bryan", "布赖恩",
"Brown", "布朗", "Bruce", "布鲁斯", "Caleb", "迦勒",
"Cameron", "卡梅伦", "Carl", "卡尔", "Carlos", "卡洛斯",
"Cary", "凯里", "Caspar", "卡斯帕", "Cecil", "塞西",
"Charles", "查尔斯", "Cheney", "采尼" };
char *key;
void *val;
HashTable *ht = hash_table_new (17);
int len = sizeof(buff)/sizeof(char*);
for (int i = 0; i < len/2; i++)
hash_table_put (ht, buff[2*i], buff[2*i+1]);
hash_table_foreach (ht, out_keyval);
hash_table_free (ht);
}
编译运行,结果如下,符合预期:
songvm@ubuntu:~/works/xdn/loo$ gcc ht.c -o ht
songvm@ubuntu:~/works/xdn/loo$ ./ht
0, Key : [Austin], Value : [奥斯汀]
1, Key : [Bill], Value : [比尔]
2, Key : [Cecil], Value : [塞西]
3, Key : [Bob], Value : [鲍伯]
4, Key : [Anthony], Value : [安东尼]
5, Key : [Brandon], Value : [布兰登]
6, Key : [Brant], Value : [布兰特]
7, Key : [Carl], Value : [卡尔]
8, Key : [August], Value : [奥古斯特]
9, Key : [Brad], Value : [布拉德]
10, Key : [Brent], Value : [布伦特]
11, Key : [Caleb], Value : [迦勒]
12, Key : [Bert], Value : [伯特]
13, Key : [Blake], Value : [布莱克]
14, Key : [Ben], Value : [本]
15, Key : [Benjamin], Value : [本杰明]
16, Key : [Brian/Bryan], Value : [布赖恩]
17, Key : [Angus], Value : [安格斯]
18, Key : [Brown], Value : [布朗]
19, Key : [Caspar], Value : [卡斯帕]
20, Key : [Carlos], Value : [卡洛斯]
21, Key : [Bruce], Value : [布鲁斯]
22, Key : [Apollo], Value : [阿波罗]
23, Key : [Benson], Value : [本森]
24, Key : [Bobby], Value : [鲍比]
25, Key : [Arnold], Value : [阿诺德]
26, Key : [Arthur], Value : [亚瑟]
27, Key : [Billy], Value : [比利]
28, Key : [Cameron], Value : [卡梅伦]
29, Key : [Cary], Value : [凯里]
30, Key : [Charles], Value : [查尔斯]
31, Key : [Cheney], Value : [采尼]
songvm@ubuntu:~/works/xdn/loo$
遍历哈希表,逐项操作,输出哈希表的结构,空值输出NULL
代码如下:
/* foreach node run htfunc of hashtable */
void
hash_table_foreach (HashTable *ht, HTFunc htfunc)
{
for (uint i = 0; i < ht->length; i++)
{
int count = 0;
Entry *tmp = ht->elts[i];
if (tmp->key != NULL)
{
printf ("Line %2d, %d, ", i, count);
htfunc (tmp->key, tmp->val);
count++;
while (tmp->next != NULL)
{
tmp = tmp->next;
printf (", %d, ", count);
htfunc (tmp->key, tmp->val);
count++;
}
printf ("\n");
}
else
{
printf ("Line %2d, NULL!\n", i);
}
}
}
- 测试函数,修改一下HTFunc指针指向的函数,便于查看结果
代码如下:
/* define function for HTFunc */
void
out_keyval (void *key, void *val)
{
//printf ("Key : [%s], Value : [%s]\n", (char*)key, (char*)val);
printf ("[%s : %s]", (char*)key, (char*)val);
}
编译运行,结果如下,符合预期:
songvm@ubuntu:~/works/xdn/loo$ gcc ht.c -o ht
songvm@ubuntu:~/works/xdn/loo$ ./ht
Line 0, NULL!
Line 1, 0, [Austin : 奥斯汀], 1, [Bill : 比尔], 2, [Cecil : 塞西]
Line 2, 0, [Bob : 鲍伯]
Line 3, 0, [Anthony : 安东尼], 1, [Brandon : 布兰登], 2, [Brant : 布兰特], 3, [Carl : 卡尔]
Line 4, 0, [August : 奥古斯特]
Line 5, 0, [Brad : 布拉德], 1, [Brent : 布伦特], 2, [Caleb : 迦勒]
Line 6, 0, [Bert : 伯特], 1, [Blake : 布莱克]
Line 7, NULL!
Line 8, NULL!
Line 9, NULL!
Line 10, 0, [Ben : 本], 1, [Benjamin : 本杰明], 2, [Brian/Bryan : 布赖恩]
Line 11, 0, [Angus : 安格斯], 1, [Brown : 布朗]
Line 12, 0, [Caspar : 卡斯帕]
Line 13, 0, [Carlos : 卡洛斯]
Line 14, 0, [Bruce : 布鲁斯]
Line 15, 0, [Apollo : 阿波罗], 1, [Benson : 本森], 2, [Bobby : 鲍比]
Line 16, 0, [Arnold : 阿诺德], 1, [Arthur : 亚瑟], 2, [Billy : 比利], 3, [Cameron : 卡梅伦], 4, [Cary : 凯里], 5, [Charles : 查尔斯], 6, [Cheney : 采尼]
songvm@ubuntu:~/works/xdn/loo$
读文件到哈希表
- 找到前面文件读写实践中输出的文件name.dat
- 将没有中文名字的加上中文译名: Raymond 雷蒙德
- 将/符号去掉,加上空格 : Tanya/谭雅坦尼娅 ,Tanya 谭雅坦尼娅
- 读文件需要分配内存,所以要释放每一个节点的key和value函数:hash_table_free_elements
代码如下:
/* free keyvals all of the hash table */
void
hash_table_free_elements (HashTable *ht)
{
for (uint i = 0; i < ht->length; i++)
{
Entry *tmp = ht->elts[i];
while (tmp != NULL)
{
Entry *node = tmp;
tmp = tmp->next;
free (node->key);
free (node->val);
free (node);
}
}
free (ht->elts);
free (ht);
}
- 读文件函数 test_hashtable_from_file
- 每读一个字符串都在之前分配内存,将内存指针做为参数传给hash_table_put函数
- 操作结束后调用hash_table_free_elements释放分配的内存和哈希表
代码如下:
/**/
void
test_hashtable_from_file (void)
{
int tmp = 1;
char *key, *val;
FILE *fpi;
HashTable *ht = hash_table_new (1031);
fpi = fopen ("name.dat", "r");
while (tmp != EOF)
{
key = (char*) malloc (64*sizeof(char));
val = (char*) malloc (64*sizeof(char));
memset (key, 0, 64);
memset (val, 0, 64);
tmp = fscanf (fpi, "%s", key);
tmp = fscanf (fpi, "%s", val);
hash_table_put (ht, key, val);
}
hash_table_foreach (ht, out_keyval);
hash_table_free_elements (ht);
fclose (fpi);
}
编译运行,结果如下,符合预期:
songvm@ubuntu:~/works/xdn/loo$ gcc ht.c -o ht
songvm@ubuntu:~/works/xdn/loo$ valgrind --leak-check=yes ./ht
==3300== Memcheck, a memory error detector
==3300== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==3300== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
==3300== Command: ./ht
==3300==
Line 0, 0, [ : ]
Line 1, 0, [Ann : 安], 1, [Louise : 路易丝]
Line 2, 0, [Derek : 德里克]
Line 3, NULL!
Line 4, 0, [Kelsey : 凯尔西]
......
Line 1028, 0, [Louisa : 路易莎]
Line 1029, 0, [Diana : 黛安娜]
Line 1030, 0, [Amber : 安伯], 1, [Rachel : 雷切尔]
==3300==
==3300== HEAP SUMMARY:
==3300== in use at exit: 0 bytes in 0 blocks
==3300== total heap usage: 2,364 allocs, 2,364 frees, 117,832 bytes allocated
==3300==
==3300== All heap blocks were freed -- no leaks are possible
==3300==
==3300== For counts of detected and suppressed errors, rerun with: -v
==3300== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
songvm@ubuntu:~/works/xdn/loo$
下一步研究树型数据结构!!!
标签:tmp,hash,哈希,val,ht,C语言,HASHTABLE,key,Key From: https://blog.csdn.net/gwsong52/article/details/143319319