Hash Table 哈希表工作原理介绍及C/C++/Python实现

标签：node map hash cur Python C++ key 哈希 Hash

Hash Table 哈希表工作原理介绍及C/C++/Python实现

哈希表（Hash Table），也称为散列表，是一种通过哈希函数将键（Key）映射到表中一个位置以便快速访问记录的数据结构。它提供了非常高效的数据检索、插入和删除操作。哈希表的基本原理是使用一个哈希函数将输入（通常是字符串）转换为一个索引值，这个索引值决定了数据在哈希表中的存储位置。

哈希表的工作原理

哈希表的工作原理涉及几个关键概念：哈希函数、数组、冲突解决策略和动态调整大小。

哈希函数

哈希函数是哈希表的核心，它接受一个键（key）作为输入，并生成一个整数作为输出。这个输出值通常称为哈希码（hash code），用于确定键在哈希表中的存储位置。理想的哈希函数应该具备以下特性：

确定性：相同的键总是映射到相同的哈希值。
快速计算：哈希函数应该尽可能快速地执行。
均匀分布：不同的键应该均匀地映射到哈希表的各个位置，以减少冲突。
最小化冲突：尽可能减少不同键映射到相同哈希值的情况。

数组

哈希表通常是一个数组，数组的每个元素称为桶（bucket）。哈希函数将键映射到数组的索引，每个桶存储一个键值对（key-value pair）。

冲突解决策略

由于哈希函数的输出范围有限，而键的数量可能非常大，因此不同键可能会映射到相同的哈希值，这种情况称为冲突。解决冲突的常见策略包括：

链地址法（Separate Chaining）：每个桶存储一个链表，所有具有相同哈希值的键值对都存储在这个链表中。当插入、查找或删除操作发生时，首先计算键的哈希值，然后在对应的链表中进行操作。
开放地址法（Open Addressing）：当发生冲突时，通过探测（Probing）方法寻找下一个可用的桶。常见的探测方法包括线性探测、二次探测和双重散列。
双重哈希（Double Hashing）：使用两个哈希函数来计算冲突时的步长，以减少聚集效应。

动态调整大小

随着键值对的插入，哈希表可能会变得拥挤，这会影响性能。为了保持操作的效率，一些哈希表实现会在负载因子（即已存储的键值对数量与数组大小的比率）超过某个阈值时动态调整大小。这通常涉及：

重新哈希：创建一个新的、更大的数组，并将所有现有键值对重新插入到新数组中。
调整哈希函数：根据新的数组大小调整哈希函数，以确保键均匀分布。

哈希表的C语言实现代码

下面是一个简单的哈希表实现，使用链地址法解决冲突。

// 定义哈希表节点结构
typedef struct hash_node {
    int key;
    int value;
    struct hash_node *next;
} hash_node;

// 定义哈希表结构
typedef struct hash_map {
    hash_node **nodes;
    int size;
} hash_map;

// 初始化哈希表
hash_map* hash_map_init(int size) {
    // 分配哈希表内存
    hash_map *map = (hash_map*) malloc(sizeof(hash_map));
    map->size = size;
    // 分配哈希表节点内存
    map->nodes = (hash_node**) malloc(sizeof(hash_node*) * size);
    // 初始化哈希表节点
    memset(map->nodes, 0, sizeof(hash_node*) * size);
    return map;
}

// 哈希函数
int hash(int key, int size) {
    return key % size;
}

// 插入哈希表
void hash_map_insert(hash_map *map, int key, int value) {
    // 计算哈希值
    int index = hash(key, map->size);
    // 创建哈希表节点
    hash_node *node = (hash_node*) malloc(sizeof(hash_node));
    node->key = key;
    node->value = value;
    node->next = NULL;
    // 插入哈希表
    if (map->nodes[index] == NULL) {
        map->nodes[index] = node;
    } else {
        hash_node *cur = map->nodes[index];
        while (cur->next != NULL) {
            cur = cur->next;
        }
        cur->next = node;
    }
}

// 查找哈希表
int hash_map_find(hash_map *map, int key) {
    // 计算哈希值
    int index = hash(key, map->size);
    // 查找哈希表
    hash_node *cur = map->nodes[index];
    while (cur != NULL) {
        if (cur->key == key) {
            return cur->value;
        }
        cur = cur->next;
    }
    return -1;
}

// 删除对应的哈希表节点
void hash_map_delete(hash_map *map, int key) {
    // 计算哈希值
    int index = hash(key, map->size);
    // 查找哈希表
    hash_node *cur = map->nodes[index];
    hash_node *pre = NULL;
    while (cur != NULL) {
        if (cur->key == key) {
            if (pre == NULL) {
                map->nodes[index] = cur->next;
            } else {
                pre->next = cur->next;
            }
            free(cur);
            return;
        }
        pre = cur;
        cur = cur->next;
    }
}

// 释放哈希表
void hash_map_free(hash_map *map) {
    for (int i = 0; i < map->size; i++) {
        hash_node *cur = map->nodes[i];
        while (cur != NULL) {
            hash_node *tmp = cur;
            cur = cur->next;
            free(tmp);
        }
    }
    free(map->nodes);
    free(map);
}

哈希表的C++实现代码

// 定义哈希表节点结构
struct hash_node {
    int key;
    int value;
    hash_node *next;
    hash_node(int k, int v) : key(k), value(v), next(NULL) {}
};

// 定义哈希表结构
class hash_map {
private:
    vector<hash_node*> nodes;
    int size;
public:
    hash_map(int s) : size(s) {
        nodes.resize(size, NULL);
    }

    // 哈希函数
    int hash(int key) {
        return key % size;
    }

    // 插入哈希表
    void insert(int key, int value) {
        int index = hash(key);
        hash_node *node = new hash_node(key, value);
        if (nodes[index] == NULL) {
            nodes[index] = node;
        } else {
            hash_node *cur = nodes[index];
            while (cur->next != NULL) {
                cur = cur->next;
            }
            cur->next = node;
        }
    }

    // 查找哈希表
    int find(int key) {
        int index = hash(key);
        hash_node *cur = nodes[index];
        while (cur != NULL) {
            if (cur->key == key) {
                return cur->value;
            }
            cur = cur->next;
        }
        return -1;
    }

    // 删除哈希表节点
    void remove(int key) {
        int index = hash(key);
        hash_node *cur = nodes[index];
        hash_node *pre = NULL;
        while (cur != NULL) {
            if (cur->key == key) {
                if (pre == NULL) {
                    nodes[index] = cur->next;
                } else {
                    pre->next = cur->next;
                }
                delete cur;
                return;
            }
            pre = cur;
            cur = cur->next;
        }
    }

    // 释放哈希表
    ~hash_map() {
        for (int i = 0; i < size; i++) {
            hash_node *cur = nodes[i];
            while (cur != NULL) {
                hash_node *tmp = cur;
                cur = cur->next;
                delete tmp;
            }
        }
    }
};

哈希表的Python实现代码

class hash_map:
    def __init__(self, size):
        self.size = size
        self.nodes = [None] * size

    # 哈希函数
    def hash(self, key):
        return key % self.size

    # 插入哈希表
    def insert(self, key, value):
        index = self.hash(key)
        node = [key, value]
        if self.nodes[index] is None:
            self.nodes[index] = [node]
        else:
            self.nodes[index].append(node)

    # 查找哈希表
    def find(self, key):
        index = self.hash(key)
        if self.nodes[index] is not None:
            for node in self.nodes[index]:
                if node[0] == key:
                    return node[1]
        return -1

    # 删除哈希表节点
    def remove(self, key):
        index = self.hash(key)
        if self.nodes[index] is not None:
            for i, node in enumerate(self.nodes[index]):
                if node[0] == key:
                    self.nodes[index].pop(i)
                    return

    # 释放哈希表
    def __del__(self):
        for i in range(self.size):
            if self.nodes[i] is not None:
                del self.nodes[i]

标签：node,map,hash,cur,Python,C++,key,哈希,Hash
From： https://blog.csdn.net/qq_45641147/article/details/142220458

Hash Table 哈希表工作原理介绍及C/C++/Python实现