算法原理: 将一个字符串看成是一个P 进制的数字。
代码模板:
def __init__(self, s): n = len(s) self.BASE = BASE = 131 # 进制 131,131313 self.MOD = MOD = 10 ** 13 + 7 # 10**9+7,998244353,10**13+7 self.h = h = [0] * (n + 1) self.p = p = [1] * (n + 1) for i in range(1, n + 1): p[i] = (p[i - 1] * BASE) % MOD h[i] = (h[i - 1] * BASE % MOD + ord(s[i - 1])) % MOD def get_hash(self, l, r): return (self.h[r] - self.h[l - 1] * self.p[r - l + 1] % self.MOD) % self.MOD h = sh.get_hash(l, r)
// 注意这里有个映射,我们的字符串哈希值从1开始,题目中给出的字符串多从0开始,从str -> h 只要所有下标都加1 即可。
class StringHash: def __init__(self, s: str): n = len(s) self.MOD = MOD = 10**13 + 7 self.BASE = BASE = 131 self.h = h = [0] * (n + 10) self.p = p = [1] * (n + 10) for i in range(1, n + 1): p[i] = (p[i - 1] * BASE) % MOD h[i] = (h[i - 1] * BASE % MOD + ord(s[i - 1])*2) % MOD def get_hash(self, l, r): return (self.h[r + 1] - self.h[l] * self.p[r - l + 1] % self.MOD) % self.MOD class Solution: def findRepeatedDnaSequences(self, s: str) -> List[str]: hs = StringHash(s) n = len(s) vis = Counter() ans = [] for i in range(n - 9): h = hs.get_hash(i,i+9) vis[h] += 1 if vis[h] == 2: ans.append(s[i:i+10]) return ans
标签:__,10,self,BASE,哈希,字符串,def,MOD From: https://www.cnblogs.com/zk6696/p/17810236.html