import re  # both functions below need it; the pasted snippet never imported it

# First alternative: a UTF-16 surrogate pair written as two consecutive escapes
# (high surrogate D800-DBFF followed by low surrogate DC00-DFFF).
# Second alternative: any single backslash-u escape with four hex digits.
_UNICODE_ESCAPE_RE = re.compile(
    r'\\u([dD][89abAB][0-9a-fA-F]{2})\\u([dD][c-fC-F][0-9a-fA-F]{2})'
    r'|\\u([0-9a-fA-F]{4})'
)


def get_info_by_pattern(text, pattern):
    """Return all non-overlapping matches of *pattern* found in *text*.

    Args:
        text: The string to search.
        pattern: A regular expression (string or compiled pattern).

    Returns:
        The list produced by ``findall``: whole matches when the pattern has
        no groups, otherwise the captured group(s) for each match.
    """
    return re.compile(pattern).findall(text)


def _decode_unicode_escape(match):
    """Return the character denoted by one match of ``_UNICODE_ESCAPE_RE``."""
    high, low, single = match.groups()
    if high is not None:
        # Combine the two UTF-16 halves into the real code point.
        code_point = (
            0x10000
            + ((int(high, 16) - 0xD800) << 10)
            + (int(low, 16) - 0xDC00)
        )
        return chr(code_point)
    code_point = int(single, 16)
    if 0xD800 <= code_point <= 0xDFFF:
        # An unpaired surrogate is not a valid character on its own (it
        # cannot be encoded as UTF-8), so leave the escape text untouched.
        return match.group(0)
    return chr(code_point)


def unicode_to_chinese(text):
    r"""Replace literal ``\uXXXX`` escape sequences in *text* with characters.

    Only a backslash followed by ``u`` and exactly four hexadecimal digits
    (upper or lower case) is treated as an escape; everything else, including
    malformed sequences such as ``\uzzzz``, is returned unchanged. Two
    consecutive escapes forming a surrogate pair are merged into the single
    character they encode.

    Args:
        text: A string that may contain literal ``\uXXXX`` sequences.

    Returns:
        A new string with every recognised escape decoded.
    """
    # A single substitution pass: replaced output is never rescanned, so a
    # decoded backslash cannot accidentally form a new escape sequence.
    return _UNICODE_ESCAPE_RE.sub(_decode_unicode_escape, text)
# Example: decode the \uXXXX escapes embedded in a scraped JSON-like fragment.
text=' text=":[]"aggregated_ranges":[]"ranges":[]"color_ranges":[]"text":"\\u5730\\u65b9\\u6027\\u5546\\u5bb6 \\u00b7 2 位粉丝"}"'
a=unicode_to_chinese(text)
print(a)
# Output (the decoded text):
# text=":[]"aggregated_ranges":[]"ranges":[]"color_ranges":[]"text":"地方性商家 · 2 位粉丝"}"

# Tags: unicode, chinese, python, text, ranges, str, pattern, string, uincode
# From: https://www.cnblogs.com/shaosks/p/17925680.html