例如:在python中,我有一个字典,类似于{s1:[{s11:0,s12:2},{s13:3,s14:4}],s2:'s2',s3:{s31:0,s32:2}},我想使用递归提取其中所有的字典的key值和value,并在key值中包含它在字典中的位置信息,形成一个新的字典,如{s1_0_s11:0,s1_0_s12:2,s1_1_s13:3,s1_1_s14:4,s2:'s2',s3_s31:0,s3_s32:2}
import json
import multiprocessing
import os
from collections import defaultdict


def extract_dict(d, key_prefix=""):
    """Recursively flatten a nested dict into a single-level dict.

    Keys encode the position in the original structure: nested-dict keys
    are joined with "_", and list elements contribute their index, e.g.
    {"s1": [{"s11": 0}]} -> {"s1_0_s11": 0}.

    NOTE: non-dict items inside lists are silently dropped — this matches
    the original behavior.
    """
    result = {}
    for k, v in d.items():
        if isinstance(v, dict):
            result.update(extract_dict(v, key_prefix + k + "_"))
        elif isinstance(v, list):
            for i, item in enumerate(v):
                if isinstance(item, dict):
                    result.update(
                        extract_dict(item, key_prefix + k + "_" + str(i) + "_")
                    )
        else:
            result[key_prefix + k] = v
    return result


def merge_dicts(dict_list):
    """Merge flat dicts into one dict mapping key -> de-duplicated value list.

    Values must be hashable (they are passed through set()); the order of
    values after de-duplication is unspecified.
    """
    merged_dict = defaultdict(list)
    for d in dict_list:
        for k, v in d.items():
            merged_dict[k].append(v)
    # 去重 (de-duplicate per key)
    for k, v in merged_dict.items():
        merged_dict[k] = list(set(v))
    return dict(merged_dict)


def extract_dict_worker(json_file, dict_save="save-dict"):
    """Flatten one JSON file, save the result under dict_save, return it.

    BUG FIX: the original ended with the bare expression statement
    `result result` instead of `return result`, so every worker returned
    None and merge_dicts() crashed on the pooled results.
    """
    with open(json_file, "r") as f:
        d = json.load(f)
    result = extract_dict(d)
    # Robustness: ignore files without a 'format_filename' key instead of
    # raising KeyError (original used `del result['format_filename']`).
    result.pop("format_filename", None)
    # Robustness: the original assumed the output directory already existed.
    os.makedirs(dict_save, exist_ok=True)
    with open(os.path.join(dict_save, os.path.basename(json_file)), "w") as f:
        json.dump(result, f)
    return result


def extract_dicts_parallel(json_dir, n_workers=None):
    """Flatten every JSON file in json_dir in parallel, then merge and
    de-duplicate all per-file results into merge_dict.json."""
    json_str_list = [os.path.join(json_dir, jd) for jd in os.listdir(json_dir)]
    # `with` guarantees the pool is torn down even if map() raises.
    with multiprocessing.Pool(n_workers) as pool:
        result_list = pool.map(extract_dict_worker, json_str_list)
    # 对新的txt进行汇总,去重 (aggregate and de-duplicate across files)
    new_dict = merge_dicts(result_list)
    with open("merge_dict.json", "w") as f:
        json.dump(new_dict, f)


if __name__ == "__main__":
    json_dir = "./5000-json"
    # 多进程保存新的txt (save flattened per-file dicts with 20 workers)
    extract_dicts_parallel(json_dir, 20)
标签:list,json,dict,result,key,extract From: https://www.cnblogs.com/chentiao/p/17117000.html