"""Print the most frequent multi-character words of 红楼梦.txt.

The text is segmented with jieba, single-character tokens and a fixed list of
stop words are discarded, and the remaining words are printed with their
occurrence counts, most frequent first.
"""

from collections import Counter

# Text file that is analysed when the module is run as a script.
TEXT_PATH = "红楼梦.txt"

# Number of rows printed by the script.
TOP_N = 20

# Words removed from the ranking before the top entries are selected.
# NOTE(review): the list also contains the names 鸳鸯 and 探春 — confirm that
# excluding them is intended.
STOP_WORDS = frozenset({
    '什么', '一个', '我们', '那里', '你们', '如今', '说道', '知道', '起来', '姑娘', '这里', '出来',
    '他们', '众人', '自己', '一面', '只见', '两个', '没有', '怎么', '不是', '不知', '这个', '听见',
    '这样', '进来', '东西', '告诉', '就是', '咱们', '回来', '大家', '只是', '只得', '这些', '不敢',
    '丫头', '出去', '所以', '不过', '的话', '不好', '鸳鸯', '探春', '一时', '不能', '过来', '心里',
    '银子', '如此', '今日', '几个', '二人', '答应', '还有', '只管', '说话', '这么', '一回', '那边',
})


def top_words(words, limit=TOP_N, stop_words=STOP_WORDS):
    """Return the most frequent words as ``(word, count)`` pairs.

    Args:
        words: Iterable of string tokens (e.g. the result of ``jieba.lcut``).
        limit: Maximum number of pairs to return.
        stop_words: Container of words that are left out of the ranking.

    Returns:
        A list of at most ``limit`` pairs ordered by descending count; words
        with equal counts keep the order in which they first appeared. The
        list is shorter than ``limit`` when fewer distinct words remain.
    """
    # Filtering while counting (instead of deleting afterwards) means a stop
    # word that never occurs in the text cannot raise KeyError.
    counts = Counter(
        word for word in words
        if len(word) != 1 and word not in stop_words
    )
    return counts.most_common(limit)


def main(path=TEXT_PATH):
    """Segment the text at ``path`` and print its most frequent words.

    Raises:
        OSError: If the file cannot be opened or read.
        ImportError: If the third-party ``jieba`` package is not installed.
    """
    # Imported here so that ``top_words`` stays usable without jieba.
    import jieba

    # The context manager closes the file even if reading fails.
    with open(path, "r", encoding='UTF-8') as text_file:
        text = text_file.read()

    for word, count in top_words(jieba.lcut(text)):
        print(f"{word:<5} {count:>5}")


if __name__ == "__main__":
    main()
# Tags: count, cut, word, paixu, words, txt, 分词
# From: https://www.cnblogs.com/wubianxuyu/p/17933728.html