"""Print the most frequent words in a plain-text copy of Hamlet."""
from collections import Counter

# Raw string so the Windows backslash is never read as an escape sequence.
HAMLET_PATH = r'I:\Hamlet_-Prince-of-Denmark_哈姆雷特_.txt'

# Characters that are replaced by a space before the text is split into words.
# NOTE(review): the set contains the typographic quote ‘ (U+2018) and neither
# the ASCII apostrophe nor the backtick -- confirm that this is intended.
PUNCTUATION = '!"#$%&()*+,-./:;<=>?@[\\]^_‘{|}~'
_PUNCT_TO_SPACE = str.maketrans(PUNCTUATION, " " * len(PUNCTUATION))


def getText(path=HAMLET_PATH, encoding=None):
    """Return the file's text lower-cased, with punctuation turned into spaces.

    Args:
        path: File to read. Defaults to the Hamlet text at HAMLET_PATH.
        encoding: Text encoding passed to open(). None keeps the platform
            default, which is what the script relied on before this
            parameter existed.

    Returns:
        The whole file as one lower-case string in which every character of
        PUNCTUATION has been replaced by a single space.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager closes the handle even if read() raises.
    with open(path, 'r', encoding=encoding) as source:
        txt = source.read()
    # One translate() pass replaces the chain of per-character replace() calls.
    return txt.lower().translate(_PUNCT_TO_SPACE)


def main(path=HAMLET_PATH, top_n=10, encoding=None):
    """Print the `top_n` most frequent words of the file, one per line.

    Each line holds the word left-aligned in 10 columns, a space, and its
    count right-aligned in 5 columns. Words with equal counts are printed in
    the order in which they first occur in the text. If the text has fewer
    than `top_n` distinct words, only those words are printed.
    """
    counts = Counter(getText(path, encoding).split())
    # most_common() returns at most top_n pairs, so a short text cannot
    # trigger the IndexError that indexing a fixed range(10) would.
    for word, count in counts.most_common(top_n):
        print("{0:<10} {1:>5}".format(word, count))


if __name__ == "__main__":
    main()
标签: word, items, Hamlet, 词频, counts, txt, 统计
From: https://www.cnblogs.com/T-2187803607/p/17137589.html