# Click to view code
import jieba
import wordcloud
def takeSecond(elem):
    """Return the item at index 1 of ``elem``.

    Intended as a ``key=`` function for sorting ``(word, count)`` pairs by
    their count.

    Args:
        elem: Any indexable object with at least two items.

    Returns:
        ``elem[1]``.

    Raises:
        IndexError: If ``elem`` has fewer than two items.
    """
    return elem[1]
def createWordCloud(text):
    """Build a word cloud from ``text`` and return it.

    The cloud is configured for a 1000x500 white canvas and uses the
    ``STZHONGS.TTF`` font file so that Chinese glyphs can be rendered.

    Args:
        text: The text to generate the word cloud from.

    Returns:
        The populated ``wordcloud.WordCloud`` instance, so the caller can
        render or save it. (Previously the object was discarded and the
        function returned ``None``.)
    """
    w = wordcloud.WordCloud(font_path="STZHONGS.TTF", width=1000, height=500, background_color="white")
    w.generate(text)
    return w
if __name__ == '__main__':
    # Script entry point: read the novel, segment it with jieba, count the
    # multi-character words that are not stop words, and print the
    # (word, count) pairs ordered from most to least frequent.
    path = r"红楼梦.txt"
    # The context manager closes the file even if reading raises.
    with open(path, "r", encoding="utf-8") as file:
        text = file.read()
    print(text)

    # Load the Chinese stop-word list (one entry per line). It is read before
    # segmentation so that a missing file fails fast, and stored in a set for
    # O(1) membership tests. The handle is closed by the context manager
    # (the original left it open).
    with open(r"cn_stopwords.txt", "r", encoding='utf-8') as stop_file:
        stop_words = set(stop_file.read().split("\n"))

    words = jieba.lcut(text)  # segment the text into words with jieba
    counts = {}
    for word in words:
        # Skip single-character tokens and stop words; filtering here gives
        # the same result as counting first and deleting afterwards, without
        # needing a catch-all ``except`` around the deletion.
        if len(word) == 1 or word in stop_words:
            continue
        counts[word] = counts.get(word, 0) + 1

    # Sort by count, highest first; ties keep first-seen order (stable sort).
    items = list(counts.items())
    items.sort(key=takeSecond, reverse=True)
    print(items)