# =============================================
# --*-- coding: utf-8 --*--
# @Time : 2020-06-18
# @Author : AXYZdong
# @CSDN : https://blog.csdn.net/qq_43328313
# @FileName: wc.py
# @Software: Python3.7
# =============================================
import collections
import re

import jieba
import matplotlib.pyplot as plt
from matplotlib import colors
from PIL import Image
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
# ---- Configuration ----
# Substrings that identify each of the two speakers. A line that matches the
# date pattern below AND contains one of these substrings is treated as a
# message header for that speaker.
one_name = ["***"]
two_name = ["***"]
# A header line is recognised by a date of the form YYYY-M-D anywhere in it.
timepat = re.compile(r"\d{4}-\d{1,2}-\d{1,2}")
# Colour palette used for the word cloud text; edit freely.
color_list = [
    '#CD853F', '#DC143C', '#00FF7F', '#FF6347', '#8B008B', '#00FFFF',
    '#0000FF', '#8B0000', '#FF8C00', '#1E90FF', '#00FF00', '#FFD700',
    '#008080', '#008B8B', '#8A2BE2', '#228B22', '#FA8072', '#808080',
]
# `colors` is matplotlib.colors (imported at the top of the file).
colormap = colors.ListedColormap(color_list)

# ---- Read and split the chat log by speaker ----
# Read everything up front and close the file immediately.
with open("**群.txt", "r", encoding="utf-8") as file:
    lines = file.readlines()

# `flag` remembers whose header was seen on the previous line:
# 0 = none, "one" / "two" = the next line belongs to that speaker.
flag = 0
one_s = []
two_s = []
for line in lines:
    # Strip the image / sticker placeholders and the trailing newline.
    line = line.replace("[图片]", "").replace("[表情]", "").replace("\n", "")

    # Only the single line directly after a header is recorded as the
    # message; further lines of a multi-line message are not captured.
    if flag == "one":
        one_s.append(line)
        flag = 0
    elif flag == "two":
        two_s.append(line)
        flag = 0

    # Header detection runs on every line (including one just recorded).
    # If a line matches both name lists, speaker two takes precedence.
    if timepat.search(line):
        if any(w in line for w in one_name):
            flag = "one"
        if any(w in line for w in two_name):
            flag = "two"

print(len(one_s))
print(len(two_s))
# ---- Tokenise speaker two's messages and count word frequencies ----
# Words excluded from the statistics. Tokens of length 1 are dropped by the
# length check below regardless, so only the multi-character entries here
# actually affect the result.
remove_words = ['的', ',', '和', '是', '随着', '对于', '对', '等', '能', '都', '。', ' ', '、', '中', '在', '了',
                '通常', '如果', '我们', '需要', '我', '你', '?', "", " ", "就", "不", "啊",
                "吧", "也", "不是", "就是", "什么", "怎么", "这个", "这么", "一个"]
# Build the lookup set once instead of scanning the list for every token.
_remove_set = set(remove_words)

words = []
for s in two_s:
    # Precise-mode segmentation (cut_all=False).
    for word in jieba.cut(s, cut_all=False):
        if len(word) > 1 and word not in _remove_set:
            words.append(word)

word_counts = collections.Counter(words)
words_top10 = word_counts.most_common(10)
print(words_top10)  # ten most frequent words with their counts
# ---- Build and display the word cloud ----
# Mask image; the relative path is resolved against the current working
# directory.
backgroud_Image = plt.imread('面纱.jpg')
print('加载图片成功!')

# Word-cloud styling.
wc = WordCloud(
    background_color='white',  # canvas colour
    mask=backgroud_Image,  # image array that shapes the cloud
    # A font containing CJK glyphs is required, otherwise Chinese words are
    # drawn as empty boxes. Raw string: the backslashes are path separators,
    # not escape sequences.
    font_path=r'C:\Windows\Fonts\msyhl.ttc',
    max_words=2000,  # maximum number of words drawn
    # NOTE: per the wordcloud docs, stopwords are ignored when the cloud is
    # built with generate_from_frequencies(); filtering must happen before
    # counting.
    stopwords=STOPWORDS,
    colormap=colormap,  # palette the word colours are drawn from
    max_font_size=150,  # upper bound on the font size
    random_state=30,  # fixed seed so the output is reproducible
)
wc.generate_from_frequencies(word_counts)  # build the cloud from the word counts
print('开始加载文本')

# Optional: colour the words after the mask image instead of the colormap.
# img_colors = ImageColorGenerator(backgroud_Image)
# wc.recolor(color_func=img_colors)

plt.imshow(wc)  # draw the word cloud
plt.axis('off')  # hide the axes and tick labels
plt.show()
print('生成词云成功!')