針對前段時間爬取的購物狂育兒板塊帖子,用結巴分詞進行分詞,并排除無意義的停用詞,并對詞頻結果生成詞云圖。分析一下大家目前針對小BABY最關注哪些方面。
import jieba.analyse
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from wordcloud import WordCloud,ImageColorGenerator
# Read the whole scraped corpus into memory. The `with` block guarantees the
# file handle is closed even if reading fails.
# NOTE(review): no encoding is passed, so the platform default is used --
# confirm it matches how c:/1.txt was written.
with open('c:/1.txt', 'r') as f:
    text = f.read()

# Ask jieba's TextRank extractor for the 200 highest-ranked keywords.
# withWeight=True makes every entry a (word, weight) pair.
result = jieba.analyse.textrank(text, topK=200, withWeight=True)
# Words that rank highly but carry no topical information for this forum
# (greetings, generic verbs, forum boilerplate). Stored as a set because it
# is only used for membership tests.
# NOTE(review): entries must match the extractor's output tokens exactly,
# including script (traditional vs. simplified) -- verify against the corpus.
stopword = {
    "推薦", "求助", "請問", "知道", "兒童", "請教", "沒有", "問題", "需要", "記錄", "大家", "分享",
    "適合", "方法", "重慶", "有沒有", "麻麻", "小朋友", "看看", "牌子", "寶媽", "攝影",
    "開始", "地方", "時間", "小兒", "經驗", "不吃", "媽媽", "娃兒", "孩子",
    "爸爸", "咨詢", "體驗", "不能", "時候", "還有", "活動", "起來", "成長", "嬰兒", "育兒",
    "母嬰", "進來", "父母", "新手", "家長", "親們", "喜歡", "東西", "出生", "妹妹",
    "幫忙", "小孩", "好用", "照片", "有點", "感覺", "免費", "應該", "準備", "娃娃",
    "媽咪", "沒得", "注意", "看到", "支招", "選擇", "購物狂", "不會", "出來", "婆子",
    "日記", "參加", "遇到", "辣媽", "生育", "新生兒", "美媽", "情況", "覺得", "發現",
    "臺歷", "添加", "幼兒", "轉讓", "座椅", "了解", "歸來", "報告", "急求", "跪求",
    "朋友", "糾結", "辦法", "經歷",
}

# Map each surviving keyword to its weight; stopwords are dropped.
keywords = {word: weight for word, weight in result if word not in stopword}
print(keywords)
# Load the picture whose shape is used as the word-cloud mask. The pixels are
# copied into a NumPy array inside the `with` block, so the image file handle
# is released as soon as the array exists.
with Image.open('c:/1.jpg') as image:
    graph = np.array(image)

# An explicit font file is supplied so the Chinese keywords can be rendered;
# at most 50 words are drawn, laid out inside the mask on a white background.
wc = WordCloud(
    font_path='./fonts/simhei.ttf',
    background_color='White',
    max_words=50,
    mask=graph,
)
# `keywords` maps word -> weight; the weights are used as the frequencies.
wc.generate_from_frequencies(keywords)

# Colour function derived from the mask picture. It is only consumed by the
# optional recolouring preview below; the saved file keeps the default colours.
image_color = ImageColorGenerator(graph)

# Optional matplotlib preview (disabled):
# plt.imshow(wc)
# plt.imshow(wc.recolor(color_func=image_color))
# plt.axis("off")
# plt.show()
# plt.savefig('test.jpg', dpi=600)

# Write the rendered cloud to disk.
wc.to_file('gwk.jpg')
gwk.jpg