用到的模塊
- matplotlib 用來畫圖
- wordcloud 生成詞云
- jieba 中文分詞
- numpy 圖像矩陣處理
- PIL 圖像讀取
推薦使用jupyter-notebook玩耍
簡單的詞云
使用默認參數
from wordcloud import WordCloud
import jieba
import matplotlib.pyplot as plt
# Working directory that holds the font, the source text and the output image
RUN_PATH = "./word_cloud/"
# Font file (relative to RUN_PATH) handed to WordCloud as font_path
FONT = "_fonts/simhei.ttf"
# Source document (relative to RUN_PATH)
FILE_SOURCE = "_source/平凡的世界.txt"

# Read the whole document; the context manager closes the file handle
# deterministically instead of leaving it to the garbage collector.
with open(RUN_PATH + FILE_SOURCE, 'r', encoding='UTF-8') as source_file:
    text_raw = source_file.read()

# Segment the text with jieba (cut_all=True) and join the tokens with spaces,
# which is the input format passed to WordCloud.generate below.
text_jieba = jieba.cut(text_raw, cut_all=True)
text_jieba_space = " ".join(text_jieba)

# Build the word cloud with an 800x600 canvas on a black background
wd_gen = WordCloud(
    font_path=RUN_PATH + FONT,
    width=800,
    height=600,
    background_color='black',
).generate(text_jieba_space)

# Save the image directly under RUN_PATH, named after the source file.
# The base name is derived from the path itself rather than a hard-coded
# slice offset, so it keeps working if the source sub-directory is renamed.
output_name = FILE_SOURCE.rsplit('/', 1)[-1].rsplit('.', 1)[0] + '.png'
plt.imsave(RUN_PATH + output_name, wd_gen)

# Display the image without axes
plt.imshow(wd_gen)
plt.axis("off")
plt.show()
效果(《平凡的世界》)
平凡的世界.png
帶蒙版的詞云
from wordcloud import WordCloud
import jieba
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
# Working directory that holds the font, source text, mask and output image
RUN_PATH = "./word_cloud/"
# Font file (relative to RUN_PATH) handed to WordCloud as font_path
FONT = "_fonts/simhei.ttf"
# Source document (relative to RUN_PATH)
FILE_SOURCE = "_source/What_I_talk_about_when_I_talk_about_running.txt"
# Mask image (relative to RUN_PATH) passed to WordCloud as mask
MASK_IMG = "_mask/nike2.jpg"

# Read the whole document; the context manager closes the file handle
# deterministically instead of leaving it to the garbage collector.
with open(RUN_PATH + FILE_SOURCE, 'r', encoding='UTF-8') as source_file:
    text_raw = source_file.read()

# Segment the text with jieba (cut_all=True) and join the tokens with spaces,
# which is the input format passed to WordCloud.generate below.
text_jieba = jieba.cut(text_raw, cut_all=True)
text_jieba_space = " ".join(text_jieba)

# Load the mask picture as a numpy array, the form given to WordCloud(mask=...)
mask_img = np.array(Image.open(RUN_PATH + MASK_IMG))

# Build the masked word cloud: up to 1000 words on a black background,
# with a 3-pixel steelblue contour.
wd_gen = WordCloud(
    font_path=RUN_PATH + FONT,
    mask=mask_img,
    contour_width=3,
    contour_color='steelblue',
    max_words=1000,
    background_color='black',
).generate(text_jieba_space)

# Save the image directly under RUN_PATH, named after the source file.
# The base name is derived from the path itself rather than a hard-coded
# slice offset, so it keeps working if the source sub-directory is renamed.
output_name = FILE_SOURCE.rsplit('/', 1)[-1].rsplit('.', 1)[0] + '.png'
plt.imsave(RUN_PATH + output_name, wd_gen)

# Display the image without axes
plt.imshow(wd_gen)
plt.axis("off")
plt.show()
效果(《當我談跑步時我談些什麼》)
What_I_talk_about_when_I_talk_about_running.png