import jieba
from collections import Counter

# Read the job-description text
with open("词频统计_AIjob.csv", "r", encoding="utf-8") as f:
    desc = f.read()
# Load the stop-word list (a set makes the membership test below fast)
stop_words = set()
with open("stopwords.txt", "r", encoding="utf-8") as f:
    for line in f:
        stop_words.add(line.strip())
# Load a user dictionary so jieba keeps AI-domain terms as single tokens
jieba.load_userdict("人工智能词汇.txt")

# Segment the text (precise mode)
words = jieba.cut(desc, cut_all=False)

# Drop stop words and single-character tokens
filtered_words = []
for word in words:
    if word not in stop_words and len(word) > 1:
        filtered_words.append(word)
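# Optional check (illustrative; assumes 人工智能词汇.txt lists a term such
# as "深度学习"): a dictionary term should come back as a single token.
print("/".join(jieba.cut("深度学习", cut_all=False)))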
# Count word frequencies
word_counts = Counter(filtered_words)
# Keep the 500 most frequent terms and turn the (word, count) pairs
# into a dict with a dict comprehension
top_words = word_counts.most_common(500)
dict_result = {key: value for key, value in top_words}
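# Quick sanity check (illustrative, not part of the original pipeline):
# the most frequent terms should be multi-character domain words,
# confirming that stop words and single characters were filtered out.
for word, count in word_counts.most_common(10):
    print(f"{word}\t{count}")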
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from scipy.ndimage import gaussian_gradient_magnitude
from wordcloud import WordCloud, ImageColorGenerator


def pic_wordcloud(dict_result, img_path, out_path):
    # Load the mask image as an RGB array; the original loading line is
    # missing, so reading img_path here is an assumption based on the signature
    parrot_color = np.array(Image.open(img_path))
    # Create the mask: white is "masked out", so turn pure-black pixels white
    parrot_mask = parrot_color.copy()
    parrot_mask[parrot_mask.sum(axis=2) == 0] = 255
    # Sharpen color boundaries: mask out pixels with a strong color gradient
    edges = np.mean([gaussian_gradient_magnitude(parrot_color[:, :, i] / 255., 2)
                     for i in range(3)], axis=0)
    parrot_mask[edges > .08] = 255
    # relative_scaling=0 means the frequencies in the data are reflected
    # less accurately, but it makes a better picture
    wc = WordCloud(max_words=1000, mask=parrot_mask, max_font_size=40,
                   random_state=42,
                   font_path=r"C:\Users\10921\AppData\Local\Microsoft\Windows\Fonts\方正正准黑简体.ttf",
                   relative_scaling=0,
                   # width=1920, height=1080
                   )
    # Generate the word cloud from the frequency dict
    wc.generate_from_frequencies(dict_result)
    # Recolor the words from the mask image and write the file; the original
    # never used out_path or the ImageColorGenerator import, so this wiring
    # is the assumed intent
    wc.recolor(color_func=ImageColorGenerator(parrot_color))
    wc.to_file(out_path)
    # plt.imshow(wc)
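
# Example call (file names are hypothetical): any color image on a black
# background works as the mask, and the finished cloud is written to the
# second path.
pic_wordcloud(dict_result, "mask.png", "AIjob_wordcloud.png")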