|
# 用到的库
from sklearn.manifold import TSNE
from bokeh.io import output_notebook
from bokeh.models import HoverTool
from bokeh.plotting import figure, show, ColumnDataSource
# Plotting code: author-topic map drawn with t-SNE.
# t-SNE (a tool for visualising clustering results) reduces data with more
# than two dimensions to a 2-D layout so that it can be shown as a scatter plot.
# Raising `smallest_author` removes long-tail authors from the plot.
tsne = TSNE(n_components=2, random_state=0)
smallest_author = 0  # Ignore authors with fewer documents than this.

# Ids of the authors that pass the document-count filter; they are used below
# as row indices into at_model.state.gamma.
authors = [
    author_id
    for name, author_id in at_model.author2id.items()
    if len(at_model.author2doc[name]) >= smallest_author
]

# fit_transform returns the embedding directly (it is also kept on
# tsne.embedding_), so there is no need to discard the result and re-read it.
embedding = tsne.fit_transform(at_model.state.gamma[authors, :])

# Tell Bokeh to display plots inside the notebook.
output_notebook()

x = embedding[:, 0]
y = embedding[:, 1]
author_names = [at_model.id2author[author_id] for author_id in authors]

# Radius of each point corresponds to the number of documents attributed to
# that author, shrunk by `scale`.
scale = 0.1
author_sizes = [len(at_model.author2doc[name]) for name in author_names]
radii = [size * scale for size in author_sizes]

source = ColumnDataSource(
    data=dict(
        x=x,
        y=y,
        author_names=author_names,
        author_sizes=author_sizes,
        radii=radii,
    )
)

# Add author names and sizes to mouse-over info.
hover = HoverTool(
    tooltips=[
        ("author", "@author_names"),
        ("size", "@author_sizes"),
    ]
)

p = figure(tools=[hover, 'crosshair,pan,wheel_zoom,box_zoom,reset,save,lasso_select'])

# Draw the points. circle() is the glyph that takes a `radius`;
# scatter(radius=...) is deprecated since Bokeh 3.4 in favour of it.
p.circle('x', 'y', radius='radii', source=source, fill_alpha=0.6, line_color=None)
show(p)
|
|