import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.manifold import TSNE
import plotly.express as px
# Pipeline: load cluster labels from CSV, one-hot encode them, project the
# encoded rows to 2-D with t-SNE, and show an interactive scatter plot.

# 1. Load the data into a DataFrame. The file is read without a header row,
#    so columns are addressed by position.
data = pd.read_csv('ma2.csv', header=None)
# Assumes the first three columns are, in order, the age cluster, the city
# cluster and the satisfaction/experience cluster -- TODO confirm against
# the data file.
features_df = data.iloc[:, :3]

# 2. One-hot encode the categorical features.
# The `sparse=False` keyword was renamed to `sparse_output` in scikit-learn
# 1.2 and removed in 1.4. Using the encoder's default (sparse) output and
# densifying it with `.toarray()` yields the same dense matrix on both old
# and new scikit-learn versions.
ohc = OneHotEncoder()
encoded_data = ohc.fit_transform(features_df).toarray()

# 3. Reduce the encoded features to two dimensions with t-SNE.
#    A fixed random_state keeps the embedding reproducible between runs.
tsne_model = TSNE(n_components=2, random_state=42)
transformed_data = tsne_model.fit_transform(encoded_data)

# Combine the 2-D embedding with the original category labels so they can
# drive the point colour and the hover tooltips.
combined_data = pd.DataFrame(transformed_data, columns=['Dim_1', 'Dim_2'])
combined_data['Satisfaction'] = features_df.iloc[:, 2]
combined_data['Age'] = features_df.iloc[:, 0]
combined_data['City'] = features_df.iloc[:, 1]

# 4. Build the interactive scatter plot: points are coloured by the
#    satisfaction column, with age and city shown on hover.
fig = px.scatter(
    combined_data,
    x='Dim_1',
    y='Dim_2',
    color='Satisfaction',
    hover_data=['Age', 'City'],
    title='t-SNE Visualization with Combined Features Colors (Interactive)',
    labels={'Satisfaction': '满意度', 'Age': '年龄聚类', 'City': '城市聚类'},
)
fig.show()
# Tags: features, df, py, combined, 聚类 (clustering), iloc, data
# Source: https://www.cnblogs.com/azwz/p/18084233