"""Day-1 machine-learning notes: data preprocessing and a first linear regression.

Every experiment from the notes is a small function, so each one can be run on
its own (for example from a REPL or a notebook) instead of being switched on
and off by commenting code in and out. Importing this module runs none of them.
"""

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import preprocessing
from sklearn.linear_model import LinearRegression


def _sample_data():
    """Return a fresh copy of the 3x4 sample matrix used by the preprocessing demos."""
    return np.array([
        [3, -1.5, 2, -5.4],
        [0, 4, -0.3, 2.1],
        [1, 3.3, -1.9, -4.3],
    ])


def demo_standardize(data=None) -> None:
    """Print column means and standard deviations before and after standardizing.

    Args:
        data: 2-D NumPy array to work on; the sample matrix is used when omitted.
    """
    if data is None:
        data = _sample_data()

    print(data)
    # mean() returns the arithmetic mean and std() the standard deviation.
    # axis=0 aggregates down each column (axis=1 would aggregate along each row).
    print("Mean: ", data.mean(axis=0))
    print("Standard Deviation: ", data.std(axis=0))

    # scale() standardizes the data: each column is centered on its mean and
    # rescaled to unit variance.
    data_standardized = preprocessing.scale(data)
    print("Mean: ", data_standardized.mean(axis=0))
    print("Standard Deviation: ", data_standardized.std(axis=0))


def demo_min_max_scale(data=None) -> None:
    """Print column minima and maxima before and after scaling into [0, 1].

    Args:
        data: 2-D NumPy array to work on; the sample matrix is used when omitted.
    """
    if data is None:
        data = _sample_data()

    # Before scaling.
    print("Min: ", data.min(axis=0))
    print("Max: ", data.max(axis=0))

    data_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
    data_scaled = data_scaler.fit_transform(data)

    # After scaling.
    print("Min: ", data_scaled.min(axis=0))
    print("Max: ", data_scaled.max(axis=0))
    print(data_scaled)


def demo_normalize(data=None) -> None:
    """Print the L1-normalized columns and the sum of absolute values per column.

    Args:
        data: 2-D NumPy array to work on; the sample matrix is used when omitted.
    """
    if data is None:
        data = _sample_data()

    # The norm is spelled "l1" (lowercase letter L followed by the digit one).
    # The original notes had "11" (two digits), which normalize() rejects; that
    # typo is why this experiment was recorded as failing.
    data_normalized = preprocessing.normalize(data, norm="l1", axis=0)
    print(data_normalized)

    # Sanity check: after L1 normalization along axis 0, the absolute values in
    # every non-zero column should sum to 1.
    data_norm_abs = np.abs(data_normalized)
    print(data_norm_abs.sum(axis=0))


def demo_binarize(data=None, threshold: float = 1.4) -> None:
    """Print the data binarized at ``threshold`` (a technique used in image processing).

    Values greater than ``threshold`` become 1; all other values become 0.

    Args:
        data: 2-D NumPy array to work on; the sample matrix is used when omitted.
        threshold: cut-off passed to ``Binarizer``; defaults to the 1.4 used in
            the notes.
    """
    if data is None:
        data = _sample_data()

    data_binarized = preprocessing.Binarizer(threshold=threshold).transform(data)
    print(data_binarized)


def demo_one_hot_encode() -> None:
    """Print a small categorical matrix and the one-hot encoding of ``[1, 2, 3]``.

    The notes describe one-hot encoding as a tool for sparse data.
    """
    data = np.array([[1, 1, 2], [0, 2, 3], [1, 0, 1], [0, 1, 0]])
    print(data)

    encoder = preprocessing.OneHotEncoder()
    encoder.fit(data)
    # transform() yields a sparse matrix; toarray() makes it printable as a
    # dense array.
    encoded_vector = encoder.transform([[1, 2, 3]]).toarray()
    print(encoded_vector)


def demo_linear_regression(csv_path: str = "VehiclesItaly.csv") -> None:
    """Fit ``y`` against ``x`` from a CSV file, print the fit and plot it.

    Args:
        csv_path: path of a CSV file that has columns named ``x`` and ``y``;
            defaults to the file used in the notes.
    """
    data = pd.read_csv(csv_path)
    # head() only returns the preview, so print it to make it visible outside
    # a notebook; info() prints its summary by itself.
    print(data.head())
    data.info()

    # Double brackets select single-column DataFrames (2-D) rather than Series.
    x = data[["x"]]
    y = data[["y"]]

    model = LinearRegression()
    model.fit(x, y)
    print(model.coef_)
    print(model.intercept_)

    pred = pd.DataFrame(model.predict(x))
    print(pred.columns.values)

    # Scatter of the raw points with the fitted line drawn on top.
    sns.relplot(x="x", y="y", data=data)
    sns.lineplot(x=data["x"], y=pred[0])
    # Needed for the figure to appear when this runs as a plain script.
    plt.show()
# Tags: lf, 机器 (machine), day1, 学习 (learning), preprocessing, print, import, data, axis
# From: https://www.cnblogs.com/fighting-huihui/p/17437066.html