首页 > 其他分享 >基于pyspark的随机森林

基于pyspark的随机森林

时间:2023-02-02 10:32:39浏览次数:47  
标签:基于 27 15 14 22 pyspark 13 随机 2.5


关于随机森林的定义就不赘述

# Train and evaluate a random-forest classifier on the affairs dataset with
# PySpark: load the CSV, assemble the feature vector, split train/test, fit,
# then report accuracy, weighted precision, AUC and feature importances.

import findspark

# findspark.init() puts the local Spark installation on sys.path, so it must
# run before the first pyspark import.
findspark.init()

from numpy import frompyfunc
from pyspark.ml import classification
from pyspark.ml.classification import RandomForestClassificationModel, RandomForestClassifier
from pyspark.ml.evaluation import BinaryClassificationEvaluator
from pyspark.ml.evaluation import MulticlassClassificationEvaluator
from pyspark.ml.feature import VectorAssembler
from pyspark.sql import SparkSession
from pyspark.sql.functions import spark_partition_id

spark = SparkSession.builder.appName('test1').getOrCreate()
df = spark.read.csv('D:/PythonCode/Python_dataming/pythons/.vscode/机器学习/affairs.csv',inferSchema=True,header=True)
print(df.count())
# printSchema() prints by itself and returns None; wrapping it in print()
# would emit a stray "None" line.
df.printSchema()

# Pack the five predictor columns into the single vector column the
# classifier reads its features from.
df_ass = VectorAssembler(
    inputCols=['rate_marriage', 'age', 'yrs_married', 'children', 'religious'],
    outputCol='features',
)
df = df_ass.transform(df)
df.printSchema()

# Keep only the feature vector and the label, then split 75% / 25%.
model_df = df.select(['features', 'affairs'])
train_df, test_df = model_df.randomSplit([0.75, 0.25])

# Fit a 50-tree forest on the training split and score the held-out split.
rf = RandomForestClassifier(labelCol='affairs', numTrees=50).fit(train_df)
rf_pre = rf.transform(test_df)
# show() prints the rows itself and returns None, same as printSchema().
rf_pre.show(100)

rf_acc = MulticlassClassificationEvaluator(labelCol='affairs', metricName='accuracy').evaluate(rf_pre)
print(rf_acc)
# This value is the weighted precision; it was previously stored under the
# misleading name rf_auc.
rf_precision = MulticlassClassificationEvaluator(labelCol='affairs', metricName='weightedPrecision').evaluate(rf_pre)
print(rf_precision)
# Actual area under the ROC curve, computed from the 'rawPrediction' column
# that the fitted model adds to its output.
rf_auc = BinaryClassificationEvaluator(labelCol='affairs', metricName='areaUnderROC').evaluate(rf_pre)
print(rf_auc)
print(rf.featureImportances)

# Example of persisting the fitted model and reusing it on new data
# (new_df is not defined in this script):
#rf.save('./model')
#rf=RandomForestClassificationModel.load('./model')
#new_pre= rf.transform(new_df)
affairs.csv 数据内容(部分):
rate_marriage,age,yrs_married,children,religious,affairs
5,32,6,1,3,0
4,22,2.5,0,2,0
3,32,9,3,3,1
3,27,13,3,1,1
4,22,2.5,0,1,1
4,37,16.5,4,3,1
5,27,9,1,1,1
4,27,9,0,2,1
5,37,23,5.5,2,1
5,37,23,5.5,2,1
3,22,2.5,0,2,1
3,27,6,0,1,1
2,27,6,2,1,1
5,27,6,2,3,1
3,37,16.5,5.5,1,1
5,27,6,0,2,1
4,22,6,1,1,1
4,37,9,2,2,1
4,27,6,1,1,1
1,37,23,5.5,4,1
2,42,23,2,2,1
4,37,6,0,2,1
5,22,2.5,0,2,1
3,37,16.5,5.5,2,1
3,42,23,5.5,3,1
2,27,9,2,4,1
4,27,6,1,2,1
5,27,2.5,0,3,1
2,27,6,2,2,1
5,37,13,1,3,1
2,32,16.5,2,2,1
3,27,6,1,1,1
3,32,16.5,4,3,1
3,27,9,2,1,1
3,37,16.5,3,3,1
4,32,16.5,5.5,4,1
5,42,16.5,4,3,1
3,27,9,2,2,1
3,17.5,0.5,0,1,1
4,42,23,5.5,2,1
5,37,16.5,3,3,1
4,22,2.5,1,2,1
4,27,2.5,0,2,1
4,22,2.5,0,2,1
4,37,13,3,2,1
4,22,2.5,0,2,1
4,22,2.5,0,1,1
5,22,2.5,0,3,1
5,22,2.5,0,3,1
3,42,23,4,3,1
5,32,13,3,3,1
5,22,6,2,2,1
3,27,2.5,1,4,1
2,42,23,3,3,1
4,22,2.5,0,1,1
2,42,23,3,3,1
4,42,23,2,2,1
4,42,23,3,3,1
4,37,16.5,2,3,1
4,27,2.5,0,2,1
2,32,9,2,2,1
4,42,13,0,1,1
4,22,6,2,1,1
5,32,16.5,3,3,1
4,42,13,0,2,1
5,27,9,1,3,1
5,22,6,2,2,1
2,27,16.5,2,3,1
5,37,13,2,1,1
5,27,6,0,2,1
2,27,2.5,1,1,1
5,42,23,5.5,4,1
5,27,6,0,2,1
5,37,16.5,3,2,1
2,32,9,2,2,1
3,37,16.5,5.5,3,1
5,27,6,2,2,1
5,32,16.5,3,1,1
5,27,9,2,1,1
4,22,2.5,0,2,1
5,32,16.5,2,4,1
2,22,6,2,2,1
3,32,13,3,1,1
5,32,16.5,3,2,1
3,27,6,2,1,1
5,22,2.5,0,1,1
3,32,9,2,3,1
3,22,2.5,1,2,1
3,22,2.5,0,2,1
5,27,9,2,1,1
3,42,23,2,2,1
3,37,16.5,3,2,1
5,32,13,2,2,1
1,27,13,2,2,1
5,27,2.5,0,2,1
5,27,9,0,1,1
5,32,13,2,2,1
4,27,9,2,2,1
3,22,2.5,1,3,1
2,37,23,2,2,1
2,27,6,2,1,1
4,27,2.5,1,2,1
3,37,13,5.5,2,1
5,37,16.5,4,1,1
5,22,2.5,0,1,1
4,42,13,0,1,1
5,27,9,1,1,1
4,22,2.5,0,1,1
4,22,2.5,1,2,1
4,27,6,2,3,1
2,32,16.5,1,2,1
3,27,13,2,1,1
4,22,0.5,0,2,1
5,27,6,1,1,1
2,27,9,2,2,1
3,27,9,2,2,1
4,32,16.5,3,3,1
5,22,2.5,0,1,1
4,22,2.5,1,2,1
1,22,2.5,0,2,1
3,32,13,2,1,1


标签:基于,27,15,14,22,pyspark,13,随机,2.5
From: https://blog.51cto.com/u_15063934/6033062

相关文章