from sklearn.datasets import load_iris
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import lightgbm as lgb
from lightgbm import LGBMClassifier
import numpy as np
import pandas as pd
from sklearn import metrics
import warnings
warnings.filterwarnings("ignore")
# Train a LightGBM multiclass classifier on the iris data set and report
# its accuracy on a held-out test split.

# Feature matrix and integer class labels come from separate attributes.
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for evaluation; the fixed random_state keeps
# the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=2022
)

# Only LightGBM parameter names are used here. The XGBoost-style
# objective 'multi:softmax' and the XGBoost-only 'missing' argument are not
# LightGBM settings; 'num_class' is inferred from y by the sklearn wrapper,
# and 'scale_pos_weight' applies to binary / one-vs-all objectives only.
gbm = lgb.LGBMClassifier(
    max_depth=10,
    learning_rate=0.01,
    n_estimators=2000,  # number of boosting iterations
    objective="multiclass",  # softmax loss over all classes
    n_jobs=-1,  # number of LightGBM threads
    min_child_weight=1,
    max_delta_step=0,
    # NOTE(review): per the LightGBM docs row subsampling only takes effect
    # when subsample_freq > 0 (wrapper default is 0) -- confirm the intent.
    subsample=0.85,
    colsample_bytree=0.7,
    reg_alpha=0,  # L1 regularisation coefficient
    reg_lambda=1,  # L2 regularisation coefficient
    random_state=0,
)
gbm.fit(X_train, y_train)
y_pred = gbm.predict(X_test)

# Compute the fraction of correctly classified test samples.
accuracy = accuracy_score(y_test, y_pred)
print("accuracy: %.2f%%" % (accuracy * 100.0))
这里引入了鸢尾花(iris)样本集:iris.data 是 150×4 的特征矩阵(四列特征),iris.target 是对应的分类标签(共 3 类)。这个训练的模型就是用 LightGBM(gbm)去分类鸢尾花的。
其中比较好用的方法:accuracy_score(y_test, y_pred) 会逐元素比较两个数组,返回预测正确的样本所占的比例(0 到 1 之间的小数);再配合下一行的 print 乘以 100,就可以直接输出百分数,很方便。
from sklearn.model_selection import train_test_split 可以直接将数据集分成训练集和测试集两部分(这里 test_size=0.2,即 20% 的样本作为测试集)。
具体的模型里面的参数我们现在看看:
# Retrain the same LightGBM classifier, save it with pickle, load it back,
# and check that the restored model still predicts on the test split.

# Only LightGBM parameter names are used here. The XGBoost-style
# objective 'multi:softmax' and the XGBoost-only 'missing' argument are not
# LightGBM settings; 'num_class' is inferred from y by the sklearn wrapper,
# and 'scale_pos_weight' applies to binary / one-vs-all objectives only.
gbm = lgb.LGBMClassifier(
    max_depth=10,
    learning_rate=0.01,
    n_estimators=2000,  # number of boosting iterations
    objective="multiclass",  # softmax loss over all classes
    n_jobs=-1,  # number of LightGBM threads
    min_child_weight=1,
    max_delta_step=0,
    # NOTE(review): per the LightGBM docs row subsampling only takes effect
    # when subsample_freq > 0 (wrapper default is 0) -- confirm the intent.
    subsample=0.85,
    colsample_bytree=0.7,
    reg_alpha=0,  # L1 regularisation coefficient
    reg_lambda=1,  # L2 regularisation coefficient
    random_state=0,
)
gbm.fit(X_train, y_train)

# Use pickle to save the fitted model to disk.
import pickle

with open('model.pkl', 'wb') as fout:
    pickle.dump(gbm, fout)

# Load the model back with pickle to predict.
# NOTE: unpickling executes arbitrary code; only load files you wrote
# yourself or otherwise trust.
with open('model.pkl', 'rb') as fin:
    pkl_bst = pickle.load(fin)

# The restored estimator is used exactly like the original one.
y_pred = pkl_bst.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("accuracy: %.2f%%" % (accuracy * 100.0))
标签:lightgbm,gbm,笔记,学习,train,test,import,pickle,accuracy From: https://www.cnblogs.com/GY-Zhu/p/16812798.html