# 模型蒸馏的代码
#
# 标签：蒸馏模型代码 batch self student tf data teacher
# 模型蒸馏
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, f1_score
from sklearn.metrics import roc_auc_score
import numpy as np
import random

# One shared seed for every random number generator this script touches:
# Python's `random`, NumPy, and TensorFlow's global generator.
seed_value = 42
random.seed(seed_value)
np.random.seed(seed_value)
tf.random.set_seed(seed_value)


def action_recall_accuracy(y_pred, y_true):
    """Print per-class precision, recall and F1, plus the mean of the F1 scores.

    Note the argument order: predictions first, ground truth second.

    Args:
        y_pred: Predicted class labels, one per sample.
        y_true: Ground-truth class labels, one per sample.

    Returns:
        None. All metrics are written to stdout.
    """
    # scikit-learn convention: row i = samples whose TRUE class is i,
    # column i = samples PREDICTED as class i.
    cm = confusion_matrix(y_true, y_pred)
    num_classes = cm.shape[0]

    def _ratio(hits, total):
        # A class that never occurs / is never predicted scores 0 instead of
        # producing nan from a division by zero.
        return hits / total if total else 0.0

    # Precision: correct predictions / everything predicted as the class
    # (column sum). Printed under the "准确率" label.
    accuracy = [_ratio(cm[i, i], cm[:, i].sum()) for i in range(num_classes)]
    # Recall: correct predictions / everything that truly belongs to the
    # class (row sum). Printed under the "召回率" label.
    recall = [_ratio(cm[i, i], cm[i, :].sum()) for i in range(num_classes)]

    for i, (acc, rec) in enumerate(zip(accuracy, recall)):
        print(f"类别 {i} 的准确率: {acc:.3f}")
        print(f"类别 {i} 的召回率: {rec:.3f}")

    # f1_score(average=None) already returns one score per class, so it is
    # computed once rather than once per class.
    scores = list(f1_score(y_true, y_pred, average=None))
    for i, f1 in enumerate(scores):
        print(f"类别 {i} 的F1分数: {f1:.3f}")

    # Unweighted mean of the per-class F1 scores.
    average_f1 = sum(scores) / len(scores)
    print(f"各类别F1-score的平均值: {average_f1:.3f}")


# Load the training table; the 'label' column is the int32 target and every
# other column is cast to a float32 feature.
train_data = pd.read_csv('train_new.csv')
y = train_data['label'].astype('int32')
X = train_data.drop(columns='label').astype('float32')

# Hold out 0.1% of the training rows as a validation split.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.001, random_state=42
)

# Load the test table and split it into features / ground-truth labels the
# same way as the training table.
test_data = pd.read_csv('test_new.csv')
true_labels = test_data['label'].astype('int32')
X_test = test_data.drop(columns='label').astype('float32')


def custom_loss(y_true, y_pred, soft_labels, T, rate):
    """Distillation loss for a binary student with a single sigmoid output.

    The result is ``rate * hard_loss + (1 - rate) * soft_loss`` where
    ``hard_loss`` is the binary cross-entropy against the ground-truth labels
    and ``soft_loss`` is the binary cross-entropy between the
    temperature-softened teacher and student probabilities, scaled by
    ``T ** 2``.

    Args:
        y_true: Ground-truth 0/1 labels; reshaped to ``(-1, 1)`` internally.
        y_pred: Student output probabilities (sigmoid activations).
        soft_labels: Teacher output probabilities (sigmoid activations).
        T: Distillation temperature (> 0).
        rate: Weight of the hard-label loss; the soft loss gets ``1 - rate``.

    Returns:
        The combined loss tensor as produced by
        ``tf.keras.losses.binary_crossentropy`` (not reduced to a scalar here).
    """
    T = tf.cast(T, y_pred.dtype)
    eps = tf.keras.backend.epsilon()

    def _soften(probs):
        # The inputs are sigmoid probabilities, not logits, and there is only
        # one output unit. A softmax over that single unit is identically 1,
        # which would make the soft loss a constant with zero gradient.
        # Instead, recover the logit, apply the temperature, and squash back
        # with a sigmoid (the two-class equivalent of a tempered softmax).
        clipped = tf.clip_by_value(probs, eps, 1.0 - eps)
        logits = tf.math.log(clipped) - tf.math.log1p(-clipped)
        return tf.sigmoid(logits / T)

    y_pred_T = _soften(y_pred)
    soft_labels_T = _soften(soft_labels)

    # Binary cross-entropy against the hard (ground-truth) labels.
    hard_loss = tf.keras.losses.binary_crossentropy(tf.reshape(y_true, (-1, 1)), y_pred)

    # Binary cross-entropy against the softened teacher output; the T^2 factor
    # keeps the soft-target gradients on a comparable scale across temperatures.
    soft_loss = tf.keras.losses.binary_crossentropy(soft_labels_T, y_pred_T) * (T ** 2)

    return hard_loss * rate + soft_loss * (1 - rate)


# The student and the teacher are each updated by their own Adam (AMSGrad)
# instance; both instances are configured identically.
_adam_settings = dict(
    learning_rate=0.001,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-07,
    amsgrad=True,
    name='Adam',
)
optimizer = tf.keras.optimizers.Adam(**_adam_settings)
optimizer_teacher = tf.keras.optimizers.Adam(**_adam_settings)


@tf.function
def train_step(batch_X_student, batch_X_teacher, model_student, model_teacher, batch_y, T, rate):
    """Run one joint optimisation step for the student and the teacher.

    Both models are run in training mode on their own feature batch. The
    student is updated on `custom_loss` (hard labels plus teacher output) via
    the module-level `optimizer`; the teacher is updated on plain binary
    cross-entropy against the hard labels via `optimizer_teacher`.

    Args:
        batch_X_student: Feature batch fed to the student model.
        batch_X_teacher: Feature batch fed to the teacher model.
        model_student: Student Keras model.
        model_teacher: Teacher Keras model.
        batch_y: Ground-truth labels for the batch.
        T: Distillation temperature forwarded to `custom_loss`.
        rate: Hard-loss weight forwarded to `custom_loss`.

    Returns:
        Tuple ``(loss, teacher_loss)`` with the student and teacher loss
        tensors computed for this batch.
    """
    hard_targets = tf.reshape(batch_y, (-1, 1))

    # A persistent tape is required because two separate gradients are taken
    # from the same recorded forward pass.
    with tf.GradientTape(persistent=True) as grad_tape:
        student_out = model_student(batch_X_student, training=True)
        teacher_out = model_teacher(batch_X_teacher, training=True)
        loss = custom_loss(batch_y, student_out, teacher_out, T=T, rate=rate)
        teacher_loss = tf.keras.losses.binary_crossentropy(hard_targets, teacher_out)

    # Student first, then teacher; each objective only updates its own model.
    updates = (
        (loss, model_student, optimizer),
        (teacher_loss, model_teacher, optimizer_teacher),
    )
    for objective, model, opt in updates:
        weights = model.trainable_variables
        grads = grad_tape.gradient(objective, weights)
        opt.apply_gradients(zip(grads, weights))

    del grad_tape  # drop the persistent tape so its resources are released
    return loss, teacher_loss


def build_student_model():
    """Build the student network.

    A fully connected model over a 60-feature float32 input: three ReLU
    hidden layers of 128, 64 and 32 units followed by a single sigmoid unit
    named ``student_output``.

    Returns:
        The (uncompiled) ``tf.keras.Model``.
    """
    features = tf.keras.layers.Input(shape=(60,), name='relevance_input', dtype='float32')
    hidden = features
    for width in (128, 64, 32):
        hidden = tf.keras.layers.Dense(width, activation='relu')(hidden)
    probability = tf.keras.layers.Dense(1, activation='sigmoid', name='student_output')(hidden)
    return tf.keras.Model(inputs=features, outputs=probability)


def build_teacher_model():
    """Build the teacher network.

    A fully connected model over a 124-feature float32 input: three ReLU
    hidden layers of 128, 64 and 32 units followed by a single sigmoid unit
    named ``teacher_output``.

    Returns:
        The (uncompiled) ``tf.keras.Model``.
    """
    features = tf.keras.layers.Input(shape=(124,), name='relevance_input', dtype='float32')
    hidden = features
    for width in (128, 64, 32):
        hidden = tf.keras.layers.Dense(width, activation='relu')(hidden)
    probability = tf.keras.layers.Dense(1, activation='sigmoid', name='teacher_output')(hidden)
    return tf.keras.Model(inputs=features, outputs=probability)


class DataGenerator(tf.keras.utils.Sequence):
    """Serve aligned (student features, teacher features, labels) mini-batches.

    The teacher sees every column of ``X_data``; the student only sees the
    first ``student_feature_dim`` columns. Batches are served in the stored
    row order; no shuffling is performed.

    Args:
        X_data: pandas DataFrame holding all feature columns.
        y_data: pandas Series of labels, row-aligned with ``X_data``.
        batch_size: Number of rows per batch (the last batch may be smaller).
        student_feature_dim: Number of leading columns given to the student.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """

    def __init__(self, X_data, y_data, batch_size, student_feature_dim=60):
        super().__init__()
        if batch_size <= 0:
            raise ValueError(f"batch_size must be positive, got {batch_size}")
        self.X_data_student = X_data.iloc[:, :student_feature_dim]
        self.X_data_teacher = X_data
        self.y_data = y_data
        self.batch_size = batch_size

    def __len__(self):
        """Return the number of batches, counting a final partial batch."""
        return int(np.ceil(len(self.y_data) / self.batch_size))

    def __getitem__(self, index):
        """Return batch ``index`` as three NumPy arrays.

        Raises:
            IndexError: If ``index`` is outside ``[0, len(self))``. Without
                this check an out-of-range index would silently yield an
                empty batch.
        """
        if not 0 <= index < len(self):
            raise IndexError(f"batch index {index} out of range for {len(self)} batches")

        start = index * self.batch_size
        rows = slice(start, start + self.batch_size)
        # .iloc makes the positional (not label-based) row selection explicit.
        batch_X_student = self.X_data_student.iloc[rows].to_numpy()
        batch_X_teacher = self.X_data_teacher.iloc[rows].to_numpy()
        batch_y = self.y_data.iloc[rows].to_numpy()
        return batch_X_student, batch_X_teacher, batch_y


# Batch generator over the training split.
train_generator = DataGenerator(X_train, y_train, batch_size=16)

# Build the teacher and student models.
student_model = build_student_model()
teacher_model = build_teacher_model()

# Snapshot the freshly initialised weights so that every (T, rate)
# combination can be trained from the same starting point. Without this each
# combination would continue from the weights left by the previous one and
# the printed metrics would not be comparable across combinations.
initial_student_weights = student_model.get_weights()
initial_teacher_weights = teacher_model.get_weights()

# Evaluation inputs do not change between epochs, so prepare them once.
X_test_student = X_test.iloc[:, :60]
true_labels_list = list(np.array(true_labels))

num_epochs = 11

# Grid over the loss temperature T and the hard-loss weight `rate`.
t_list = [1, 2, 3, 4, 5, 6, 7, 8, 0.5]
rate_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
for t in t_list:
    for rate in rate_list:
        print("当前的参数T和rate值为：", t, rate)

        # Restart both models from their initial weights for this combination.
        # NOTE(review): the module-level Adam optimizers keep their moment
        # estimates across combinations; confirm whether they should be
        # re-created per combination as well.
        student_model.set_weights(initial_student_weights)
        teacher_model.set_weights(initial_teacher_weights)

        for epoch in range(num_epochs):
            print("...............................epoch:", epoch)
            # One pass over the training data through the custom generator.
            for batch_X_student, batch_X_teacher, batch_y in train_generator:
                loss, teacher_loss = train_step(batch_X_student, batch_X_teacher, student_model, teacher_model, batch_y, t,
                                                rate)

            # Evaluate the student on the test set after every epoch.
            predictions = student_model.predict(X_test_student)
            auc = roc_auc_score(true_labels_list, predictions[:, 0])
            print(f"Testing AUC: {auc}")

            # Threshold the probabilities at 0.5 to obtain class labels.
            pred_labels = [int(i > 0.5) for i in predictions[:, 0]]
            action_recall_accuracy(pred_labels, true_labels_list)

            # Save the student after the last epoch of this combination.
            # NOTE(review): every combination writes to the same directory, so
            # only the last combination's model survives on disk.
            if epoch == num_epochs - 1:
                student_model.save(f"./dnn2_student_model", save_format='tf')

        print("||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||")
# 标签：蒸馏,模型,代码,batch,self,student,tf,data,teacher
# From： https://www.cnblogs.com/qiaoqifa/p/18260555
# 相关文章
#
# 赞助商
#
# 阅读排行