首页 > 其他分享 >模型蒸馏的代码

模型蒸馏的代码

时间:2024-06-21 15:10:42浏览次数:27  
标签:蒸馏 模型 代码 batch self student tf data teacher

模型蒸馏
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, f1_score
from sklearn.metrics import roc_auc_score
import numpy as np
import random

# Seed every random source in play (Python's `random`, NumPy, TensorFlow)
# so that runs are reproducible.
seed_value = 42
random.seed(seed_value)
np.random.seed(seed_value)

# TensorFlow keeps its own global generator; seed it as well.
tf.random.set_seed(seed_value)


def action_recall_accuracy(y_pred, y_true):
    """Print per-class metrics derived from the confusion matrix, plus
    per-class F1 scores and their unweighted mean.

    Args:
        y_pred: predicted class labels (sequence of ints).
        y_true: ground-truth class labels (sequence of ints).

    Note:
        The printed "准确率" is mathematically the per-class *recall*
        (diagonal / row sum) and the printed "召回率" is the per-class
        *precision* (diagonal / column sum). The output strings are kept
        byte-identical for compatibility with existing log consumers.
    """
    cm = confusion_matrix(y_true, y_pred)
    num_classes = cm.shape[0]

    accuracy = []
    recall = []
    for i in range(num_classes):
        # Diagonal / row sum: fraction of actual class-i samples predicted as i.
        accuracy.append(cm[i, i] / sum(cm[i, :]))
        # Diagonal / column sum: fraction of class-i predictions that are correct.
        recall.append(cm[i, i] / sum(cm[:, i]))

    for i in range(num_classes):
        print(f"类别 {i} 的准确率: {accuracy[i]:.3f}")
        print(f"类别 {i} 的召回率: {recall[i]:.3f}")

    # Bug fix: the original called f1_score(..., average=None) once per class
    # inside the loop, recomputing the full per-class array each time.
    # Compute it a single time and index into it.
    scores = list(f1_score(y_true, y_pred, average=None))
    for i in range(num_classes):
        print(f"类别 {i} 的F1分数: {scores[i]:.3f}")

    # Unweighted (macro) average of the per-class F1 scores.
    average_f1 = sum(scores) / len(scores)
    print(f"各类别F1-score的平均值: {average_f1:.3f}")


# Load the training file.
train_data = pd.read_csv('train_new.csv')

# Split features from the label column.
X = train_data.drop('label', axis=1)
y = train_data['label']

# Cast to the dtypes the models expect.
X = X.astype('float32')
y = y.astype('int32')

# Train/validation split.
# NOTE(review): test_size=0.001 leaves an almost-empty validation set, and
# X_val/y_val are never used later in this script — confirm this is intended.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.001, random_state=42)

# Load the test file.
test_data = pd.read_csv('test_new.csv')

# Separate test features from the ground-truth labels.
X_test = test_data.drop('label', axis=1).astype('float32')
true_labels = test_data['label'].astype('int32')


def custom_loss(y_true, y_pred, soft_labels, T, rate):
    """Distillation loss: weighted sum of a hard-label BCE term and a
    temperature-softened soft-label BCE term.

    Args:
        y_true: ground-truth hard labels for the batch.
        y_pred: student sigmoid probabilities, shape (batch, 1).
        soft_labels: teacher sigmoid probabilities, shape (batch, 1).
        T: distillation temperature (> 0); higher T softens the targets.
        rate: weight of the hard-label term in [0, 1].

    Returns:
        Per-sample loss tensor: hard_loss * rate + soft_loss * (1 - rate).
    """
    # Bug fix: the original applied tf.nn.softmax over the last axis of a
    # single-unit sigmoid output. Softmax over one element is identically 1.0,
    # so the soft-label term was a constant and carried no gradient. For a
    # sigmoid head, temperature is applied to the *logit*:
    #   p_T = sigmoid(logit(p) / T)
    eps = 1e-7

    def _temper(p):
        # Clip away exact 0/1 before inverting the sigmoid.
        p = tf.clip_by_value(p, eps, 1.0 - eps)
        logit = tf.math.log(p / (1.0 - p))
        return tf.sigmoid(logit / T)

    y_pred_T = _temper(y_pred)
    soft_labels_T = _temper(soft_labels)

    # Binary cross-entropy against the hard labels (untempered predictions).
    hard_loss = tf.keras.losses.binary_crossentropy(tf.reshape(y_true, (-1, 1)), y_pred)

    # Soft-label BCE, scaled by T^2 to keep gradient magnitudes comparable
    # across temperatures (Hinton et al., 2015).
    soft_loss = tf.keras.losses.binary_crossentropy(soft_labels_T, y_pred_T) * (T ** 2)

    return hard_loss * rate + soft_loss * (1 - rate)


def _build_adam():
    """Fresh Adam optimizer with AMSGrad, using the original hyperparameters."""
    return tf.keras.optimizers.Adam(
        learning_rate=0.001,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-07,
        amsgrad=True,
        name='Adam',
    )


# Two independent optimizer instances: one drives the student, one the teacher.
optimizer = _build_adam()
optimizer_teacher = _build_adam()


@tf.function
def train_step(batch_X_student, batch_X_teacher, model_student, model_teacher, batch_y, T, rate):
    """One joint optimization step: update the student with the distillation
    loss and the teacher with a plain hard-label BCE loss.

    Args:
        batch_X_student: student features (first 60 columns of the data).
        batch_X_teacher: teacher features (all 124 columns).
        model_student: the student Keras model.
        model_teacher: the teacher Keras model.
        batch_y: hard labels for the batch.
        T: distillation temperature, forwarded to custom_loss.
        rate: hard/soft loss mixing weight, forwarded to custom_loss.

    Returns:
        Tuple of (student loss, teacher loss) tensors.
    """
    # persistent=True because the same tape supplies gradients for two separate
    # backward passes (student and teacher) below.
    with tf.GradientTape(persistent=True) as tape:
        predictions = model_student(batch_X_student, training=True)
        # The teacher's live predictions serve as the student's soft labels
        # while the teacher itself is still being trained in the same step.
        batch_soft_labels = model_teacher(batch_X_teacher, training=True)
        loss = custom_loss(batch_y, predictions, batch_soft_labels, T=T, rate=rate)
        teacher_loss = tf.keras.losses.binary_crossentropy(tf.reshape(batch_y, (-1, 1)), batch_soft_labels)

    # Student update from the combined distillation loss.
    gradients = tape.gradient(loss, model_student.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model_student.trainable_variables))

    # Teacher update from its own hard-label loss.
    gradients_teacher = tape.gradient(teacher_loss, model_teacher.trainable_variables)
    optimizer_teacher.apply_gradients(zip(gradients_teacher, model_teacher.trainable_variables))

    del tape  # Drop the persistent tape to release its resources.
    return loss, teacher_loss


def build_student_model():
    """Student network: 60-d input -> Dense 128/64/32 (ReLU) -> 1 sigmoid unit."""
    student_input = tf.keras.layers.Input(shape=(60,), name='relevance_input', dtype='float32')
    x = student_input
    for units in (128, 64, 32):
        x = tf.keras.layers.Dense(units, activation='relu')(x)
    outputs = tf.keras.layers.Dense(1, activation='sigmoid', name='student_output')(x)
    return tf.keras.Model(inputs=student_input, outputs=outputs)


def build_teacher_model():
    """Teacher network: 124-d input -> Dense 128/64/32 (ReLU) -> 1 sigmoid unit."""
    teacher_input = tf.keras.layers.Input(shape=(124,), name='relevance_input', dtype='float32')
    x = teacher_input
    for units in (128, 64, 32):
        x = tf.keras.layers.Dense(units, activation='relu')(x)
    outputs = tf.keras.layers.Dense(1, activation='sigmoid', name='teacher_output')(x)
    return tf.keras.Model(inputs=teacher_input, outputs=outputs)


class DataGenerator(tf.keras.utils.Sequence):
    """Yields (student_batch, teacher_batch, labels) numpy triples.

    The student sees only the first 60 feature columns; the teacher sees all
    columns. Per-epoch shuffling was deliberately left disabled in the
    original version, so batches arrive in a fixed order.
    """

    def __init__(self, X_data, y_data, batch_size):
        self.X_data_student = X_data.iloc[:, :60]
        self.X_data_teacher = X_data
        self.y_data = y_data
        self.batch_size = batch_size

    def __len__(self):
        # Number of batches, counting the final partial batch.
        return int(np.ceil(len(self.y_data) / self.batch_size))

    def __getitem__(self, index):
        lo = index * self.batch_size
        hi = lo + self.batch_size
        return (
            self.X_data_student.iloc[lo:hi].to_numpy(),
            self.X_data_teacher.iloc[lo:hi].to_numpy(),
            self.y_data.iloc[lo:hi].to_numpy(),
        )


# Build the data generator (batches of 16, fixed order).
train_generator = DataGenerator(X_train, y_train, batch_size=16)

# Build the teacher and student models.
student_model = build_student_model()
teacher_model = build_teacher_model()

# Grid search over the distillation temperature T and the hard/soft mixing
# weight `rate` used by custom_loss.
# NOTE(review): the two models are built once, OUTSIDE the grid loops, so every
# (T, rate) combination keeps training the same weights — grid points are not
# independent runs. Confirm this is intended.
t_list = [1, 2, 3, 4, 5, 6, 7,8,0.5]
rate_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
for t in t_list:
    for rate in rate_list:
        print("当前的参数T和rate值为:", t, rate)
        for epoch in range(11):
            print("...............................epoch:", epoch)
            # Train both models batch-by-batch via the custom generator.
            for batch_X_student, batch_X_teacher, batch_y in train_generator:
                loss, teacher_loss = train_step(batch_X_student, batch_X_teacher, student_model, teacher_model, batch_y, t,
                                                rate)

            # (Per-epoch shuffling intentionally disabled; see DataGenerator.)
            # train_generator.on_epoch_end()

            # Evaluate the student on the test set after every epoch
            # (the student only consumes the first 60 feature columns).
            predictions = student_model.predict(X_test.iloc[:, :60])
            auc = roc_auc_score(list(np.array(true_labels)), predictions[:, 0])
            print(f"Testing AUC: {auc}")

            # Threshold probabilities at 0.5 to obtain hard predictions.
            # NOTE(review): the original comment said "best threshold", but a
            # fixed 0.5 cutoff is what is actually used here.
            pred_labels = [int(i > 0.5) for i in predictions[:, 0]]
            true_labels_list = list(np.array(true_labels))
            action_recall_accuracy(pred_labels, true_labels_list)

            # Save the student after the final epoch of each grid point.
            # NOTE(review): the same path is overwritten for every (T, rate)
            # combination, so only the last combination's model survives.
            if epoch == 10:
                student_model.save(f"./dnn2_student_model", save_format='tf')

        print("||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||")

 

标签:蒸馏,模型,代码,batch,self,student,tf,data,teacher
From: https://www.cnblogs.com/qiaoqifa/p/18260555

相关文章

  • bert_dnn的代码
    importosos.environ["CUDA_VISIBLE_DEVICES"]="2"importtensorflowastffromsklearn.model_selectionimporttrain_test_splitfromtransformersimportBertTokenizer,TFBertModelfromtransformersimportRobertaTokenizer,TFRobertaMod......
  • bert分类的代码
    importosos.environ["CUDA_VISIBLE_DEVICES"]="0"importtensorflowastffromsklearn.model_selectionimporttrain_test_splitfromtransformersimportBertTokenizer,TFBertModelfromtransformersimportRobertaTokenizer,TFRobertaMod......
  • Stable Diffusion一键安装教程含大量关键词模型包
    目前主流AI绘画平台主要有三种:MidjourneyStableDiffusionDALL·E相比较而言StableDiffusion。可以本地化不需要money不占用网络StableDiffusion下载地址想要Stablediffusion安装包的小伙伴可以在文末扫码,我给大家免费安排!1,电脑配置由于是将StableDiffusio......
  • 《大数据智能风控 模型、数据和业务实践》导读
    大数据风控是什么大数据风控是指利用大数据技术对风险进行识别、评估、监控和控制的过程。它通过收集和分析大量的数据,包括结构化数据(如交易记录、信用报告)和非结构化数据(如社交媒体信息、网络行为),来预测和防范潜在的风险。大数据风控必要性:提高风险识别能力:大数据风控......
  • 帮企商城10合一万能DIY分销商城小程序源码系统 带源代码包+搭建部署教程
    系统概述这是一款集多种功能于一体的源码系统,旨在为用户提供一站式的商城解决方案。它不仅支持小程序端,还能与其他平台无缝对接,满足不同用户的需求。代码示例系统特色功能一览   1.万能DIY功能:用户可以根据自己的需求和创意,自由定制商城的外观、布局和功能模块,打造......
  • 超级会员卡积分收银系统源码 带完整的安装代码包以及搭建部署教程
    系统概述超级会员卡积分收银系统源码是一款专为商业运营打造的综合性软件解决方案。它集成了会员卡管理、积分管理、收银管理等多种功能,旨在为企业提供高效、便捷、准确的运营管理工具。该系统源码采用先进的技术架构,具有良好的稳定性和扩展性,能够适应不同规模和类型的企业需......
  • 百度在线分销商城小程序源码系统 分销+会员组+新用户福利 前后端分离 带完整的安装代
    系统概述百度在线分销商城小程序源码系统是一款集分销、会员组管理和新用户福利于一体的前后端分离的系统。它采用先进的技术架构,确保系统的稳定性、高效性和安全性。该系统的前端基于小程序开发,为用户提供了便捷的购物体验和交互界面。用户可以通过小程序轻松浏览商品、下单......
  • Anthropic 推出 Claude 3.5 Sonnet,称新模型优于 GPT-4 Omni
    近日,人工智能研究公司Anthropic宣布推出其最新的语言模型Claude3.5Sonnet,并声称该模型在多个方面优于OpenAI的GPT-4Omni。这一消息在人工智能领域引起了广泛关注和讨论。本文将详细介绍Claude3.5Sonnet的技术特点、应用前景以及其与GPT-4Omni的对比。http://www.z......
  • 课程设计——基于FPGA的交通红绿灯控制系统(源代码)
    摘要:        本课程设计旨在设计一个基于FPGA(现场可编程门阵列)的交通红绿灯控制系统。该系统模拟了实际道路交叉口的红绿灯工作场景,通过硬件描述语言(如Verilog或VHDL)编写源代码实现。系统包含三个主要部分:红绿灯显示模块、计时控制模块以及状态切换模块。红绿灯显示模......
  • 用Python执行JavaScript代码,这些方法你不可不知!
    目录1、PyExecJS:轻量级桥梁......