import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, Subset
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score, average_precision_score, recall_score
from sklearn.model_selection import KFold
import numpy as np
import pandas as pd
from imblearn.over_sampling import RandomOverSampler
# 1. Dataset class definition
#想和大神交朋友或想软件开发兼职接项目,
#请通过手机端搜小#程#序: "黄页小艺"或公#众#号:"卧看星河"。
class CircrnaDataset(Dataset):
def __init__(self, circ_sim_path, disease_sim_path, assoc_path):
    """Load the three CSV matrices and build the feature/label arrays.

    Args:
        circ_sim_path: Path of the CSV read into ``self.circ_sim``; the
            first column is consumed as the index.
        disease_sim_path: Path of the CSV read into ``self.disease_sim``;
            the first column is consumed as the index.
        assoc_path: Path of the CSV read into ``self.association``; the
            first column is consumed as the index.

    Raises:
        AssertionError: If the association matrix shape is not
            ``(rows of circ_sim, rows of disease_sim)``.
    """
    self.circ_sim = pd.read_csv(circ_sim_path, index_col=0).values
    self.disease_sim = pd.read_csv(disease_sim_path, index_col=0).values
    self.association = pd.read_csv(assoc_path, index_col=0).values

    n_circ = self.circ_sim.shape[0]
    n_disease = self.disease_sim.shape[0]

    # Kept as an assert so the exception type seen by callers is unchanged.
    # NOTE(review): asserts are stripped under ``python -O``.
    # Each replacement field stays on a single line: splitting a field over
    # several lines is a SyntaxError before Python 3.12.
    assert self.association.shape == (n_circ, n_disease), (
        f"关联矩阵的大小 {self.association.shape} 与 circRNA 和疾病矩阵 "
        f"{self.circ_sim.shape}, {self.disease_sim.shape} 不匹配"
    )

    # One-hot (identity) rows for every circRNA and every disease, padded
    # on the right with zero columns up to a common width.  Padding both
    # sides to the larger count avoids the negative pad width np.pad
    # rejects when there are more diseases than circRNAs; when
    # n_circ >= n_disease the result is the same as padding diseases only.
    width = max(n_circ, n_disease)
    circ_features = np.pad(
        np.eye(n_circ), ((0, 0), (0, width - n_circ)), mode='constant'
    )
    disease_features = np.pad(
        np.eye(n_disease), ((0, 0), (0, width - n_disease)), mode='constant'
    )
    self.circ_features = circ_features
    self.disease_features = disease_features

    # Row-major flattening of the association matrix, then replaced by
    # whatever balance_data returns together with the feature array.
    self.labels = self.association.flatten()
    self.features, self.labels = self.balance_data(
        self.circ_features, self.disease_features, self.labels
    )
def balance_data(self, circ_features, disease_features, labels):
features = []
for i in range(circ_features.shape[0]):
for j in range(disease_features
标签:features,Python,模型,disease,self,circ,shape,circRNA,sim
From: https://blog.csdn.net/huanghm88/article/details/142994011