import os
import random
import shutil


def split_data(source_dir, train_dir, val_dir, test_dir,
               train_ratio=0.6, val_ratio=0.2):
    """Randomly split the files in *source_dir* into train/val/test folders.

    Files are copied (with metadata, via ``shutil.copy2``), not moved.
    The split is ``train_ratio`` / ``val_ratio`` / remainder, computed with
    ``int()`` truncation, so the test set absorbs any rounding leftover.

    Args:
        source_dir: Directory containing the files to split.
        train_dir: Destination for the training subset (created if missing).
        val_dir: Destination for the validation subset (created if missing).
        test_dir: Destination for the test subset (created if missing).
        train_ratio: Fraction of files for training (default 0.6).
        val_ratio: Fraction of files for validation (default 0.2).
    """
    # Make sure the destination folders exist.
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(val_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)

    # Only regular files: os.listdir also returns subdirectories, which
    # shutil.copy2 cannot copy and would raise on.
    files = [f for f in os.listdir(source_dir)
             if os.path.isfile(os.path.join(source_dir, f))]

    # Shuffle so the subsets are random samples of the source.
    random.shuffle(files)

    total_files = len(files)
    train_size = int(total_files * train_ratio)
    val_size = int(total_files * val_ratio)

    # Partition by position in the shuffled list:
    # [0, train_size) -> train, [train_size, train_size+val_size) -> val,
    # the rest -> test.
    for index, name in enumerate(files):
        if index < train_size:
            dest_dir = train_dir
        elif index < train_size + val_size:
            dest_dir = val_dir
        else:
            dest_dir = test_dir
        shutil.copy2(os.path.join(source_dir, name),
                     os.path.join(dest_dir, name))


if __name__ == "__main__":
    source_dir = "frog_dataset"   # replace with the source folder path
    train_dir = "./data/train/"   # replace with the training folder path
    val_dir = "./data/val/"       # replace with the validation folder path
    test_dir = "./data/test/"     # replace with the test folder path
    split_data(source_dir, train_dir, val_dir, test_dir)
# Tags: 集到, val, train, file, test, dir, size — Source: https://www.cnblogs.com/cupleo/p/17977164