The ByteTrack training write-up I mainly followed is this post: https://blog.csdn.net/Ddddd4431/article/details/126910083
However, that author's dataset preparation differs a bit from mine: he annotates with labelImg, while I annotate videos directly with DarkLabel. ByteTrack expects training data in COCO format, whereas DarkLabel's video annotation produces MOT files. For how to annotate a video dataset with DarkLabel, see this post: https://blog.csdn.net/qq_61033357/article/details/136331771
1. Dataset Conversion
As mentioned above, I annotate videos directly. The DarkLabel version I use can annotate at most 100 targets, so I cut my video into clips before annotating. Each annotated clip produces a corresponding MOT file, which stores your annotations in CSV format. A MOT row generally contains [fn, id, x1, y1, w, h, c=-1, c=-1, c=-1, c=-1, cname].
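For reference, a few hypothetical rows of such a CSV (frame number, track ID, top-left x and y, box width and height, four placeholder columns, class name; the exact layout depends on the MOT format you select in DarkLabel):

1,1,415,223,51,87,-1,-1,-1,-1,tree
1,2,602,310,44,79,-1,-1,-1,-1,tree
2,1,417,224,51,86,-1,-1,-1,-1,tree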
To convert, first split the annotated videos and their MOT files into train and val folders at a fixed ratio.
Below is the splitting script, written with the help of Tongyi Qianwen:
import os
import random
import shutil

# Input and output directories
input_dir = ''        # set your own path
output_base_dir = ''  # set your own path

# Output directories
train_dir = os.path.join(output_base_dir, 'train')
val_dir = os.path.join(output_base_dir, 'val')

# Create the output directories
os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)

# Train/val split ratio
train_ratio = 0.8

# Collect the MOT files and video files, sorted so the two lists pair up by name
mot_files = sorted(f for f in os.listdir(input_dir) if f.startswith('output_') and f.endswith('.csv'))
video_files = sorted(f for f in os.listdir(input_dir) if f.startswith('output_') and f.endswith('.mp4'))

# The counts must match, otherwise an annotation or video file is missing
assert len(mot_files) == len(video_files), "MOT/video file counts do not match"

# Pair each MOT file with its video
file_pairs = list(zip(mot_files, video_files))

# Shuffle the pairs for a random split
random.shuffle(file_pairs)

# Split the list
split_index = int(len(file_pairs) * train_ratio)
train_pairs = file_pairs[:split_index]
val_pairs = file_pairs[split_index:]

# Helper that copies the paired files into a destination folder
def copy_file_pairs(file_pairs, dest_dir):
    for mot_file, video_file in file_pairs:
        # Copy the MOT file
        src_mot_path = os.path.join(input_dir, mot_file)
        dest_mot_path = os.path.join(dest_dir, mot_file)
        shutil.copy(src_mot_path, dest_mot_path)
        # Copy the video file
        src_video_path = os.path.join(input_dir, video_file)
        dest_video_path = os.path.join(dest_dir, video_file)
        shutil.copy(src_video_path, dest_video_path)

# Copy the training pairs
copy_file_pairs(train_pairs, train_dir)
# Copy the validation pairs
copy_file_pairs(val_pairs, val_dir)
print("Dataset split finished!")
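One detail worth noting: both file lists are sorted before zipping, so each output_*.csv is paired with the output_*.mp4 of the same name; os.listdir alone does not guarantee a matching order.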
Once split, convert everything to the COCO training format. The conversion script below was also written with Tongyi Qianwen:
import os
import cv2
import pandas as pd
import json

# Category dictionary for your dataset (replace the names with your own classes)
category_dict = {'your_class_1': 1, 'your_class_2': 2}  # extend as needed

def read_mot_file(mot_path):
    try:
        # Read the CSV and name the columns
        column_names = ['fn', 'id', 'x1', 'y1', 'w', 'h', 'c1', 'c2', 'c3', 'c4', 'cname']
        df = pd.read_csv(mot_path, header=None, names=column_names)
        # Check for missing values
        if df.isnull().values.any():
            print(f"MOT file {mot_path} contains missing values.")
            return None
        return df
    except Exception as e:
        print(f"Failed to read MOT file {mot_path}: {e}")
        return None

def extract_and_save_frames(df, video_path, output_dir, image_id_start=0, annotation_id_start=0):
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Failed to open video file: {video_path}")
        return None, None
    # Create the output directory
    os.makedirs(output_dir, exist_ok=True)
    frame_numbers = df['fn'].unique()
    annotations = []
    image_info = []
    # Continue the ID counters from the previous video so IDs stay unique
    image_id = image_id_start
    annotation_id = annotation_id_start
    video_name = os.path.splitext(os.path.basename(video_path))[0]
    for fn in frame_numbers:
        cap.set(cv2.CAP_PROP_POS_FRAMES, fn - 1)  # OpenCV frame indices start at 0
        ret, frame = cap.read()
        if not ret:
            print(f"Failed to read frame {fn} from {video_path}")
            continue
        # Build the output file name
        output_path = os.path.join(output_dir, f"{video_name}_{fn:06d}.jpg")
        cv2.imwrite(output_path, frame)
        # Record the image info
        image_info.append({
            "id": image_id,
            "file_name": f"{video_name}_{fn:06d}.jpg",
            "width": frame.shape[1],
            "height": frame.shape[0]
        })
        # All annotations belonging to this frame
        frame_df = df[df['fn'] == fn]
        for index, row in frame_df.iterrows():
            annotations.append({
                "id": annotation_id,
                "image_id": image_id,
                "category_id": category_dict[row['cname']],
                "bbox": [int(row['x1']), int(row['y1']), int(row['w']), int(row['h'])],
                "area": int(row['w']) * int(row['h']),
                "iscrowd": 0
            })
            annotation_id += 1
        image_id += 1
    cap.release()
    return annotations, image_info

def generate_coco_annotations(annotations, image_info, output_path):
    coco_data = {
        "images": image_info,
        "annotations": annotations,
        "categories": [{"id": v, "name": k} for k, v in category_dict.items()]
    }
    # Write the JSON file
    with open(output_path, 'w') as f:
        json.dump(coco_data, f, indent=4)

def process_videos(train_dir, val_dir, output_dir):
    # Output directories for the extracted frames
    train_output_dir = os.path.join(output_dir, 'train')
    val_output_dir = os.path.join(output_dir, 'val')
    os.makedirs(train_output_dir, exist_ok=True)
    os.makedirs(val_output_dir, exist_ok=True)
    # Directory for the annotation files
    annotations_dir = os.path.join(output_dir, 'annotations')
    os.makedirs(annotations_dir, exist_ok=True)
    # Collect the MOT files and derive each video path from its CSV name
    train_mot_files = [os.path.join(train_dir, f) for f in os.listdir(train_dir) if f.endswith('.csv')]
    train_video_files = [p.replace('.csv', '.mp4') for p in train_mot_files]
    val_mot_files = [os.path.join(val_dir, f) for f in os.listdir(val_dir) if f.endswith('.csv')]
    val_video_files = [p.replace('.csv', '.mp4') for p in val_mot_files]
    # Overall image info and annotation lists
    all_train_image_info = []
    all_train_annotations = []
    all_val_image_info = []
    all_val_annotations = []
    # Running counters keep image/annotation IDs unique across all videos
    next_image_id = 0
    next_annotation_id = 0
    # Process the training set
    for mot_path, video_path in zip(train_mot_files, train_video_files):
        if not os.path.exists(video_path):
            print(f"No matching video for MOT file {mot_path}.")
            continue
        mot_df = read_mot_file(mot_path)
        if mot_df is not None:
            annotations, image_info = extract_and_save_frames(
                mot_df, video_path, train_output_dir,
                image_id_start=next_image_id, annotation_id_start=next_annotation_id)
            if annotations and image_info:
                all_train_annotations.extend(annotations)
                all_train_image_info.extend(image_info)
                next_image_id = image_info[-1]['id'] + 1
                next_annotation_id = annotations[-1]['id'] + 1
    # Process the validation set
    for mot_path, video_path in zip(val_mot_files, val_video_files):
        if not os.path.exists(video_path):
            print(f"No matching video for MOT file {mot_path}.")
            continue
        mot_df = read_mot_file(mot_path)
        if mot_df is not None:
            annotations, image_info = extract_and_save_frames(
                mot_df, video_path, val_output_dir,
                image_id_start=next_image_id, annotation_id_start=next_annotation_id)
            if annotations and image_info:
                all_val_annotations.extend(annotations)
                all_val_image_info.extend(image_info)
                next_image_id = image_info[-1]['id'] + 1
                next_annotation_id = annotations[-1]['id'] + 1
    # Write the combined annotation files
    if all_train_image_info and all_train_annotations:
        generate_coco_annotations(all_train_annotations, all_train_image_info,
                                  os.path.join(annotations_dir, 'train.json'))
    if all_val_image_info and all_val_annotations:
        generate_coco_annotations(all_val_annotations, all_val_image_info,
                                  os.path.join(annotations_dir, 'val.json'))

# Example call
train_dir = ''   # your own path
val_dir = ''     # your own path
output_dir = ''  # your own path
process_videos(train_dir, val_dir, output_dir)
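One deliberate change from a single-video version: extract_and_save_frames takes image_id_start and annotation_id_start, and process_videos threads the counters through each call, so image and annotation IDs stay unique across all the videos merged into one JSON. If every video restarted at ID 0, the COCO files would contain duplicate IDs.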
If what you end up with looks like the layout below, you are basically ready to train~
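A sketch of the output layout the two scripts produce (the frame file names are just examples; they follow the {video_name}_{frame:06d}.jpg pattern from the script):

output_dir/
├── annotations/
│   ├── train.json
│   └── val.json
├── train/
│   ├── output_1_000001.jpg
│   └── ...
└── val/
    ├── output_6_000001.jpg
    └── ...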
In this COCO layout, the annotations folder holds only train.json and val.json, which record all the information for every annotated frame of your videos. I never found a pipeline that trains on video directly; everything I found converts the video into frames first, which is why the code above slices the video by frame number and writes the annotation info into the annotations folder. The train and val folders hold the extracted video frames, as JPGs!
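For orientation, the two JSON files follow the standard COCO detection structure; a minimal example with made-up values:

{
    "images": [
        {"id": 0, "file_name": "output_1_000001.jpg", "width": 1920, "height": 1080}
    ],
    "annotations": [
        {"id": 0, "image_id": 0, "category_id": 1,
         "bbox": [415, 223, 51, 87], "area": 4437, "iscrowd": 0}
    ],
    "categories": [
        {"id": 1, "name": "tree"}
    ]
}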
2. Training
Open a terminal, activate the environment you created, and run the training command:
python D:/bytetrack/ByteTrack-main/tools/train.py -f D:/bytetrack/ByteTrack-main/exps/example/mot/tree_yolox_x_ch.py -d 0 -b 3 --fp16 -o -c D:/bytetrack/ByteTrack-main/pretrained/yolox_m.pth
As you will know if you have read the post linked at the top, tree_yolox_x_ch.py under the mot folder is your own experiment config file. I am training on trees, so mine starts with tree; name yours however you like. The remaining flags, roughly: -d is the number of GPUs, -b the batch size, --fp16 enables mixed-precision training, -o occupies GPU memory up front, and -c points to the pretrained checkpoint to fine-tune from.
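For reference, here is a minimal sketch of what such an exp file usually looks like in the ByteTrack repo. The concrete values below (class count, data paths, epochs) are my assumptions; the safest route is to copy an existing file under exps/example/mot/ and edit it:

import os
from yolox.exp import Exp as MyExp

class Exp(MyExp):
    def __init__(self):
        super(Exp, self).__init__()
        # YOLOX-X depth/width multipliers
        self.depth = 1.33
        self.width = 1.25
        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
        # number of classes in your dataset (1 for me, since I only track trees)
        self.num_classes = 1
        # dataset root plus the annotation files written by the conversion script
        self.data_dir = "datasets/your_dataset"
        self.train_ann = "train.json"
        self.val_ann = "val.json"
        self.max_epoch = 80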
Training screenshot:
3. Saving the Training Results
After training ends, everything is saved in a folder named YOLOX_outputs.
The txt file there is the training log, and the three .pth.tar archives are the checkpoints, analogous to the weight files a YOLO run produces; the demo command below loads last_epoch_ckpt.pth.tar.
4. Testing the Trained Model
The inference command:
python D:/bytetrack/ByteTrack-main/tools/demo_track.py video -f D:/bytetrack/ByteTrack-main/exps/example/mot/tree_yolox_x_ch.py --path D:/bytetrack/ByteTrack-main/datasets/xxx.MP4 -c D:/bytetrack/ByteTrack-main/YOLOX_outputs/tree_yolox_x_ch/last_epoch_ckpt.pth.tar --fp16 --fuse --save_result
Detection screenshot:
The output results are likewise saved under the YOLOX_outputs folder, in a track_vis subfolder.
5. Summary
I have only recently started playing with object tracking, and I am no professional coder hahaha~ I wrote this post mainly because there are so few training write-ups out there: partly to document my own process, partly as a reference for others~ Questions and discussion are welcome~