首页 > 其他分享 >yolov5训练前准备工作(采样、制作数据集、数据集增强、数据集预处理)

yolov5训练前准备工作(采样、制作数据集、数据集增强、数据集预处理)

时间:2023-01-29 14:01:15浏览次数:42  
标签:yolov5 img os find bndbox new path 数据 预处理

写在前面
训练数据可以有多种输入方式,本文提到了其中一种。使用的时候注意工作路径。

使用方法:
收集图片,或使用video_2_jpg.py采样视频,(用cam_video.py拍视频,用get_img.py拍照片)
批量重命名图片,最好都是数字
把图片放在全英文路径下,开始用LabelImg标注,生成xml文件
把标注图片和文件分别放在images、annotations两个文件夹
用data_agumentation.py进行数据增强,同时生成图片和xml
用png_to_jpg.py把可能有的png转化为jpg
把图片放在yolov5s-master文件夹的datasets/images文件夹下,把标注放在datasets/annotations下,用split_train_val.py划分训练集和验证集(脚本中trainval_percent默认为1.0,因此生成的test.txt为空)
用voc_label.py把datasets/annotations文件夹下的xml转换为datasets/labels文件夹下的txt,并把split_train_val.py输出的划分列表转换为图片路径列表(datasets/train.txt等),这些列表会直接输入到yolo。yolo默认在图片目录的父目录下的labels文件夹中查找标注
修改模型yaml文件里的类别数量和名称
修改data的yaml文件,大致如下
path: datasets # dataset root dir
train: train.txt # train images (relative to 'path')
val: val.txt # val images (relative to 'path')
nc: 19 # number of classes
names: [自己的类别名]

代码
下面是相应文件的代码
get_img.py

# coding:utf-8
import cv2
import numpy as np
import time
# Preview an undistorted camera stream; press "s" to save the current frame
# into new_data/<millisecond timestamp>.jpg and ESC to quit.

# Camera index: 0 is normally the built-in camera, 1, 2, ... select other cameras.
cap = cv2.VideoCapture(1)
# Intrinsic matrix and distortion coefficients used for undistortion.
cameraMatrix = np.array([[804.4703, -4.7160, 404.5110],
                         [0, 799.1279, 351.8036],
                         [0, 0, 1]])
distCoeffs = np.array([[-0.5834], [0.7615], [0.0026], [0.0107], [0]])
R = np.identity(3)

# The undistortion maps only depend on the calibration and the frame size,
# so they are built once from the first frame instead of on every iteration.
map1 = None
map2 = None

try:
    while True:
        # Grab one frame; stop instead of crashing on img.shape when it fails.
        success, img = cap.read()
        if not success:
            print("No frame")
            break

        if map1 is None:
            frame_height, frame_width = img.shape[:2]
            # OpenCV size arguments are (width, height), not (rows, cols).
            img_size = (frame_width, frame_height)
            newCameraMatrix, _ = cv2.getOptimalNewCameraMatrix(
                cameraMatrix, distCoeffs, img_size, 1, img_size, 0)
            map1, map2 = cv2.initUndistortRectifyMap(
                cameraMatrix, distCoeffs, R, newCameraMatrix, img_size, cv2.CV_16SC2)

        rectified_img = cv2.remap(img, map1, map2, cv2.INTER_LINEAR)
        cv2.imshow('----------please enter "s" to take a picture----------', rectified_img)

        # Poll the keyboard for 1 ms so the preview keeps refreshing.
        k = cv2.waitKey(1)
        if k == 27:  # ESC
            break
        elif k == ord("s"):
            # Millisecond timestamp keeps the file names unique.
            timestamp = int(round(time.time() * 1000))
            img_path = f"new_data/{timestamp}.jpg"
            # imwrite reports failure through its return value.
            if not cv2.imwrite(img_path, rectified_img):
                print(f"failed to save {img_path}")
finally:
    # Always free the camera and close the preview window.
    cap.release()
    cv2.destroyAllWindows()

cam_video.py


# -*- coding: UTF-8 -*-
import cv2
import os
import time

# Record the default camera into <cwd>/<timestamp>.mp4 until "q" is pressed.
cap = cv2.VideoCapture(0)
width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
size = (int(width), int(height))
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # other codecs: DIVX, XVID, MJPG, X264, WMV1, WMV2
path = os.getcwd()
os.makedirs(path, exist_ok=True)

# Build the output path with os.path.join so it also works outside Windows
# (the previous hard-coded '\\' separator did not).
video_file = os.path.join(
    path, time.strftime(r"%Y-%m-%d_%H-%M-%S", time.localtime()) + '.mp4')
out = cv2.VideoWriter(video_file, fourcc, 24.0, size)

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("No frame")
            break
        cv2.imshow('frame', frame)
        out.write(frame)
        if cv2.waitKey(1) == ord('q'):  # press q to stop recording
            break
finally:
    # Release the camera and finalize the video file even if the loop raised.
    cap.release()
    out.release()
    cv2.destroyAllWindows()


video_2_jpg.py

# -*- coding: UTF-8 -*-
import cv2
import os

# Sample every `interval`-th frame from each .mp4 in `filepath` and store the
# frames as JPEG files in one shared output folder.
filepath = r'./cam_video/'  # folder that holds the source videos
pathDir = os.listdir(filepath)

# All videos write into the same output folder.
save_path = './cam_img_data'
os.makedirs(save_path, exist_ok=True)

cnt = 1       # running frame counter, shared across all videos
i = 4000      # running index used in the output file names
interval = 5  # keep one frame out of every `interval`

for Dir in pathDir:
    if not Dir.endswith('.mp4'):
        continue
    video_path = filepath + Dir
    print(video_path)
    video = cv2.VideoCapture(video_path)
    if not video.isOpened():
        print('fail to open')
        continue
    print('open successfully')

    while True:
        rval, frame = video.read()
        # A failed read returns no frame; stop before passing None to cv2.flip.
        if not rval:
            break
        if cnt % interval == 0:
            i += 1
            frame = cv2.flip(frame, 0)  # the camera is mounted upside down
            out_file = save_path + '/01{}.jpg'.format(i)
            if not cv2.imwrite(out_file, frame):
                print('fail to write ' + out_file)
        # Count every frame, including ones that could not be saved.
        cnt += 1
    video.release()
    print('write successfully')


data_agumentation.py

# -*- coding: utf-8 -*-
import xml.etree.ElementTree as ET
import os
import numpy as np
from PIL import Image
import shutil
import imgaug as ia
from imgaug import augmenters as iaa
#【注意】文件命名为数字,xml文件别带中文,用labelimg标注的时候图片路径放在没有中文的地方
ia.seed(42)

def read_xml_annotation(root, image_id):
    """Read every bounding box from an annotation XML file.

    Args:
        root: Directory that contains the XML file.
        image_id: File name of the XML file (including the extension).

    Returns:
        A list with one ``[xmin, ymin, xmax, ymax]`` list of ints per
        ``<object>`` element, in document order. The list is empty when the
        file has no ``<object>`` element.
    """
    # Passing the path lets ElementTree open and close the file itself.
    tree = ET.parse(os.path.join(root, image_id))
    bndboxlist = []
    for obj in tree.getroot().findall('object'):
        bndbox = obj.find('bndbox')
        bndboxlist.append(
            [int(bndbox.find(tag).text) for tag in ('xmin', 'ymin', 'xmax', 'ymax')])
    return bndboxlist


# (506.0000, 330.0000, 528.0000, 348.0000) -> (520.4747, 381.5080, 540.5596, 398.6603)
def change_xml_annotation(root, image_id, new_target):
    """Overwrite the first object's bounding box and save a zero-padded copy.

    Args:
        root: Directory that contains ``<image_id>.xml``; the result is
            written into the same directory.
        image_id: Numeric stem of the annotation file.
        new_target: Sequence ``(xmin, ymin, xmax, ymax)`` with the new box.

    The result is written to ``<image_id as 6 digits>.xml``. The previous
    code formatted the builtin ``id`` with ``%06d`` and therefore always
    raised ``TypeError``; zero-padding ``image_id`` matches the naming used
    by ``change_xml_list_annotation``.
    """
    new_xmin = new_target[0]
    new_ymin = new_target[1]
    new_xmax = new_target[2]
    new_ymax = new_target[3]

    # Passing the path lets ElementTree open and close the file itself.
    tree = ET.parse(os.path.join(root, str(image_id) + '.xml'))
    bndbox = tree.getroot().find('object').find('bndbox')
    bndbox.find('xmin').text = str(new_xmin)
    bndbox.find('ymin').text = str(new_ymin)
    bndbox.find('xmax').text = str(new_xmax)
    bndbox.find('ymax').text = str(new_ymax)
    tree.write(os.path.join(root, "%06d" % int(image_id) + '.xml'))


def change_xml_list_annotation(root, image_id, new_target, saveroot, id):
    """Write a copy of an annotation with every bounding box replaced.

    Args:
        root: Directory that contains the source ``<image_id>.xml``.
        image_id: Stem of the source annotation file.
        new_target: One ``[xmin, ymin, xmax, ymax]`` entry per ``<object>``
            element, in the same order as the objects appear in the file.
        saveroot: Directory the new XML file is written to.
        id: Numeric id of the new sample; it is zero-padded to six digits
            for both the ``<filename>`` text and the output file name.
    """
    # Passing the path lets ElementTree open and close the file itself.
    tree = ET.parse(os.path.join(root, str(image_id) + '.xml'))
    new_stem = "%06d" % int(id)
    tree.find('filename').text = new_stem + '.jpg'

    for index, obj in enumerate(tree.getroot().findall('object')):
        bndbox = obj.find('bndbox')
        # Entries of new_target are matched to objects by position.
        for pos, tag in enumerate(('xmin', 'ymin', 'xmax', 'ymax')):
            bndbox.find(tag).text = str(new_target[index][pos])

    tree.write(os.path.join(saveroot, new_stem + '.xml'))


def mkdir(path):
    """Create ``path`` (with parents) unless it already exists.

    Surrounding whitespace and trailing "/" characters are removed from
    ``path`` before it is used.

    Returns:
        True when the directory was created, False when the path already
        existed. A status line is printed in both cases.
    """
    target = path.strip().rstrip("/")
    # Guard clause: nothing to do when the path is already there.
    if os.path.exists(target):
        print(target + 'already exits ')
        return False
    os.makedirs(target)
    print(target + 'is successfully created!')
    return True


if __name__ == "__main__":
    # For every source image: copy it and its XML into the output folders,
    # then write AUGLOOP augmented image/XML pairs next to them.

    IMG_DIR = "new_data/image"
    XML_DIR = "new_data/annotation"

    # Both output folders are deleted and recreated on every run.
    AUG_XML_DIR = "argu_new_data/annotations"  # augmented XML files
    try:
        shutil.rmtree(AUG_XML_DIR)
    except FileNotFoundError:
        pass
    mkdir(AUG_XML_DIR)

    AUG_IMG_DIR = "argu_new_data/images"  # augmented images
    try:
        shutil.rmtree(AUG_IMG_DIR)
    except FileNotFoundError:
        pass
    mkdir(AUG_IMG_DIR)

    AUGLOOP = 7  # number of augmented copies per source image

    # Augmentation pipeline.
    seq = iaa.Sequential([
        iaa.Flipud(0.5),  # vertical flip with probability 0.5
        iaa.Fliplr(0.5),  # horizontal mirror with probability 0.5
        iaa.Multiply((1.2, 1.5)),  # brightness change, does not affect boxes
        iaa.GaussianBlur(sigma=(0, 2.0)),
        iaa.Affine(
            translate_px={"x": 15, "y": 15},
            scale=(0.8, 0.95),
            rotate=(-30, 30)
        )  # shift 15px on x/y, scale to 80-95%, rotate +-30 degrees; affects boxes
    ])

    for root, sub_folders, files in os.walk(IMG_DIR):

        for name in files:
            # File names are expected to be "<number>.<3-letter extension>".
            stem = name[:-4]
            suffix = name[-4:]

            bndbox = read_xml_annotation(XML_DIR, stem + '.xml')
            shutil.copy(os.path.join(XML_DIR, stem + '.xml'), AUG_XML_DIR)
            shutil.copy(os.path.join(root, name), AUG_IMG_DIR)

            # The pixels do not change between epochs, so load them once.
            img = np.asarray(Image.open(os.path.join(root, name)))

            for epoch in range(AUGLOOP):
                # Freeze the random parameters so the image and its boxes
                # receive the same transformation.
                seq_det = seq.to_deterministic()
                new_id = len(files) + int(stem) + epoch * 1000
                new_bndbox_list = []

                # Transform each box and clamp it to the image area.
                for box in bndbox:
                    bbs = ia.BoundingBoxesOnImage([
                        ia.BoundingBox(x1=box[0], y1=box[1], x2=box[2], y2=box[3]),
                    ], shape=img.shape)
                    bbs_aug = seq_det.augment_bounding_boxes([bbs])[0]
                    aug_box = bbs_aug.bounding_boxes[0]

                    n_x1 = int(max(1, min(img.shape[1], aug_box.x1)))
                    n_y1 = int(max(1, min(img.shape[0], aug_box.y1)))
                    n_x2 = int(max(1, min(img.shape[1], aug_box.x2)))
                    n_y2 = int(max(1, min(img.shape[0], aug_box.y2)))
                    # Keep boxes collapsed onto the top/left border at least 1px wide.
                    if n_x1 == 1 and n_x1 == n_x2:
                        n_x2 += 1
                    if n_y1 == 1 and n_y2 == n_y1:
                        n_y2 += 1
                    if n_x1 >= n_x2 or n_y1 >= n_y2:
                        print('error', name)
                    new_bndbox_list.append([n_x1, n_y1, n_x2, n_y2])

                # Save the augmented image.
                # NOTE(review): the previous code routed the image through
                # bbs.draw_on_image(..., thickness=0), which presumably draws
                # nothing; saving directly also avoids a NameError (or a stale
                # box) for images without any object - confirm against the
                # installed imgaug version.
                image_aug = seq_det.augment_images([img])[0]
                path = os.path.join(AUG_IMG_DIR, str("%06d" % new_id) + suffix)
                Image.fromarray(image_aug).save(path)

                # Save the matching XML and report the name actually written
                # (the old message used epoch * 250 and did not match the file).
                change_xml_list_annotation(XML_DIR, stem, new_bndbox_list, AUG_XML_DIR, new_id)
                print(str("%06d" % new_id) + '.xml')

png_to_jpg.py

from PIL import Image
import os
import shutil
if __name__ == '__main__':
    # Convert every PNG in ./images to JPEG in ./jpg_images; every other
    # file is copied over under "<stem>.jpg".
    path = './images'
    save_path = './jpg_images'
    os.makedirs(save_path, exist_ok=True)
    for name in os.listdir(path):
        filepath = os.path.join(path, name)
        # Sub-directories cannot be copied with shutil.copy; skip them.
        if not os.path.isfile(filepath):
            continue
        # splitext copes with extensions of any length (e.g. ".jpeg").
        stem, ext = os.path.splitext(name)
        save_filepath = os.path.join(save_path, stem + '.jpg')
        # Compare case-insensitively so ".PNG" files are converted as well
        # instead of being copied unchanged under a .jpg name.
        if ext.lower() == '.png':
            with Image.open(filepath) as img:
                # JPEG has no alpha channel, so convert to RGB first.
                img.convert('RGB').save(save_filepath, quality=95)
        else:
            shutil.copy(filepath, save_filepath)

split_train_val.py

# coding:utf-8
import os
import random
import argparse

# Randomly split the annotation files into trainval/train/val/test lists and
# write one file stem per line into <txt_path>/{trainval,train,val,test}.txt.
parser = argparse.ArgumentParser()
# Folder that holds the XML annotations; adjust to your own data.
parser.add_argument('--xml_path', default='datasets/annotations', type=str, help='input xml label path')
# Folder that receives the split lists.
parser.add_argument('--txt_path', default='datasets/ImageSets/Main', type=str, help='output txt label path')
opt = parser.parse_args()

trainval_percent = 1.0  # share of all samples that goes into train+val (rest is test)
train_percent = 0.9     # share of train+val that goes into train
xmlfilepath = opt.xml_path
txtsavepath = opt.txt_path
# Only annotation files take part in the split; stray files are ignored.
total_xml = [f for f in os.listdir(xmlfilepath) if f.endswith('.xml')]
os.makedirs(txtsavepath, exist_ok=True)

num = len(total_xml)
list_index = range(num)
tv = int(num * trainval_percent)
tr = int(tv * train_percent)
trainval_list = random.sample(list_index, tv)
train_list = random.sample(trainval_list, tr)
# Sets make the membership tests in the loop below O(1).
trainval = set(trainval_list)
train = set(train_list)

# The context manager closes all four files even if a write fails.
with open(os.path.join(txtsavepath, 'trainval.txt'), 'w') as file_trainval, \
        open(os.path.join(txtsavepath, 'test.txt'), 'w') as file_test, \
        open(os.path.join(txtsavepath, 'train.txt'), 'w') as file_train, \
        open(os.path.join(txtsavepath, 'val.txt'), 'w') as file_val:
    for i in list_index:
        name = total_xml[i][:-4] + '\n'  # drop the ".xml" suffix
        if i in trainval:
            file_trainval.write(name)
            if i in train:
                file_train.write(name)
            else:
                file_val.write(name)
        else:
            file_test.write(name)

voc_label.py

# -*- coding: utf-8 -*-

import xml.etree.ElementTree as ET
import os
from os import getcwd
# 运行目录为yolov5-master,把xml转为txt,配合split_train_val.py划分训练集和验证集,需要数据全为jpg
# 少用全局路径,会产生转义字符
sets = ['train', 'val', 'test']
classes = ['ad', 'ad1', 'ad2', 'bskl', 'dp', 'dp1', 'hn', 'hsfk', 'jdb', 'jdb1', 'lsfk', 'mf', 'mf1', 'qdpj', 'wlj', 'xb', 'xhpj', 'xhpj1', 'xhpj2']
#abs_path = os.getcwd()
#print(abs_path)

def convert(size, box):
    """Turn a corner-style box into a normalized center/size box.

    Args:
        size: ``(width, height)`` of the image.
        box: ``(xmin, xmax, ymin, ymax)`` in pixels.

    Returns:
        Tuple ``(x, y, w, h)``: box center (shifted by -1 pixel on each
        axis) and box size, each multiplied by the reciprocal of the image
        width or height respectively.
    """
    x_lo, x_hi, y_lo, y_hi = box[0], box[1], box[2], box[3]
    inv_w = 1. / (size[0])
    inv_h = 1. / (size[1])

    center_x = ((x_lo + x_hi) / 2.0 - 1) * inv_w
    center_y = ((y_lo + y_hi) / 2.0 - 1) * inv_h
    box_w = (x_hi - x_lo) * inv_w
    box_h = (y_hi - y_lo) * inv_h
    return center_x, center_y, box_w, box_h

def convert_annotation(image_id):
    """Convert one XML annotation into a label text file.

    Reads ``datasets/annotations/<image_id>.xml`` and writes
    ``datasets/labels/<image_id>.txt`` with one line per object:
    ``<class index> <x> <y> <w> <h>``, where the four numbers come from
    ``convert``. Objects whose name is not listed in ``classes`` are
    reported on stdout and skipped.

    Args:
        image_id: File stem shared by the XML file and the label file.
    """
    # Context managers close both files; the output was previously never
    # closed explicitly.
    with open('datasets/annotations/%s.xml' % (image_id), encoding='UTF-8') as in_file:
        tree = ET.parse(in_file)
    root = tree.getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    with open('datasets/labels/%s.txt' % (image_id), 'w') as out_file:
        for obj in root.iter('object'):
            # The "difficult" flag of the annotation is not read; every
            # object with a known class name is exported.
            cls = obj.find('name').text
            if cls not in classes:
                print(image_id, " wrong class name:" + cls)
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            x_min = float(xmlbox.find('xmin').text)
            x_max = float(xmlbox.find('xmax').text)
            y_min = float(xmlbox.find('ymin').text)
            y_max = float(xmlbox.find('ymax').text)
            # Clamp boxes that stick out of the image on either side.
            x_min = min(max(x_min, 0), w)
            x_max = min(max(x_max, 0), w)
            y_min = min(max(y_min, 0), h)
            y_max = min(max(y_max, 0), h)
            bb = convert((w, h), (x_min, x_max, y_min, y_max))
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')

# For every split: turn the list of image ids into a list of image paths
# (datasets/<split>.txt) and write the label text file of each image.
os.makedirs('datasets/labels/', exist_ok=True)
for image_set in sets:
    with open('datasets/ImageSets/Main/%s.txt' % (image_set)) as id_file:
        image_ids = id_file.read().strip().split()
    with open('datasets/%s.txt' % (image_set), 'w') as list_file:
        for image_id in image_ids:
            # Paths are relative to the working directory.
            list_file.write('datasets/images/%s.jpg\n' % (image_id))
            # NOTE(review): this call was commented out, so no label files
            # were produced although that is the stated purpose of the
            # script; it is enabled again here - confirm this is intended.
            convert_annotation(image_id)

 

标签:yolov5,img,os,find,bndbox,new,path,数据,预处理
From: https://www.cnblogs.com/kn-zheng/p/17072507.html

相关文章

  • 数据库设计
    数据库设计什么:有哪些表表里有哪些字段表和表之间有哪些关系表关系有哪几种一对一一对多(多对一)多对多   ......
  • 【奇妙的数据结构世界】用图像和代码对堆栈的使用进行透彻学习 | C++
    第十章堆栈:::hljs-center目录第十章堆栈●前言●一、堆栈是什么?1.简要介绍●二、堆栈操作的关键代码段1.类型定义2.顺序栈的常用操作3.链式栈的常用......
  • 利用递归函数遍历数据
    vardata=[{id:1,name:'家电',goods:[{id:11,......
  • mysql8.0 --mysqldump数据备份
     mysqldump数据,可以把用户名写在配置文件的[mysqldump]中但是这里只能写一个用户名和密码,如果一个数据库里有多个用户和数据库怎么分开备份。1、创建一个备份的用户test......
  • 常见的6个Python数据可视化库!
    提到数据可视化库,相信大家对这个都不陌生,而且Python中内置了很多数据可视化库,是我们工作的好帮手。本文为大家介绍一下常见的6个Python数据可视化库,希望对你们有帮助。......
  • 数据访问层服务自动注册类封装和使用源码-AutoFac
    项目使用三层结构RepositoryIocFactoryusingSystem;usingSystem.Reflection;usingAutofac;namespaceCommonHelper.AutoInject.Repository{publicclassRe......
  • oi中如何一次造多组数据
    #include<bits/stdc++.h>usingnamespacestd;voidwork(FILE*fp,intt)//t表示这是第几组数据{ fprintf(fp,"%d",rand());//用这个输出你想造的数据即可 ......
  • yolov5检测框重合重复,手动调参方法(调整detect,val的conf,iou)
    一、问题描述:检测框重复出现上述问题一般是整体检测方向没错,但conf-thres和iou-thres的参数需要调整。(在默认值0.25和0.45的基础上,提高置信区间,降低iou)conf-thres:置信......
  • yolov5 优化——mosaic相关
    概述Mosaic利用了四张图片重新排布成一张图片,根据论文所说其拥有一个巨大的优点是丰富检测物体的背景:随机缩放增加了很多小目标,让网络的鲁棒性更好;且在BN计算的时候一下子......
  • yolov5 提速多GPU训练显存低的问题
    修改前:按照配置,在train.py配置如下:运行pythontrain.py后nvidia-smi显示显存占用如下:修改后参考yolov5官方中的issue中,有人提到的分布式多进程的方法:在yolov5运行......