This post documents how I processed a self-annotated object detection dataset. Small objects make up a large share of the data, so the goal is to cut the dataset's images into smaller tiles. The process is fairly tedious, so I am recording it here for anyone with the same need, and for my own future reference. If you run into problems or know a better approach, please point it out in the comments so we can all improve.
labelme to VOC
There is plenty of reference code for this step online; I used the script below. A labelme annotation session produces a folder that contains the images together with same-named .json files holding the label information.
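For reference, here is a minimal way to peek at one of those annotation files (a sketch; the path is a placeholder). The converter below relies only on the "shapes" list, where each shape carries a "label" and its "points":

import json

# placeholder path; point it at any .json produced by labelme
ann = json.load(open(r"your/labelme/dir/example.json", encoding="utf-8"))
for shape in ann["shapes"]:
    print(shape["label"], shape["points"])  # class name and box/polygon vertices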
VOC-format data follows this directory layout:
VOC_ROOT                  # root directory
├── JPEGImages            # source images
│   ├── aaaa.jpg
│   ├── bbbb.jpg
│   └── cccc.jpg
├── Annotations           # xml files, one per image in JPEGImages, describing its content
│   ├── aaaa.xml
│   ├── bbbb.xml
│   └── cccc.xml
└── ImageSets
    └── Main
        ├── train.txt     # each line holds one image name
        └── val.txt
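For example, with the layout above, a train.txt covering the first two images would simply read (image names without extension):

aaaa
bbbb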
Below is the conversion script, labelme2voc.py:
import os
import codecs
import json
import shutil
from glob import glob

import cv2
import numpy as np
from sklearn.model_selection import train_test_split

# 1. Paths (trailing slash required)
labelme_imgpath = r""    # directory of the original labelme images
labelme_annorpath = r""  # directory of the labelme annotations (json)
saved_path = r""         # output VOC root directory
isUseTest = True         # whether to create a test split

# 2. Create the required directories
if not os.path.exists(saved_path + "Annotations/"):
    os.makedirs(saved_path + "Annotations/")
if not os.path.exists(saved_path + "JPEGImages/"):
    os.makedirs(saved_path + "JPEGImages/")
if not os.path.exists(saved_path + "ImageSets/Main/"):
    os.makedirs(saved_path + "ImageSets/Main/")

# 3. Collect the files to process
files = glob(labelme_annorpath + "*.json")
files = [i.replace("\\", "/").split("/")[-1].split(".json")[0] for i in files]
# print(files)

# 4. Read the labelme annotations and write them out as VOC xml
for json_file_ in files:
    json_filename = labelme_annorpath + json_file_ + ".json"
    json_file = json.load(open(json_filename, "r", encoding="utf-8"))
    # read the image to get its dimensions
    height, width, channels = cv2.imread(labelme_imgpath + json_file_ + ".jpg").shape
    with codecs.open(saved_path + "Annotations/" + json_file_ + ".xml", "w", "utf-8") as xml:
        xml.write('<annotation>\n')
        xml.write('\t<folder>' + 'WH_data' + '</folder>\n')
        xml.write('\t<filename>' + json_file_ + ".jpg" + '</filename>\n')
        xml.write('\t<source>\n')
        xml.write('\t\t<database>WH Data</database>\n')
        xml.write('\t\t<annotation>WH</annotation>\n')
        xml.write('\t\t<image>flickr</image>\n')
        xml.write('\t\t<flickrid>NULL</flickrid>\n')
        xml.write('\t</source>\n')
        xml.write('\t<owner>\n')
        xml.write('\t\t<flickrid>NULL</flickrid>\n')
        xml.write('\t\t<name>WH</name>\n')
        xml.write('\t</owner>\n')
        xml.write('\t<size>\n')
        xml.write('\t\t<width>' + str(width) + '</width>\n')
        xml.write('\t\t<height>' + str(height) + '</height>\n')
        xml.write('\t\t<depth>' + str(channels) + '</depth>\n')
        xml.write('\t</size>\n')
        xml.write('\t\t<segmented>0</segmented>\n')
        for multi in json_file["shapes"]:
            points = np.array(multi["points"])
            labelName = multi["label"]
            xmin = min(points[:, 0])
            xmax = max(points[:, 0])
            ymin = min(points[:, 1])
            ymax = max(points[:, 1])
            # skip degenerate boxes with zero width or height
            if xmax <= xmin or ymax <= ymin:
                continue
            xml.write('\t<object>\n')
            xml.write('\t\t<name>' + labelName + '</name>\n')
            xml.write('\t\t<pose>Unspecified</pose>\n')
            xml.write('\t\t<truncated>1</truncated>\n')
            xml.write('\t\t<difficult>0</difficult>\n')
            xml.write('\t\t<bndbox>\n')
            xml.write('\t\t\t<xmin>' + str(int(xmin)) + '</xmin>\n')
            xml.write('\t\t\t<ymin>' + str(int(ymin)) + '</ymin>\n')
            xml.write('\t\t\t<xmax>' + str(int(xmax)) + '</xmax>\n')
            xml.write('\t\t\t<ymax>' + str(int(ymax)) + '</ymax>\n')
            xml.write('\t\t</bndbox>\n')
            xml.write('\t</object>\n')
            print(json_filename, xmin, ymin, xmax, ymax, labelName)
        xml.write('</annotation>')

# 5. Copy the images into VOC2007/JPEGImages/
image_files = glob(labelme_imgpath + "*.jpg")
print("copy image files to VOC2007/JPEGImages/")
for image in image_files:
    shutil.copy(image, saved_path + "JPEGImages/")

# 6. Split the dataset and write the ImageSets txt files
txtsavepath = saved_path + "ImageSets/Main/"
ftrainval = open(txtsavepath + 'trainval.txt', 'w')
ftest = open(txtsavepath + 'test.txt', 'w')
ftrain = open(txtsavepath + 'train.txt', 'w')
fval = open(txtsavepath + 'val.txt', 'w')
total_files = glob(saved_path + "Annotations/*.xml")
total_files = [i.replace("\\", "/").split("/")[-1].split(".xml")[0] for i in total_files]
trainval_files = []
test_files = []
if isUseTest:
    trainval_files, test_files = train_test_split(
        total_files, test_size=0.15, random_state=55)
else:
    trainval_files = total_files
for file in trainval_files:
    ftrainval.write(file + "\n")
# split trainval into train and val
train_files, val_files = train_test_split(
    trainval_files, test_size=0.15, random_state=55)
for file in train_files:
    ftrain.write(file + "\n")
for file in val_files:
    fval.write(file + "\n")
for file in test_files:
    print(file)
    ftest.write(file + "\n")
ftrainval.close()
ftrain.close()
fval.close()
ftest.close()
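As a quick sanity check after running the script (a sketch; saved_path is the same placeholder output root as above, trailing slash included), every xml in Annotations/ should have a matching image in JPEGImages/:

import os
from glob import glob

saved_path = r""  # same output root as in labelme2voc.py
xmls = {os.path.basename(p)[:-4] for p in glob(saved_path + "Annotations/*.xml")}
jpgs = {os.path.basename(p)[:-4] for p in glob(saved_path + "JPEGImages/*.jpg")}
print("xml without image:", xmls - jpgs)  # should print an empty set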
Removing unneeded labels from the VOC dataset
My dataset was originally annotated with 10 label classes, but I only need 4 of them for training, so the images and annotations belonging to the remaining classes all have to go. Since the dataset is already in VOC format, deletion is straightforward: walk the xml folder, parse each xml file, and whenever an object of an unwanted class appears, delete that xml together with its image.
I can afford to do this because the 6 unwanted classes are rare in my dataset. Even for images that mix wanted and unwanted targets, I delete them outright, which does not significantly reduce the number of images.
Below is my script, voc_purification.py. Note that my ImageSets\Main\
folder contains four split files, trainval.txt, train.txt, val.txt and test.txt
(train+val, train, val and test respectively), so the script filters each of the four txt files in turn for lines that need to be removed.
import glob
import os
import xml.etree.ElementTree as ET

# class names: put the classes you want to delete here
delete_labels = ['a', 'b', 'c', 'd', 'e', 'f']

path = r'your/annotation/path'  # directory containing the xml files
img_path = r'your/image/path'   # directory containing the images
# the four split files under ImageSets/Main/
txt_paths = [r"your/trainval.txt/path",
             r"your/train.txt/path",
             r"your/val.txt/path",
             r"your/test.txt/path"]

for xml_file in glob.glob(path + '/*.xml'):
    # file name without extension
    filename = os.path.basename(xml_file)[:-4]
    # parse the annotation and get its root node
    tree = ET.parse(xml_file)
    root = tree.getroot()
    # inspect every object in the annotation
    for member in root.findall('object'):
        # the class name inside the object tag
        objectname = member.find('name').text
        if objectname in delete_labels:
            # delete the image
            os.remove(os.path.join(img_path, filename + '.jpg'))
            print('remove img: ' + filename + '.jpg')
            # drop the corresponding line from each of the four split files
            for txt_path in txt_paths:
                with open(txt_path, 'r') as file:
                    lines = file.readlines()
                with open(txt_path, 'w') as file:
                    for line in lines:
                        if line.strip("\n") != filename:
                            file.write(line)
            print('remove txt entries for: ' + filename)
            # delete the annotation itself
            os.remove(os.path.join(path, filename + '.xml'))
            print('remove xml: ' + filename + '.xml')
            break
VOC to COCO format
The reason I converted to VOC first is that VOC's one-xml-per-image layout makes it easy to delete unwanted images. In practice, though, COCO is the more widely used format, so the next step converts the dataset to COCO.
There are many tutorials for this step online; here is the one I used.
voc2coco_from_txt
import shutil
import xml.etree.ElementTree as ET
import os
import json

coco = dict()
coco['images'] = []
coco['type'] = 'instances'
coco['annotations'] = []
coco['categories'] = []

category_set = dict()
image_set = set()

# Note: check whether your application expects category ids to start from 0
# or from 1. If from 1 (i.e. a background class is included), change the
# next line to category_item_id = 0.
category_item_id = -1
image_id = 20180000000
annotation_id = 0


def addCatItem(name):
    global category_item_id
    category_item = dict()
    category_item['supercategory'] = 'none'
    category_item_id += 1
    category_item['id'] = category_item_id
    category_item['name'] = name
    coco['categories'].append(category_item)
    category_set[name] = category_item_id
    return category_item_id


def addImgItem(file_name, size):
    global image_id
    if file_name is None:
        raise Exception('Could not find filename tag in xml file.')
    if size['width'] is None:
        raise Exception('Could not find width tag in xml file.')
    if size['height'] is None:
        raise Exception('Could not find height tag in xml file.')
    image_id += 1
    image_item = dict()
    image_item['id'] = image_id
    image_item['file_name'] = file_name
    image_item['width'] = size['width']
    image_item['height'] = size['height']
    coco['images'].append(image_item)
    image_set.add(file_name)
    return image_id


def addAnnoItem(object_name, image_id, category_id, bbox):
    global annotation_id
    annotation_item = dict()
    annotation_item['segmentation'] = []
    seg = []
    # bbox[] is x,y,w,h
    # left_top
    seg.append(bbox[0])
    seg.append(bbox[1])
    # left_bottom
    seg.append(bbox[0])
    seg.append(bbox[1] + bbox[3])
    # right_bottom
    seg.append(bbox[0] + bbox[2])
    seg.append(bbox[1] + bbox[3])
    # right_top
    seg.append(bbox[0] + bbox[2])
    seg.append(bbox[1])
    annotation_item['segmentation'].append(seg)
    annotation_item['area'] = bbox[2] * bbox[3]
    annotation_item['iscrowd'] = 0
    annotation_item['ignore'] = 0
    annotation_item['image_id'] = image_id
    annotation_item['bbox'] = bbox
    annotation_item['category_id'] = category_id
    annotation_id += 1
    annotation_item['id'] = annotation_id
    coco['annotations'].append(annotation_item)


def _read_image_ids(image_sets_file):
    ids = []
    with open(image_sets_file) as f:
        for line in f:
            ids.append(line.rstrip())
    return ids


"""Generate from a txt split file"""
# split = 'train' / 'val' / 'trainval' / 'test'
def parseXmlFiles_by_txt(data_dir, json_save_path, split='train'):
    labelfile = split + ".txt"
    image_sets_file = data_dir + "/ImageSets/Main/" + labelfile
    ids = _read_image_ids(image_sets_file)
    for _id in ids:
        image_file = data_dir + f"/JPEGImages/{_id}.jpg"
        # copy this split's image into the COCO image folder; this hard-coded
        # destination is from my own setup and should be adapted to yours
        shutil.copy(image_file, fr"E:\DataSets\labelme_new\COCO_cls_4\val{_id}.jpg")
        xml_file = data_dir + f"/Annotations/{_id}.xml"
        bndbox = dict()
        size = dict()
        current_image_id = None
        current_category_id = None
        file_name = None
        size['width'] = None
        size['height'] = None
        size['depth'] = None
        tree = ET.parse(xml_file)
        root = tree.getroot()
        if root.tag != 'annotation':
            raise Exception('pascal voc xml root element should be annotation, rather than {}'.format(root.tag))
        # elem is <folder>, <filename>, <size>, <object>
        for elem in root:
            current_parent = elem.tag
            current_sub = None
            object_name = None
            if elem.tag == 'folder':
                continue
            if elem.tag == 'filename':
                # If the xml file name and the content of its 'filename' tag
                # disagree and the xml file name is the correct one (i.e. the
                # annotation is wrong), use the xml file name instead, i.e.
                # replace the next line with: file_name = _id + '.jpg'
                file_name = elem.text
                if file_name in category_set:
                    raise Exception('file_name duplicated')
            # add img item only after parse <size> tag
            elif current_image_id is None and file_name is not None and size['width'] is not None:
                if file_name not in image_set:
                    current_image_id = addImgItem(file_name, size)
                    print('add image with {} and {}'.format(file_name, size))
                else:
                    raise Exception('duplicated image: {}'.format(file_name))
            # subelem is <width>, <height>, <depth>, <name>, <bndbox>
            for subelem in elem:
                bndbox['xmin'] = None
                bndbox['xmax'] = None
                bndbox['ymin'] = None
                bndbox['ymax'] = None
                current_sub = subelem.tag
                if current_parent == 'object' and subelem.tag == 'name':
                    object_name = subelem.text
                    if object_name not in category_set:
                        current_category_id = addCatItem(object_name)
                    else:
                        current_category_id = category_set[object_name]
                elif current_parent == 'size':
                    if size[subelem.tag] is not None:
                        raise Exception('xml structure broken at size tag.')
                    size[subelem.tag] = int(subelem.text)
                # option is <xmin>, <ymin>, <xmax>, <ymax>, when subelem is <bndbox>
                for option in subelem:
                    if current_sub == 'bndbox':
                        if bndbox[option.tag] is not None:
                            raise Exception('xml structure corrupted at bndbox tag.')
                        bndbox[option.tag] = int(option.text)
                # only after parse the <object> tag
                if bndbox['xmin'] is not None:
                    if object_name is None:
                        raise Exception('xml structure broken at bndbox tag')
                    if current_image_id is None:
                        raise Exception('xml structure broken at bndbox tag')
                    if current_category_id is None:
                        raise Exception('xml structure broken at bndbox tag')
                    bbox = []
                    # x
                    bbox.append(bndbox['xmin'])
                    # y
                    bbox.append(bndbox['ymin'])
                    # w
                    bbox.append(bndbox['xmax'] - bndbox['xmin'])
                    # h
                    bbox.append(bndbox['ymax'] - bndbox['ymin'])
                    print('add annotation with {},{},{},{}'.format(
                        object_name, current_image_id, current_category_id, bbox))
                    addAnnoItem(object_name, current_image_id, current_category_id, bbox)
    json.dump(coco, open(json_save_path, 'w'))


"""Generate directly from a folder of xml files"""
def parseXmlFiles(xml_path, json_save_path):
    for f in os.listdir(xml_path):
        if not f.endswith('.xml'):
            continue
        bndbox = dict()
        size = dict()
        current_image_id = None
        current_category_id = None
        file_name = None
        size['width'] = None
        size['height'] = None
        size['depth'] = None
        xml_file = os.path.join(xml_path, f)
        print(xml_file)
        tree = ET.parse(xml_file)
        root = tree.getroot()
        if root.tag != 'annotation':
            raise Exception('pascal voc xml root element should be annotation, rather than {}'.format(root.tag))
        # elem is <folder>, <filename>, <size>, <object>
        for elem in root:
            current_parent = elem.tag
            current_sub = None
            object_name = None
            if elem.tag == 'folder':
                continue
            if elem.tag == 'filename':
                file_name = elem.text
                if file_name in category_set:
                    raise Exception('file_name duplicated')
            # add img item only after parse <size> tag
            elif current_image_id is None and file_name is not None and size['width'] is not None:
                if file_name not in image_set:
                    current_image_id = addImgItem(file_name, size)
                    print('add image with {} and {}'.format(file_name, size))
                else:
                    raise Exception('duplicated image: {}'.format(file_name))
            # subelem is <width>, <height>, <depth>, <name>, <bndbox>
            for subelem in elem:
                bndbox['xmin'] = None
                bndbox['xmax'] = None
                bndbox['ymin'] = None
                bndbox['ymax'] = None
                current_sub = subelem.tag
                if current_parent == 'object' and subelem.tag == 'name':
                    object_name = subelem.text
                    if object_name not in category_set:
                        current_category_id = addCatItem(object_name)
                    else:
                        current_category_id = category_set[object_name]
                elif current_parent == 'size':
                    if size[subelem.tag] is not None:
                        raise Exception('xml structure broken at size tag.')
                    size[subelem.tag] = int(subelem.text)
                # option is <xmin>, <ymin>, <xmax>, <ymax>, when subelem is <bndbox>
                for option in subelem:
                    if current_sub == 'bndbox':
                        if bndbox[option.tag] is not None:
                            raise Exception('xml structure corrupted at bndbox tag.')
                        bndbox[option.tag] = int(option.text)
                # only after parse the <object> tag
                if bndbox['xmin'] is not None:
                    if object_name is None:
                        raise Exception('xml structure broken at bndbox tag')
                    if current_image_id is None:
                        raise Exception('xml structure broken at bndbox tag')
                    if current_category_id is None:
                        raise Exception('xml structure broken at bndbox tag')
                    bbox = []
                    # x
                    bbox.append(bndbox['xmin'])
                    # y
                    bbox.append(bndbox['ymin'])
                    # w
                    bbox.append(bndbox['xmax'] - bndbox['xmin'])
                    # h
                    bbox.append(bndbox['ymax'] - bndbox['ymin'])
                    print('add annotation with {},{},{},{}'.format(
                        object_name, current_image_id, current_category_id, bbox))
                    addAnnoItem(object_name, current_image_id, current_category_id, bbox)
    json.dump(coco, open(json_save_path, 'w'))


if __name__ == '__main__':
    # generate from a txt split file
    voc_data_dir = r"E:\DataSets\labelme_new\VOC2007"  # path of the whole dataset folder
    json_save_path = r"E:\DataSets\labelme_new\COCO_cls_4\annotations\val.json"  # output path and file name
    parseXmlFiles_by_txt(voc_data_dir, json_save_path, "test")
    # generate from a folder of xml files
    # ann_path = "E:/VOCdevkit/VOC2007/Annotations"
    # json_save_path = "E:/VOCdevkit/test.json"
    # parseXmlFiles(ann_path, json_save_path)
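After conversion, a quick look at the generated json is worthwhile (a sketch; the path is the one used in __main__ above and should match your own). The converter populates exactly the images, annotations and categories lists that COCO consumers expect:

import json

coco = json.load(open(r"E:\DataSets\labelme_new\COCO_cls_4\annotations\val.json"))
print(len(coco['images']), 'images,', len(coco['annotations']), 'annotations')
print('categories:', [c['name'] for c in coco['categories']])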
Slicing a COCO-format dataset
Because the targets in my images are all fairly small, I train on sliced tiles instead of full images (as a rule of thumb, when more than half of the annotated images in the original dataset have an average box width and height below 0.04 of the image width and height, sliced training is recommended). The code in this section comes from the official PaddleDetection GitHub repository.
Check your dataset statistics
First gather statistics on your own dataset to see whether sliced training is warranted.
Use the script box_distribution.py below, invoked from the command line:
python box_distribution.py --json_path ../../dataset/annotations/train.json --out_img box_distribution.jpg
Here --json_path is the path of the COCO-format json file to analyze, and --out_img is the output path for the distribution plot.
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import json

import matplotlib.pyplot as plt
import numpy as np


def median(data):
    data.sort()
    mid = len(data) // 2
    median = (data[mid] + data[~mid]) / 2
    return median


def draw_distribution(width, height, out_path):
    w_bins = int((max(width) - min(width)) // 10)
    h_bins = int((max(height) - min(height)) // 10)
    plt.figure()
    plt.subplot(221)
    plt.hist(width, bins=w_bins, color='green')
    plt.xlabel('Width rate *1000')
    plt.ylabel('number')
    plt.title('Distribution of Width')
    plt.subplot(222)
    plt.hist(height, bins=h_bins, color='blue')
    plt.xlabel('Height rate *1000')
    plt.title('Distribution of Height')
    plt.savefig(out_path)
    print(f'Distribution saved as {out_path}')
    plt.show()


def get_ratio_infos(jsonfile, out_img):
    allannjson = json.load(open(jsonfile, 'r'))
    be_im_id = 1
    be_im_w = []
    be_im_h = []
    ratio_w = []
    ratio_h = []
    images = allannjson['images']
    for i, ann in enumerate(allannjson['annotations']):
        if ann['iscrowd']:
            continue
        x0, y0, w, h = ann['bbox'][:]
        if be_im_id == ann['image_id']:
            be_im_w.append(w)
            be_im_h.append(h)
        else:
            im_w = images[be_im_id - 1]['width']
            im_h = images[be_im_id - 1]['height']
            im_m_w = np.mean(be_im_w)
            im_m_h = np.mean(be_im_h)
            dis_w = im_m_w / im_w
            dis_h = im_m_h / im_h
            ratio_w.append(dis_w)
            ratio_h.append(dis_h)
            be_im_id = ann['image_id']
            be_im_w = [w]
            be_im_h = [h]

    im_w = images[be_im_id - 1]['width']
    im_h = images[be_im_id - 1]['height']
    im_m_w = np.mean(be_im_w)
    im_m_h = np.mean(be_im_h)
    dis_w = im_m_w / im_w
    dis_h = im_m_h / im_h
    ratio_w.append(dis_w)
    ratio_h.append(dis_h)
    mid_w = median(ratio_w)
    mid_h = median(ratio_h)
    ratio_w = [i * 1000 for i in ratio_w]
    ratio_h = [i * 1000 for i in ratio_h]
    print(f'Median of ratio_w is {mid_w}')
    print(f'Median of ratio_h is {mid_h}')
    print('all_img with box: ', len(ratio_h))
    print('all_ann: ', len(allannjson['annotations']))
    draw_distribution(ratio_w, ratio_h, out_img)


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--json_path', type=str, default=None, help="Dataset json path.")
    parser.add_argument(
        '--out_img',
        type=str,
        default='box_distribution.jpg',
        help="Name of distribution img.")
    args = parser.parse_args()
    get_ratio_infos(args.json_path, args.out_img)


if __name__ == "__main__":
    main()
Slicing
If the statistics show that more than half of the images have an average box width/height below 0.04 of the image size, as in the output below, consider training on sliced images; it is quite effective at improving small-object detection accuracy.
Median of ratio_w is 0.03799439775910364
Median of ratio_h is 0.04074914637387802
all_img with box:  1409
all_ann:  98905
Distribution saved as box_distribution.jpg
The slicing script below also comes from the official PaddleDetection GitHub repository.
slice_image.py
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse

from tqdm import tqdm


def slice_data(image_dir, dataset_json_path, output_dir, slice_size,
               overlap_ratio):
    try:
        from sahi.scripts.slice_coco import slice
    except Exception as e:
        raise RuntimeError(
            'Unable to use sahi to slice images, please install sahi, for example: `pip install sahi`, see https://github.com/obss/sahi'
        )
    tqdm.write(
        f" slicing for slice_size={slice_size}, overlap_ratio={overlap_ratio}")
    slice(
        image_dir=image_dir,
        dataset_json_path=dataset_json_path,
        output_dir=output_dir,
        slice_size=slice_size,
        overlap_ratio=overlap_ratio, )


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--image_dir', type=str, default=None, help="The image folder path.")
    parser.add_argument(
        '--json_path', type=str, default=None, help="Dataset json path.")
    parser.add_argument(
        '--output_dir', type=str, default=None, help="Output dir.")
    parser.add_argument(
        '--slice_size', type=int, default=500, help="slice_size")
    parser.add_argument(
        '--overlap_ratio', type=float, default=0.25, help="overlap_ratio")
    args = parser.parse_args()
    slice_data(args.image_dir, args.json_path, args.output_dir, args.slice_size,
               args.overlap_ratio)


if __name__ == "__main__":
    main()
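Invocation follows the same pattern as the statistics script; the flags below match the argparse definitions above, and the paths are placeholders to adapt. sahi writes the sliced tiles together with a matching COCO json into --output_dir:

python slice_image.py --image_dir ../../dataset/images --json_path ../../dataset/annotations/train.json --output_dir ../../dataset/sliced --slice_size 500 --overlap_ratio 0.25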
Removing background images without targets
After slicing, the dataset folder contains a large number of tiles with no annotated boxes at all, i.e. the background portions of the original images. Feeding these straight into training can cause a positive/negative sample imbalance and hurt accuracy, so they should be removed. Since the dataset is in COCO format, deleting a tile means removing both the image file and its entry in the json file; see the code below.
coco_del_bg.py
import json
import os


class CocoDataDeleteBackground:
    def __init__(self, imgPath, jsonPath):
        self.imgPath = imgPath
        self.jsonPath = jsonPath

    def delete_background(self):
        with open(self.jsonPath, 'r+') as f:
            annotation_json = json.load(f)
            # collect the ids of all images that have at least one box
            all_img_id = []
            for anno in annotation_json['annotations']:
                img_id = anno['image_id']  # id of the image this box belongs to
                all_img_id.append(img_id)
            all_img_id = list(set(all_img_id))  # deduplicate
            all_imgs_to_del = []
            # walk the images list; mark entries whose id has no box
            # and delete the corresponding image files
            for i in range(len(annotation_json['images'])):
                image_name = annotation_json['images'][i]['file_name']
                img_id = annotation_json['images'][i]['id']
                if img_id not in all_img_id:
                    all_imgs_to_del.append(i)
                    os.remove(os.path.join(self.imgPath, image_name))
                    print(image_name + ' has been removed!')
            # delete from the back so earlier indices stay valid
            all_imgs_to_del = sorted(all_imgs_to_del, reverse=True)
            for i in all_imgs_to_del:
                del annotation_json['images'][i]
            f.seek(0)
            f.truncate()  # clear the json file
            f.write(json.dumps(annotation_json))  # rewrite it


if __name__ == '__main__':
    # the first param is the directory of the images
    # the second param is the path of the json file
    d = CocoDataDeleteBackground(r"your\image\path",
                                 r"your\json\path")
    # run the delete function
    d.delete_background()
    print('done!')
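To verify the cleanup afterwards (a sketch reusing the placeholder json path from above), every image left in the json should now have at least one annotation:

import json

coco = json.load(open(r"your\json\path"))
ids_with_boxes = {a['image_id'] for a in coco['annotations']}
orphans = [img['file_name'] for img in coco['images'] if img['id'] not in ids_with_boxes]
print('images still without boxes:', orphans)  # should be an empty list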