import os
import random
import cv2
import numpy as np
from PIL import Image
import sys_utils
import nn_utils
from sys_utils import _single_instance_logger as logger
import torch


class VOCDataSet:
    def __init__(self, augment, image_size, root):
        self.augment = augment
        self.image_size = image_size
        self.root = root
        self.label_map = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair",
                          "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant",
                          "sheep", "sofa", "train", "tvmonitor"]
        self.all_labled_annotations = []
        self.border_fill_value = 114

        cache_name = sys_utils.get_md5(root)
        self.cache_and_build(f"runs/dataset_cache/{cache_name}temp.cache")

    def cache_and_build(self, cache_file):
        if os.path.exists(cache_file):
            logger.info(f"Load from cache {cache_file}")
            self.load_labeled_information_from_cache(cache_file)
        else:
            logger.info(f"Build and cache {cache_file}")
            self.build_labeled_information_cache(cache_file)

    def load_labeled_information_from_cache(self, cache_file):
        self.all_labled_annotations = torch.load(cache_file)

    def build_labeled_information_cache(self, cache_file):
        """
        Check and validate the data, then cache the result.
        """
        annotations_files = os.listdir(os.path.join(self.root, "Annotations"))
        # Keep only files with the .xml suffix
        annotations_files = list(filter(lambda x: x.endswith(".xml"), annotations_files))
        # Change the .xml suffix to .jpg
        jpeg_files = [item[:-3] + "jpg" for item in annotations_files]
        # Prepend the full paths
        annotations_files = list(map(lambda x: os.path.join(self.root, "Annotations", x), annotations_files))
        jpeg_files = list(map(lambda x: os.path.join(self.root, "JPEGImages", x), jpeg_files))

        for annotation_file, jpeg_file in zip(annotations_files, jpeg_files):
            # Data checks: 1. raise if the image is corrupted; 2. raise if the image is too small.
            # Then load the annotations and store them; the stored annotations are normalized.
            pil_image = Image.open(jpeg_file)
            pil_image.verify()
            image_width, image_height = sys_utils.exif_size(pil_image)
            assert image_width > 9 and image_height > 9, f"Image size is too small {image_width} x {image_height}"

            # Load the annotations as [left, top, right, bottom, class_index]
            pixel_annotation = self.load_voc_annotation(annotation_file, self.label_map)

            # Convert the annotations to normalized [cx, cy, width, height, class_index]
            normalize_annotations = self.convert_to_normalize_annotation(pixel_annotation, image_width, image_height)
            self.all_labled_annotations.append([jpeg_file, normalize_annotations, [image_width, image_height]])

        sys_utils.mkparents(cache_file)
        torch.save(self.all_labled_annotations, cache_file)

    def convert_to_normalize_annotation(self, pixel_annotations, image_width, image_height):
        """
        Convert annotations to normalized format by dividing by the image width/height.

        Args:
            pixel_annotations[Nx5]: annotations in [left, top, right, bottom, class_index] format
            image_width[int]: width of the annotated image
            image_height[int]: height of the annotated image

        Returns:
            normalize_annotations[Nx5]: annotations in [cx, cy, width, height, class_index] format
        """
        normalize_annotations = pixel_annotations.copy()
        left, top, right, bottom = [pixel_annotations[:, i] for i in range(4)]
        normalize_annotations[:, 0] = (left + right) * 0.5
        normalize_annotations[:, 1] = (top + bottom) * 0.5
        normalize_annotations[:, 2] = right - left + 1
        normalize_annotations[:, 3] = bottom - top + 1
        normalize_annotations /= [image_width, image_height, image_width, image_height, 1]
        return normalize_annotations

    def __len__(self):
        # the original placeholder returned 0, which would make the dataset unusable
        return len(self.all_labled_annotations)

    def __getitem__(self, image_index):
        # placeholder; the full item pipeline is not implemented yet
        return None

    def load_mosaic(self, image_index):
        """
        Stitch the specified image and 3 random images into one mosaic of size 2 * image_size.
        :param image_index: index of the specified image
        :return: the mosaic image (after the random affine it is image_size x image_size)
                 and the normalized annotations of the surviving boxes
        """
        # Pick a random center in [0.5 * image_size, 1.5 * image_size]
        x_center = int(random.uniform(self.image_size * 0.5, self.image_size * 1.5))
        y_center = int(random.uniform(self.image_size * 0.5, self.image_size * 1.5))

        # Pick three extra random image indices
        image_len = len(self.all_labled_annotations)
        all_image_index = [image_index] + [random.randint(0, image_len - 1) for _ in range(3)]

        # The four images are moved so that one corner of each meets the random center:
        # img1, img2
        # img3, img4
        alignment_corner_point = [
            [1, 1],  # corner of img1, relative to its own width/height
            [0, 1],  # corner of img2, relative to its own width/height
            [1, 0],  # corner of img3, relative to its own width/height
            [0, 0]   # corner of img4, relative to its own width/height
        ]
        merge_mosaic_image_size = self.image_size * 2
        # np.full with a tuple fill value is a serious performance hit; the image is int, so pass an int
        merge_mosaic_image = np.full((merge_mosaic_image_size, merge_mosaic_image_size, 3),
                                     self.border_fill_value, dtype=np.uint8)
        merge_mosaic_pixel_annotations = []
        for index, (image_index, (corner_point_x, corner_point_y)) in enumerate(
                zip(all_image_index, alignment_corner_point)):
            image, normalize_annotations, (image_width, image_height) = \
                self.load_image_with_uniform_scale(image_index)

            # Draw before stitching, to help track down bugs
            nn_utils.draw_norm_bboxes(image, normalize_annotations, color=(0, 0, 255), thickness=3)

            # if index == 0:
            #     normalize_annotations = np.zeros((0, 5))

            corner_point_x = image_width * corner_point_x
            corner_point_y = image_height * corner_point_y
            x_offset = x_center - corner_point_x
            y_offset = y_center - corner_point_y

            # The M matrix must be dtype=float32
            M = np.array([
                [1, 0, x_offset],
                [0, 1, y_offset],
            ], dtype=np.float32)
            # Warp image through M onto merge_mosaic_image (of size merge_mosaic_image_size),
            # using a transparent border and nearest-neighbor interpolation
            cv2.warpAffine(image, M, (merge_mosaic_image_size, merge_mosaic_image_size),
                           dst=merge_mosaic_image,
                           borderMode=cv2.BORDER_TRANSPARENT, flags=cv2.INTER_NEAREST)

            # Convert the boxes to pixel format [left, top, right, bottom]
            pixel_annotations = self.convert_to_pixel_annotation(normalize_annotations, image_width, image_height)
            # The image was translated, so the boxes must be translated by the same offset
            pixel_annotations = pixel_annotations + [x_offset, y_offset, x_offset, y_offset, 0]
            merge_mosaic_pixel_annotations.append(pixel_annotations)

        # Stack all boxes into one matrix
        merge_mosaic_pixel_annotations = np.concatenate(merge_mosaic_pixel_annotations, axis=0)
        np.clip(merge_mosaic_pixel_annotations[:, :4], a_min=0, a_max=merge_mosaic_image_size - 1,
                out=merge_mosaic_pixel_annotations[:, :4])

        # Random affine transform
        scale = random.uniform(0.5, 1.5)

        # Map the 1280 mosaic onto a 640 output, keeping the center fixed:
        # x1 * M00 + 0 * M01 + x_offset
        # 640 * scale + x_offset = 320
        # x_offset = 320 - 640 * scale
        # x_offset = (0.5 - scale) * self.image_size
        M = np.array([
            [scale, 0, (0.5 - scale) * self.image_size],
            [0, scale, (0.5 - scale) * self.image_size]
        ], dtype=np.float32)
        # note: the original passed a bare int as borderValue, which OpenCV interprets as
        # (114, 0, 0); a 3-tuple fills all channels with the intended gray
        fill = (self.border_fill_value, self.border_fill_value, self.border_fill_value)
        merge_mosaic_image = cv2.warpAffine(merge_mosaic_image, M, (self.image_size, self.image_size),
                                            borderMode=cv2.BORDER_CONSTANT,
                                            borderValue=fill,
                                            flags=cv2.INTER_LINEAR)

        # Transform the boxes with the M matrix
        num_targets = len(merge_mosaic_pixel_annotations)
        output_normalize_annotations = np.zeros((0, 5))
        if num_targets > 0:
            # Project the annotation boxes onto the output image through M
            targets_temp = np.ones((num_targets * 2, 3))

            # Memory layout:
            # N x 5
            # N x 4 -> left, top, right, bottom, left, top, right, bottom, ...
            # -> reshape(N * 2, 2)
            # -> left, top
            # -> right, bottom
            # -> left, top
            # -> right, bottom
            # i.e. turn the box annotations into one point per row
            targets_temp[:, :2] = merge_mosaic_pixel_annotations[:, :4].reshape(num_targets * 2, 2)

            # targets_temp -> 2N x 3
            # M            -> 2 x 3
            # output       -> 2N x 2
            merge_projection_pixel_annotations = merge_mosaic_pixel_annotations.copy()
            merge_projection_pixel_annotations[:, :4] = (targets_temp @ M.T).reshape(num_targets, 4)

            # Post-process the boxes:
            # 1. clip to the image bounds
            # 2. filter out invalid boxes
            np.clip(merge_projection_pixel_annotations[:, :4], a_min=0, a_max=self.image_size - 1,
                    out=merge_projection_pixel_annotations[:, :4])

            # Filter out invalid boxes
            projection_box_width = merge_projection_pixel_annotations[:, 2] - merge_projection_pixel_annotations[:, 0] + 1
            projection_box_height = merge_projection_pixel_annotations[:, 3] - merge_projection_pixel_annotations[:, 1] + 1
            original_box_width = merge_mosaic_pixel_annotations[:, 2] - merge_mosaic_pixel_annotations[:, 0] + 1
            original_box_height = merge_mosaic_pixel_annotations[:, 3] - merge_mosaic_pixel_annotations[:, 1] + 1

            area_projection = projection_box_width * projection_box_height
            area_original = original_box_width * original_box_height
            aspect_ratio = np.maximum(projection_box_width / (projection_box_height + 1e-6),
                                      projection_box_height / (projection_box_width + 1e-6))

            # Keep a box only if:
            # 1. its projected width is greater than 2
            # 2. its projected height is greater than 2
            # 3. clipped area / pre-clip area > 0.2
            # 4. max(width/height, height/width) < 20
            keep_indices = (projection_box_width > 2) & \
                           (projection_box_height > 2) & \
                           (area_projection / (area_original * scale + 1e-6) > 0.2) & \
                           (aspect_ratio < 20)
            merge_projection_pixel_annotations = merge_projection_pixel_annotations[keep_indices]
            output_normalize_annotations = nn_utils.convert_to_normalize_annotation(
                merge_projection_pixel_annotations, self.image_size, self.image_size)
        return merge_mosaic_image, output_normalize_annotations

    def convert_to_pixel_annotation(self, normalize_annotations, image_width, image_height):
        '''
        Convert annotations from normalized to pixel format.

        Args:
            normalize_annotations[Nx5]: annotations in [cx, cy, width, height, class_index] format
            image_width[int]: width of the annotated image
            image_height[int]: height of the annotated image

        Returns:
            pixel_annotations[Nx5]: annotations in [left, top, right, bottom, class_index] format
        '''
        pixel_annotations = normalize_annotations.copy()
        cx, cy, width, height = [normalize_annotations[:, i] for i in range(4)]
        pixel_annotations[:, 0] = cx * image_width - (width * image_width - 1) * 0.5     # left
        pixel_annotations[:, 1] = cy * image_height - (height * image_height - 1) * 0.5  # top
        pixel_annotations[:, 2] = cx * image_width + (width * image_width - 1) * 0.5     # right
        pixel_annotations[:, 3] = cy * image_height + (height * image_height - 1) * 0.5  # bottom
        return pixel_annotations

    def load_image_with_uniform_scale(self, image_index):
        """
        Load an image and scale it uniformly so the longer side equals self.image_size.
        :param image_index: image index
        :return: image, normalize_annotations, (resized width, resized height)
        """
        jpeg_file, normalize_annotations, (image_width, image_height) = self.all_labled_annotations[image_index]
        image = cv2.imread(jpeg_file)
        scale = self.image_size / max(image.shape[:2])
        if scale < 1 and self.augment:
            interp = cv2.INTER_AREA
        else:
            interp = cv2.INTER_LINEAR
        image = cv2.resize(image, (0, 0), fx=scale, fy=scale, interpolation=interp)
        image_resize_height, image_resize_width = image.shape[:2]
        return image, normalize_annotations, (image_resize_width, image_resize_height)

    def load_voc_annotation(self, annotation_file, label_map):
        '''
        Load an xml annotation file and read its bboxes.

        Args:
            annotation_file[str]: path to the xml file
            label_map[list]: list of label names

        Returns:
            np.array([(xmin, ymin, xmax, ymax, class_index), (xmin, ymin, xmax, ymax, class_index)])
        '''
        with open(annotation_file, "r") as f:
            annotation_data = f.read()

        def middle(s, begin, end, pos_begin=0):
            p = s.find(begin, pos_begin)
            if p == -1:
                return None, None

            p += len(begin)
            e = s.find(end, p)
            if e == -1:
                return None, None
            return s[p:e], e + len(end)

        obj_bboxes = []
        object_, pos_ = middle(annotation_data, "<object>", "</object>")
        while object_ is not None:
            xmin = int(middle(object_, "<xmin>", "</xmin>")[0])
            ymin = int(middle(object_, "<ymin>", "</ymin>")[0])
            xmax = int(middle(object_, "<xmax>", "</xmax>")[0])
            ymax = int(middle(object_, "<ymax>", "</ymax>")[0])
            name = middle(object_, "<name>", "</name>")[0]
            object_, pos_ = middle(annotation_data, "<object>", "</object>", pos_)
            obj_bboxes.append((xmin, ymin, xmax, ymax, label_map.index(name)))

        # Return a 0 x 5 ndarray when there is no box, so downstream code runs without
        # special-casing the empty case or hitting shape-mismatch errors
        return_ndarray_bboxes = np.zeros((0, 5), dtype=np.float32)
        if len(obj_bboxes) > 0:
            return_ndarray_bboxes = np.array(obj_bboxes, dtype=np.float32)
        return return_ndarray_bboxes


if __name__ == '__main__':
    # Fix the random seed
    nn_utils.setup_seed(13)

    dataset = VOCDataSet(True, 640, r"E:\VOC2007\VOCdevkit\VOC2007")
    # print(len(dataset))
    # image, normalize_annotations, (w, h) = dataset.load_image_with_uniform_scale(0)
    # print(image.shape, w, h)
    # print(normalize_annotations)

    # pixel_annotations = np.array([
    #     [101.55, 50, 200, 150, 1]
    # ])
    # normalize_annotations = dataset.convert_to_normalize_annotation(pixel_annotations, 640, 640)
    # print(normalize_annotations)
    # result_pixel_annotations = dataset.convert_to_pixel_annotation(normalize_annotations, 640, 640)
    # print(result_pixel_annotations)

    image, normalize_annotations = dataset.load_mosaic(3)
    nn_utils.draw_norm_bboxes(image, normalize_annotations, thickness=3)
    cv2.imwrite("image.jpg", image)
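Since __getitem__ is still a stub, here is a minimal sketch of how the mosaic output could be batched through a DataLoader once it returns (image, normalize_annotations). The collate function and the extra image-index column are my own assumptions, modeled on the common YOLO convention of tagging each box with its image's position in the batch; they are not part of the original post.

import numpy as np
import torch

def collate_fn(batch):
    # batch: list of (image HxWx3 uint8, normalize_annotations Nx5), N varies per image
    images, all_annotations = [], []
    for batch_index, (image, annotations) in enumerate(batch):
        # HWC BGR uint8 -> CHW float in [0, 1]
        images.append(torch.from_numpy(image.transpose(2, 0, 1)).float() / 255.0)
        # prepend the in-batch image index so boxes can be matched back to their image
        index_column = np.full((len(annotations), 1), batch_index, dtype=np.float32)
        all_annotations.append(np.hstack([index_column, annotations.astype(np.float32)]))
    return torch.stack(images), torch.from_numpy(np.concatenate(all_annotations, axis=0))

Because mosaic always emits image_size x image_size images, torch.stack needs no padding, and the concatenated annotation tensor handles the variable box count per image.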
nn_utils.py
import cv2
import torch
import torch.nn as nn
import math
import random
import numpy as np
from copy import deepcopy


class BBox:
    def __init__(self, x, y, r, b, landmark):
        self.x = x
        self.y = y
        self.r = r
        self.b = b
        self.landmark = landmark

    def __repr__(self):
        landmark_info = "HasLandmark" if self.landmark else "NoLandmark"
        return f"{{Face {self.x}, {self.y}, {self.r}, {self.b}, {landmark_info} }}"

    @property
    def left_top_i(self):
        return int(self.x), int(self.y)

    @property
    def right_bottom_i(self):
        return int(self.r), int(self.b)

    @property
    def center_i(self):
        return int((self.x + self.r) * 0.5), int((self.y + self.b) * 0.5)

    @property
    def center(self):
        return (self.x + self.r) * 0.5, (self.y + self.b) * 0.5

    @property
    def width(self):
        return self.r - self.x + 1

    @property
    def height(self):
        return self.b - self.y + 1

    @property
    def location(self):
        return self.x, self.y, self.r, self.b

    @property
    def landmark_union(self):
        union = ()
        for point in self.landmark:
            union = union + tuple(point)
        return union


class ImageObject:
    def __init__(self, file):
        self.file = file
        self.bboxes = []

    def add(self, annotation):
        x, y, w, h = annotation[:4]
        r = x + w - 1
        b = y + h - 1
        landmark = None
        if len(annotation) == 20:
            # x, y, w, h, xyz, xyz, xyz, xyz, xyz, unknown
            landmark = []
            for i in range(5):
                px = annotation[i * 3 + 0 + 4]
                py = annotation[i * 3 + 1 + 4]
                pz = annotation[i * 3 + 2 + 4]
                if pz == -1:
                    landmark = None
                    break
                landmark.append([px, py])
        self.bboxes.append(BBox(x, y, r, b, landmark))


def load_widerface_annotation(ann_file):
    with open(ann_file, "r") as f:
        lines = f.readlines()

    imageObject = None
    images = []
    for line in lines:
        line = line.replace("\n", "")
        if line[0] == "#":
            file = line[2:]
            imageObject = ImageObject(file)
            images.append(imageObject)
        else:
            imageObject.add([float(item) for item in line.split(" ")])
    return images


def draw_gauss_np(heatmap, x, y, box_size):
    if not isinstance(box_size, tuple):
        box_size = (box_size, box_size)

    box_width, box_height = box_size
    diameter = min(box_width, box_height)
    height, width = heatmap.shape[:2]
    sigma = diameter / 6
    radius = max(1, int(diameter * 0.5))
    s = 2 * sigma * sigma
    ky, kx = np.ogrid[-radius:+radius+1, -radius:+radius+1]
    kernel = np.exp(-(kx * kx + ky * ky) / s)
    dleft, dtop = -min(x, radius), -min(y, radius)
    dright, dbottom = +min(width - x, radius+1), +min(height - y, radius+1)
    select_heatmap = heatmap[y+dtop:y+dbottom, x+dleft:x+dright]
    select_kernel = kernel[radius+dtop:radius+dbottom, radius+dleft:radius+dright]
    if min(select_heatmap.shape) > 0:
        np.maximum(select_heatmap, select_kernel, out=select_heatmap)
    return heatmap


def draw_gauss_torch(heatmap, x, y, box_size):
    if not isinstance(box_size, tuple):
        box_size = (box_size, box_size)

    box_width, box_height = box_size
    diameter = min(box_width, box_height)
    device = heatmap.device
    dtype = heatmap.dtype
    x = int(x)
    y = int(y)
    height, width = heatmap.shape[:2]
    sigma = diameter / 6
    radius = max(1, int(diameter * 0.5))
    s = 2 * sigma * sigma
    ky = torch.arange(-radius, +radius+1, device=device, dtype=dtype).view(-1, 1)
    kx = torch.arange(-radius, +radius+1, device=device, dtype=dtype).view(1, -1)
    kernel = torch.exp(-(kx * kx + ky * ky) / s)
    dleft, dtop = -min(x, radius), -min(y, radius)
    dright, dbottom = +min(width - x, radius+1), +min(height - y, radius+1)
    select_heatmap = heatmap[y+dtop:y+dbottom, x+dleft:x+dright]
    select_kernel = kernel[radius+dtop:radius+dbottom, radius+dleft:radius+dright]
    if min(select_heatmap.shape) > 0:
        torch.max(select_heatmap, select_kernel, out=select_heatmap)
    return heatmap
def pad_image(image, stride):
    height, width = image.shape[:2]
    pad_x = stride - (width % stride) if width % stride != 0 else 0
    pad_y = stride - (height % stride) if height % stride != 0 else 0
    # note: the fill color must be passed as value=; positionally it would land on dst
    image = cv2.copyMakeBorder(image, 0, pad_y, 0, pad_x, cv2.BORDER_CONSTANT, value=(0, 0, 0))
    return image


def iou(a, b):
    ax, ay, ar, ab = a
    bx, by, br, bb = b
    cross_x = max(ax, bx)
    cross_y = max(ay, by)
    cross_r = min(ar, br)
    cross_b = min(ab, bb)
    cross_w = max(0, (cross_r - cross_x) + 1)
    cross_h = max(0, (cross_b - cross_y) + 1)
    cross_area = cross_w * cross_h
    union = (ar - ax + 1) * (ab - ay + 1) + (br - bx + 1) * (bb - by + 1) - cross_area
    return cross_area / union


def nms(bboxes, threshold, confidence_index=-1):
    bboxes.sort(key=lambda x: x[confidence_index], reverse=True)
    flags = [True] * len(bboxes)
    keep = []
    for i in range(len(bboxes)):
        if not flags[i]:
            continue

        keep.append(bboxes[i])
        for j in range(i + 1, len(bboxes)):
            if iou(bboxes[i][:4], bboxes[j][:4]) > threshold:
                flags[j] = False
    return keep


def nmsAsClass(bboxes, threshold, class_index=-1, confidence_index=-2):
    boxasclass = {}
    for box in bboxes:
        classes = box[class_index]
        if classes not in boxasclass:
            boxasclass[classes] = []
        boxasclass[classes].append(box)

    output = []
    for key in boxasclass:
        result = nms(boxasclass[key], threshold, confidence_index)
        output.extend(result)
    return output


def iou_batch(a, b):
    # boxes are left, top, right, bottom
    a_xmin, a_xmax = a[..., 0], a[..., 2]
    a_ymin, a_ymax = a[..., 1], a[..., 3]
    b_xmin, b_xmax = b[..., 0], b[..., 2]
    b_ymin, b_ymax = b[..., 1], b[..., 3]

    inter_xmin = torch.max(a_xmin, b_xmin)
    inter_xmax = torch.min(a_xmax, b_xmax)
    inter_ymin = torch.max(a_ymin, b_ymin)
    inter_ymax = torch.min(a_ymax, b_ymax)
    inter_width = (inter_xmax - inter_xmin + 1).clamp(0)
    inter_height = (inter_ymax - inter_ymin + 1).clamp(0)
    inter_area = inter_width * inter_height
    a_width, a_height = (a_xmax - a_xmin + 1), (a_ymax - a_ymin + 1)
    b_width, b_height = (b_xmax - b_xmin + 1), (b_ymax - b_ymin + 1)
    union = (a_width * a_height) + (b_width * b_height) - inter_area
    return inter_area / union


def draw_bbox(image, left, top, right, bottom, confidence, classes, color=(0, 255, 0), thickness=1):
    left = int(left + 0.5)
    top = int(top + 0.5)
    right = int(right + 0.5)
    bottom = int(bottom + 0.5)
    cv2.rectangle(image, (left, top), (right, bottom), color, thickness)

    if classes == -1:
        text = f"{confidence:.2f}"
    else:
        text = f"[{classes}]{confidence:.2f}"
    cv2.putText(image, text, (left + 3, top - 5), 0, 0.5, (0, 0, 255), 1, 16)


def draw_norm_bboxes(image, bboxes, color=(0, 255, 0), thickness=1):
    '''
    Draw normalized bounding boxes.

    Args:
        image[ndarray]: the image
        bboxes[Nx4/Nx5/Nx6]: box info with 4, 5 or 6 columns in the order
            [cx, cy, width, height, confidence, classes], normalized by the image size
    '''
    image_height, image_width = image.shape[:2]
    for obj in bboxes:
        cx, cy, width, height = obj[:4] * [image_width, image_height, image_width, image_height]
        left = cx - (width - 1) * 0.5
        top = cy - (height - 1) * 0.5
        right = cx + (width - 1) * 0.5
        bottom = cy + (height - 1) * 0.5

        confidence = 0
        if len(obj) > 4:
            confidence = obj[4]

        classes = -1
        if len(obj) > 5:
            classes = obj[5]
        draw_bbox(image, left, top, right, bottom, confidence, classes, color, thickness)


def draw_pixel_bboxes(image, bboxes, color=(0, 255, 0), thickness=1):
    '''
    Draw bounding boxes given as left, top, right, bottom.
    '''
    for obj in bboxes:
        left, top, right, bottom = [int(item) for item in obj[:4]]
        confidence = 0
        if len(obj) > 4:
            confidence = obj[4]

        classes = -1
        if len(obj) > 5:
            classes = obj[5]
        # note: the original call was missing the image argument
        draw_bbox(image, left, top, right, bottom, confidence, classes, color, thickness)


def get_center_affine_transform(src_width, src_height, dst_width, dst_height):
    s = min(dst_width / src_width, dst_height / src_height)
    new_width = s * src_width
    new_height = s * src_height
    dcx = dst_width * 0.5
    dcy = dst_height * 0.5
    dst_points = np.array([
        [dcx - new_width * 0.5, dcy - new_height * 0.5],
        [dcx + new_width * 0.5, dcy - new_height * 0.5],
        [dcx + new_width * 0.5, dcy + new_height * 0.5],
    ], dtype=np.float32)
    src_points = np.array([
        [0, 0],
        [src_width, 0],
        [src_width, src_height]
    ], dtype=np.float32)
    return cv2.getAffineTransform(src_points, dst_points)


def center_affine(image, width, height):
    src_height, src_width = image.shape[:2]
    M = get_center_affine_transform(src_width, src_height, width, height)
    return cv2.warpAffine(image, M, (width, height))


def inverse_center_affine_bboxes(image_width, image_height, net_width, net_height, bboxes):
    num_bboxes = len(bboxes)
    if num_bboxes == 0:
        return bboxes

    M = get_center_affine_transform(image_width, image_height, net_width, net_height)
    M = np.matrix(np.vstack([M, np.array([0, 0, 1])])).I
    M = M[:2]
    bboxes = np.array(bboxes)

    # N x 6: left, top, right, bottom, confidence, classes
    left_top = bboxes[:, :2]
    right_bottom = bboxes[:, 2:4]
    left_top_project = (M @ np.hstack([left_top, np.ones([num_bboxes, 1])]).T).T
    right_bottom_project = (M @ np.hstack([right_bottom, np.ones([num_bboxes, 1])]).T).T
    new_box = np.hstack([left_top_project, right_bottom_project, bboxes[:, 4:]])
    return new_box.tolist()


def is_parallel(model):
    # True if the model is wrapped by DP or DDP
    return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)


def setup_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


# returns GFlops, MParams
def compute_flops(model, input):
    try:
        from thop import profile
        from copy import deepcopy

        # profile is intrusive and pollutes the model (it renames things), so deep-copy first
        flops, params = profile(deepcopy(model).eval(), inputs=input, verbose=False)
        # units are GFlops and MParams
        return flops / 1E9, params / 1E6
    except Exception as e:
        pass
    return -1, -1


class ModelEMA:
    """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models
    Keep a moving average of everything in the model state_dict (parameters and buffers).
    This is intended to allow functionality like
    https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
    A smoothed version of the weights is necessary for some training schemes to perform well.
    This class is sensitive where it is initialized in the sequence of model init,
    GPU assignment and distributed training wrappers.
    """

    def __init__(self, model, decay=0.9999, updates=0):
        # Create EMA
        self.ema = deepcopy(model.module if is_parallel(model) else model).eval()  # FP32 EMA
        # if next(model.parameters()).device.type != 'cpu':
        #     self.ema.half()  # FP16 EMA
        self.updates = updates  # number of EMA updates
        self.decay = lambda x: decay * (1 - math.exp(-x / 2000))  # decay exponential ramp (to help early epochs)
        for p in self.ema.parameters():
            p.requires_grad_(False)

    def update(self, model):
        # Update EMA parameters
        with torch.no_grad():
            self.updates += 1
            d = self.decay(self.updates)

            msd = model.module.state_dict() if is_parallel(model) else model.state_dict()  # model state_dict
            for k, v in self.ema.state_dict().items():
                if v.dtype.is_floating_point:
                    v *= d
                    v += (1. - d) * msd[k].detach()


def convert_to_pixel_annotation(normalize_annotations, image_width, image_height):
    '''
    Convert annotations from normalized to pixel format.

    Args:
        normalize_annotations[Nx5]: annotations in [cx, cy, width, height, class_index] format
        image_width[int]: width of the annotated image
        image_height[int]: height of the annotated image

    Returns:
        pixel_annotations[Nx5]: annotations in [left, top, right, bottom, class_index] format
    '''
    pixel_annotations = normalize_annotations.copy()
    cx, cy, width, height, class_index = [normalize_annotations[:, i] for i in range(5)]
    pixel_annotations[:, 0] = cx * image_width - (width * image_width - 1) * 0.5     # left
    pixel_annotations[:, 1] = cy * image_height - (height * image_height - 1) * 0.5  # top
    pixel_annotations[:, 2] = cx * image_width + (width * image_width - 1) * 0.5     # right
    pixel_annotations[:, 3] = cy * image_height + (height * image_height - 1) * 0.5  # bottom
    return pixel_annotations


def convert_to_normalize_annotation(pixel_annotations, image_width, image_height):
    '''
    Convert annotations to normalized format by dividing by the image width/height.

    Args:
        pixel_annotations[Nx5]: annotations in [left, top, right, bottom, class_index] format
        image_width[int]: width of the annotated image
        image_height[int]: height of the annotated image

    Returns:
        normalize_annotations[Nx5]: annotations in [cx, cy, width, height, class_index] format
    '''
    normalize_annotations = pixel_annotations.copy()
    left, top, right, bottom, class_index = [pixel_annotations[:, i] for i in range(5)]
    normalize_annotations[:, 0] = (left + right) * 0.5 / image_width   # cx
    normalize_annotations[:, 1] = (top + bottom) * 0.5 / image_height  # cy
    normalize_annotations[:, 2] = (right - left + 1) / image_width     # width
    normalize_annotations[:, 3] = (bottom - top + 1) / image_height    # height
    return normalize_annotations


def load_voc_annotation(annotation_file, label_map):
    '''
    Load an xml annotation file and read its bboxes.

    Args:
        annotation_file[str]: path to the xml file
        label_map[list]: list of label names

    Returns:
        np.array([(xmin, ymin, xmax, ymax, class_index), (xmin, ymin, xmax, ymax, class_index)])
    '''
    with open(annotation_file, "r") as f:
        annotation_data = f.read()

    def middle(s, begin, end, pos_begin=0):
        p = s.find(begin, pos_begin)
        if p == -1:
            return None, None

        p += len(begin)
        e = s.find(end, p)
        if e == -1:
            return None, None
        return s[p:e], e + len(end)

    obj_bboxes = []
    object_, pos_ = middle(annotation_data, "<object>", "</object>")
    while object_ is not None:
        xmin = int(middle(object_, "<xmin>", "</xmin>")[0])
        ymin = int(middle(object_, "<ymin>", "</ymin>")[0])
        xmax = int(middle(object_, "<xmax>", "</xmax>")[0])
        ymax = int(middle(object_, "<ymax>", "</ymax>")[0])
        name = middle(object_, "<name>", "</name>")[0]
        object_, pos_ = middle(annotation_data, "<object>", "</object>", pos_)
        obj_bboxes.append((xmin, ymin, xmax, ymax, label_map.index(name)))

    # Return a 0 x 5 ndarray when there is no box, so downstream code runs without
    # special-casing the empty case or hitting shape-mismatch errors
    return_ndarray_bboxes = np.zeros((0, 5), dtype=np.float32)
    if len(obj_bboxes) > 0:
        return_ndarray_bboxes = np.array(obj_bboxes, dtype=np.float32)
    return return_ndarray_bboxes
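A quick sanity check of the two conversion functions above (my own example, not from the original post): a pixel box should survive the normalize/pixel round trip unchanged, since left = cx*W - (w*W - 1)/2 exactly inverts cx = (l + r)/2/W, w = (r - l + 1)/W.

import numpy as np
from nn_utils import convert_to_normalize_annotation, convert_to_pixel_annotation

boxes = np.array([[100.0, 50.0, 200.0, 150.0, 1.0]], dtype=np.float32)  # l, t, r, b, class
norm = convert_to_normalize_annotation(boxes, 640, 480)
back = convert_to_pixel_annotation(norm, 640, 480)
assert np.allclose(boxes, back, atol=1e-3)  # round trip recovers the original box
print(norm)  # cx, cy, width, height are now fractions of the image size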
sys_utils.py
import logging
import datetime
import os
import shutil
import hashlib
from pathlib import Path
from PIL import Image, ExifTags
from logging.handlers import TimedRotatingFileHandler

# find the exif tag id for 'Orientation'
for orientation in ExifTags.TAGS.keys():
    if ExifTags.TAGS[orientation] == 'Orientation':
        break


def get_md5(data):
    return hashlib.md5(data.encode(encoding='UTF-8')).hexdigest()


def exif_size(img):
    '''
    Returns exif-corrected PIL size
    '''
    width, height = img.size  # (width, height)
    try:
        rotation = dict(img._getexif().items())[orientation]
        if rotation == 6 or rotation == 8:  # rotation 270 or rotation 90: exchange
            width, height = height, width
    except Exception as e:
        pass
    return width, height


def mkdirs(directory):
    try:
        os.makedirs(directory)
    except Exception as e:
        ...


def mkparents(path):
    parent = Path(path).parent
    if not os.path.exists(parent):
        mkdirs(parent)


def build_logger(path):
    logger = logging.getLogger("NewLogger")
    logger.setLevel(logging.INFO)

    mkparents(path)
    rf_handler = TimedRotatingFileHandler(path, when='midnight', interval=1, backupCount=7,
                                          atTime=datetime.time(0, 0, 0, 0))
    formatter = logging.Formatter('[%(levelname)s][%(filename)s:%(lineno)d][%(asctime)s]: %(message)s')
    rf_handler.setFormatter(formatter)
    logger.addHandler(rf_handler)

    sh_handler = logging.StreamHandler()
    sh_handler.setFormatter(formatter)
    logger.addHandler(sh_handler)
    return logger


def build_default_logger():
    logger = logging.getLogger("DefaultLogger")
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter('[%(levelname)s][%(filename)s:%(lineno)d][%(asctime)s]: %(message)s')
    sh_handler = logging.StreamHandler()
    sh_handler.setFormatter(formatter)
    logger.addHandler(sh_handler)
    return logger


def copy_code_to(src, dst):
    if len(dst) == 0 or dst == ".":
        print("invalid operation, copy to current directory")
        return

    for file in os.listdir(src):
        if file.endswith(".py"):
            source = f"{src}/{file}"
            dest = f"{dst}/{file}"
            mkparents(dest)
            shutil.copy(source, dest)


# Singleton pattern: a stable module-level handle whose backing logger can be swapped later
class SingleInstanceLogger:
    def __init__(self):
        self.logger = build_default_logger()

    def __getattr__(self, name):
        # delegate everything (info, warning, ...) to the wrapped logger
        return getattr(self.logger, name)


def setup_single_instance_logger(path):
    global _single_instance_logger
    _single_instance_logger.logger = build_logger(path)


_single_instance_logger = SingleInstanceLogger()

# A small demo of the __getattr__ delegation used above:
# class B:
#     def __init__(self):
#         self.name = "xiaozhang"
#     def echo(self, message):
#         print(f"Echo message: {message}")
#
# class A:
#     def __init__(self):
#         self.b = B()
#     def __getattr__(self, name):
#         print("get attr", name)
#         return getattr(self.b, name)
#
# a = A()
# a.echo("hello")
# print(a.name)
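A minimal usage sketch of the single-instance logger (my own example; the log path is hypothetical): the module-level _single_instance_logger works immediately with a console handler, and setup_single_instance_logger swaps in a file-backed, daily-rotated logger without any caller having to re-import anything.

import sys_utils
from sys_utils import _single_instance_logger as logger

logger.info("console only for now")
sys_utils.setup_single_instance_logger("runs/logs/train.log")  # hypothetical path
logger.info("now also written to the rotating file runs/logs/train.log")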
The dataset used throughout is VOC2007; the root passed to VOCDataSet must follow the standard VOCdevkit layout shown below, since the cache builder reads Annotations/ and JPEGImages/ directly.
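Expected layout under the dataset root (inferred from the cache-building code, which pairs each xml with a jpg of the same basename):

VOC2007/
├── Annotations/   # 000001.xml, ... (one xml per image)
└── JPEGImages/    # 000001.jpg, ... (same basenames as the xml files)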
Running the script saves the result to image.jpg: a 640x640 mosaic of four images with their annotation boxes drawn on it. (The output screenshot from the original post is omitted here.)