一、实验目的
本实验旨在学习如何构建自定义的人脸数据集,并将其应用于图像生成任务中。具体来说,本任务是一个条件生成任务,即给定一个人脸的类别标签(如超模脸、动漫脸、萌娃脸、明星脸等),生成与该类别标签相对应的人脸图像。
二、硬件与软件环境
以下是我的设备配置:
处理器:12th Gen Intel(R) Core(TM) i5-12450H,2.00 GHz
内存:16.0 GB (15.8 GB 可用)
系统类型:64 位操作系统,基于 x64 的处理器
显卡:NVIDIA GeForce RTX 3060 Laptop GPU
GPU核心频率:210 MHz
显存频率:405 MHz
GPU性能状态:8
操作系统:Windows 11 家庭中文版
开发工具:PyCharm 2023.3.1 (Professional Edition),Python 3.11
三、实验内容
1.人脸数据集构建。
1.1创建Flask项目
这是需要完成的整体目录结构:
通常自定义数据集的构建可以分为 2 步:数据收集、数据整理。
1.2 人脸图像收集
与公开数据集不同,自定义数据集需要我们根据任务需求自行从 Web 上收集数据。这里我们主要通过 Python 爬取百度图片网页上的图像。在项目根目录下新建一个 download_data.py 文件,编写代码如下:
download_data.py
import os
import requests
import re
from tqdm import tqdm
def get_image_type(url):
    """Guess the file extension to use when saving the picture behind *url*.

    The check is a case-insensitive substring match, so links ending in
    ``.PNG`` / ``.JPG`` / ``.JPEG`` are recognised as well as their lowercase
    forms.

    Args:
        url: The picture URL as a string.

    Returns:
        ``"png"`` if the URL contains ``.png``; otherwise ``"jpg"`` if it
        contains ``.jpg`` or ``.jpeg``; otherwise ``None``.
    """
    lowered = url.lower()
    if ".png" in lowered:
        return "png"
    if ".jpg" in lowered or ".jpeg" in lowered:
        return "jpg"
    return None
def get_urls_one_page(url):
    """Fetch one result page and extract its picture URLs and next-page link.

    Args:
        url: Address of the result page to request.

    Returns:
        A ``(picture_urls, next_page_url)`` tuple. ``picture_urls`` is the
        list of ``objURL`` values found in the page text. ``next_page_url`` is
        the absolute URL of the "下一页" link, or ``None`` when the page has no
        such link. If the request fails with a ``requests`` error, the error
        is printed and ``([], None)`` is returned.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urljoin

    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        response.encoding = 'utf-8'
        html = response.text
    except requests.RequestException as e:
        print(f"请求页面出错: {e}")
        return [], None

    url_pictures_this_page = re.findall(r'"objURL":"(.*?)",', html)
    url_next_page_prefix = re.findall(r'<a href="(.*?)" class="next">下一页</a>', html)
    if url_next_page_prefix:
        # urljoin copes with root-relative ("/search/..."), relative and
        # absolute hrefs alike; plain concatenation produced a double slash
        # for root-relative links.
        url_next_page = urljoin("http://image.baidu.com/", url_next_page_prefix[0])
    else:
        url_next_page = None
    return url_pictures_this_page, url_next_page
def download_image(url, save_dir, index, image_type):
    """Download one picture and save it as ``{save_dir}/{index}.{image_type}``.

    Failures are reported on stdout instead of being raised, so one bad link
    does not abort a batch download.

    Args:
        url: Address of the picture to download.
        save_dir: Directory the file is written into (must already exist).
        index: Zero-based counter used as the file name; messages show
            ``index + 1``.
        image_type: File extension without the dot, e.g. ``"jpg"``.

    Returns:
        ``True`` when the file was written, ``False`` when the request or the
        write failed. Callers that ignore the return value are unaffected.
    """
    try:
        picture = requests.get(url, timeout=10)
        picture.raise_for_status()
        name = f"{save_dir}/{index}.{image_type}"
        with open(name, 'wb') as f:
            f.write(picture.content)
        print(f"第{index + 1}张图片下载成功")
        return True
    except Exception as e:
        # Deliberately broad: this is the best-effort boundary for one image.
        print(f"第{index + 1}张图片下载失败! 错误: {e}")
        return False
def download_images(keyWord, save_dir, number):
    """Crawl image-search result pages for *keyWord* and download pictures.

    Result pages are followed one after another until ``number`` downloads
    have been attempted, a page yields no picture URLs, or there is no next
    page.

    Args:
        keyWord: Search keyword appended to the search URL.
        save_dir: Directory the pictures are saved into; created if missing.
        number: Maximum number of download attempts. The counter advances for
            every attempted download, including ones that fail.
    """
    os.makedirs(save_dir, exist_ok=True)
    base_url = "http://image.baidu.com/search/flip?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=result&fr=&sf=1&fmq=1497491098685_R&pv=&ic=0&nc=1&z=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&ctd=1497491098685%5E00_1519X735&word="
    url = base_url + keyWord
    a = 0
    # `url` becomes None after the last result page; stop there instead of
    # issuing a request that can only fail and print a spurious error.
    while a < number and url:
        pictures_url, url = get_urls_one_page(url)
        if not pictures_url:
            break
        for picture_url in pictures_url:
            image_type = get_image_type(picture_url)
            if image_type:
                download_image(picture_url, save_dir, a, image_type)
                a += 1
            if a >= number:
                break
# Search keywords; each one is downloaded into its own folder under data/raw/.
classlist = ['黄种人', '白种人', '黑种人', '动漫头像']
progress = tqdm(classlist, desc="下载进度")
for keyword in progress:
    target_dir = f'data/raw/{keyword}/'
    # Up to 1000 download attempts per keyword.
    download_images(keyword, target_dir, 1000)
由于从网络上爬取的图像质量参差不齐,所以我直接找了已经处理好的数据集来使用。
可以选择自己喜欢的数据集进行合成。
1.3 人脸数据整理
完成下载后,查看下载的图像会发现掺杂了一些非人脸或非对应类别的人脸,需要手动进行一些大致的清理,删除不符合要求的图像。
清理后,为了用于训练,我们需要对这些原始数据进行初步的处理。例如用于生成模型训练时,我们需要将所有图像裁剪到 128×128 的尺寸,并尽量使人脸部分居中对齐。我们使用 mtcnn 插件完成对人脸的处理。具体过程为:
1.3.1 mtcnn_pytorch.zip
我们将 mtcnn_pytorch.zip 从资源中拷贝到项目根目录并解压,得到一个 mtcnn_pytorch 文件夹。在资源绑定中可以下载相关压缩包。
1.3.2 人脸对齐相关程序
在根目录新建 mtcnn.py 文件,编写人脸对齐相关程序。
mtcnn.py
import numpy as np
import torch
from PIL import Image
from mtcnn_pytorch.src.get_nets import PNet, RNet, ONet
from mtcnn_pytorch.src.box_utils import nms, calibrate_box, get_image_boxes, convert_to_square
from mtcnn_pytorch.src.first_stage import run_first_stage
from mtcnn_pytorch.src.align_trans import get_reference_facial_points, warp_and_crop_face
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


class MTCNN():
    """Face detection and alignment on top of the ``mtcnn_pytorch`` networks.

    The three networks (P-Net, R-Net, O-Net) are created on ``device`` and
    switched to eval mode. ``detect_faces`` runs them as a cascade;
    ``align`` / ``align_multi`` warp detected faces onto reference landmark
    positions.
    """

    def __init__(self):
        self.pnet = PNet().to(device)
        self.rnet = RNet().to(device)
        self.onet = ONet().to(device)
        for net in (self.pnet, self.rnet, self.onet):
            net.eval()
        # Reference landmark coordinates that faces are warped onto.
        # NOTE(review): presumably laid out for a 112x112 crop - confirm
        # against mtcnn_pytorch.src.align_trans.
        self.reference = get_reference_facial_points(default_square=True)

    def get_resized_reference_facial_points(self, crop_size, reference_pts):
        """Scale reference landmark coordinates to a target crop size.

        Args:
            crop_size: Target crop size as a pair; only its larger side is
                used.
            reference_pts: Reference landmark coordinates (array-like).

        Returns:
            ``reference_pts`` as float32 multiplied by
            ``max(crop_size) / 112``, 112 being the larger side of the
            hard-coded ``(96, 112)`` base size.
        """
        ref_pts = np.float32(reference_pts)
        tmp_crop_size = np.array((96, 112))
        resize_TIME = max(crop_size) / max(tmp_crop_size)
        ref_pts = ref_pts * resize_TIME
        return ref_pts

    def align(self, img, crop_size=(112, 112)):
        """Align the first detected face of *img* and crop it to *crop_size*.

        Args:
            img: Input image; must expose ``.size`` and be convertible with
                ``np.array`` (a PIL image in the calling scripts).
            crop_size: Output size passed to ``warp_and_crop_face``.

        Returns:
            The warped face as a PIL image, or ``None`` (after printing a
            message) when no face is detected.
        """
        _, landmarks = self.detect_faces(img)
        if len(landmarks) == 0:
            print("No face detected.")
            return None  # No face found: let the caller decide what to do.
        # Each landmark row stores five x values followed by five y values;
        # pair them up as [x, y] points.
        facial5points = [[landmarks[0][j], landmarks[0][j + 5]] for j in range(5)]
        reference_pts = self.get_resized_reference_facial_points(crop_size, self.reference)
        warped_face = warp_and_crop_face(np.array(img), facial5points, reference_pts, crop_size)
        return Image.fromarray(warped_face)

    def align_multi(self, img, limit=None, min_face_size=30.0):
        """Align every detected face of *img* into a 112x112 crop.

        Args:
            img: Input image (same requirements as for ``align``).
            limit: If truthy, keep only the first ``limit`` detections.
            min_face_size: Forwarded to ``detect_faces``.

        Returns:
            ``(boxes, faces)`` where ``boxes`` are the (possibly truncated)
            bounding boxes and ``faces`` is a list of PIL images, one per
            kept detection.
        """
        boxes, landmarks = self.detect_faces(img, min_face_size)
        if limit:
            boxes = boxes[:limit]
            landmarks = landmarks[:limit]
        faces = []
        for landmark in landmarks:
            facial5points = [[landmark[j], landmark[j + 5]] for j in range(5)]
            warped_face = warp_and_crop_face(np.array(img), facial5points, self.reference,
                                             crop_size=(112, 112))
            faces.append(Image.fromarray(warped_face))
        return boxes, faces

    def detect_faces(self, image, min_face_size=20.0, thresholds=(0.6, 0.7, 0.8),
                     nms_thresholds=(0.7, 0.7, 0.7)):
        """Run the three-stage cascade and return face boxes and landmarks.

        Args:
            image: Input image exposing ``.size`` as ``(width, height)``.
            min_face_size: Value used to derive the first scale
                (``12 / min_face_size``).
            thresholds: Three score thresholds, one per stage (P-, R-, O-Net).
                Any indexable sequence works; the defaults are tuples so they
                cannot be mutated across calls.
            nms_thresholds: Three thresholds passed to ``nms``, one per stage.

        Returns:
            ``(bounding_boxes, landmarks)``. When nothing is found in the
            first stage, or no boxes reach the third stage, both are empty
            lists. Otherwise they are NumPy arrays; landmark rows hold five
            x coordinates followed by five y coordinates in image space.
        """
        width, height = image.size
        min_length = min(height, width)
        min_detection_size = 12
        factor = 0.707  # sqrt(0.5)

        # Build the list of scales: start at 12 / min_face_size and shrink by
        # `factor` until the scaled shorter side is no larger than 12.
        scales = []
        m = min_detection_size / min_face_size
        min_length *= m
        factor_count = 0
        while min_length > min_detection_size:
            scales.append(m * factor ** factor_count)
            min_length *= factor
            factor_count += 1

        with torch.no_grad():
            # ---- Stage 1: P-Net on every scale ----
            bounding_boxes = []
            for s in scales:
                boxes = run_first_stage(image, self.pnet, scale=s, threshold=thresholds[0])
                if boxes is not None:
                    bounding_boxes.append(boxes)
            if not bounding_boxes:
                return [], []
            bounding_boxes = np.vstack(bounding_boxes)
            keep = nms(bounding_boxes[:, 0:5], nms_thresholds[0])
            bounding_boxes = bounding_boxes[keep]
            # Columns 0:5 are passed as boxes and columns 5: as offsets.
            bounding_boxes = calibrate_box(bounding_boxes[:, 0:5], bounding_boxes[:, 5:])
            bounding_boxes = convert_to_square(bounding_boxes)
            bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])

            # ---- Stage 2: R-Net on 24x24 crops ----
            img_boxes = get_image_boxes(bounding_boxes, image, size=24)
            img_boxes = torch.FloatTensor(img_boxes).to(device)
            output = self.rnet(img_boxes)
            offsets = output[0].detach().cpu().numpy()  # shape [n_boxes, 4]
            probs = output[1].detach().cpu().numpy()  # shape [n_boxes, 2]
            keep = np.where(probs[:, 1] > thresholds[1])[0]
            bounding_boxes = bounding_boxes[keep]
            bounding_boxes[:, 4] = probs[keep, 1].reshape((-1,))
            offsets = offsets[keep]
            keep = nms(bounding_boxes, nms_thresholds[1])
            bounding_boxes = bounding_boxes[keep]
            bounding_boxes = calibrate_box(bounding_boxes, offsets[keep])
            bounding_boxes = convert_to_square(bounding_boxes)
            bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])

            # ---- Stage 3: O-Net on 48x48 crops ----
            img_boxes = get_image_boxes(bounding_boxes, image, size=48)
            if len(img_boxes) == 0 or len(bounding_boxes) == 0:
                return [], []
            img_boxes = torch.FloatTensor(img_boxes).to(device)
            output = self.onet(img_boxes)
            landmarks = output[0].detach().cpu().numpy()  # shape [n_boxes, 10]
            offsets = output[1].detach().cpu().numpy()  # shape [n_boxes, 4]
            probs = output[2].detach().cpu().numpy()  # shape [n_boxes, 2]
            keep = np.where(probs[:, 1] > thresholds[2])[0]
            bounding_boxes = bounding_boxes[keep]
            bounding_boxes[:, 4] = probs[keep, 1].reshape((-1,))
            offsets = offsets[keep]
            landmarks = landmarks[keep]

            # Convert landmarks from box-relative values to image
            # coordinates: x = xmin + width * lx, y = ymin + height * ly.
            width = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0
            height = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0
            xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1]
            landmarks[:, 0:5] = np.expand_dims(xmin, 1) + np.expand_dims(width, 1) * landmarks[:, 0:5]
            landmarks[:, 5:10] = np.expand_dims(ymin, 1) + np.expand_dims(height, 1) * landmarks[:, 5:10]

            bounding_boxes = calibrate_box(bounding_boxes, offsets)
            keep = nms(bounding_boxes, nms_thresholds[2], mode='min')
            bounding_boxes = bounding_boxes[keep]
            landmarks = landmarks[keep]
        return bounding_boxes, landmarks
1.3.3 人脸对齐和剪切
在根目录新建 process_mtcnn_128.py 文件,用于人脸对齐和剪切操作。代码如下:
import PIL
from PIL import Image
import os
from tqdm import tqdm
import argparse
from mtcnn import MTCNN
def crop(imgpath, savepath, mtcnn, crop_size=(112, 112)):
    """Align the face in one image file and save the cropped result.

    Args:
        imgpath: Path of the input image.
        savepath: Path the aligned face is written to.
        mtcnn: Object whose ``align(img=..., crop_size=...)`` returns an
            image with a ``save`` method, or ``None`` when no face is found.
        crop_size: Output size forwarded to ``mtcnn.align``.

    Returns:
        ``True`` if an aligned face was saved; ``False`` if no face was
        detected or any error occurred (the error is printed, not raised).
    """
    try:
        # Close the file handle as soon as the pixel data has been copied by
        # convert(); otherwise handles pile up over a large batch.
        with PIL.Image.open(imgpath) as source:
            image = source.convert('RGB')
        aligned_image = mtcnn.align(img=image, crop_size=crop_size)
        if aligned_image is None:
            print(f'No face detected or alignment failed for {imgpath}')
            return False
        aligned_image.save(savepath)
        print(f'Successfully saved: {savepath}')
        return True
    except Exception as e:
        # Best-effort boundary for one file: report and carry on.
        print(f'Error processing {imgpath}: {str(e)}')
        return False
if __name__ == "__main__":
    # Align and crop every image under <root>/<class>/ into
    # <output_dir>/<class>/<index>.png, then print a summary.
    parser = argparse.ArgumentParser()
    parser.add_argument("--root", type=str, default='data/raw/', required=False,
                        help="the dir of raw datas ")
    parser.add_argument("--output_dir", type=str, default='data/crop128/', required=False,
                        help="the dir for processed datas")
    args = parser.parse_args()
    mtcnn = MTCNN()
    dir_origin_path = args.root
    dir_save_path = args.output_dir
    os.makedirs(dir_save_path, exist_ok=True)
    success_count = 0
    fail_count = 0

    # Only the immediate sub-directories of the root are class folders.
    # Recursing with os.walk while joining onto the root produced paths that
    # do not exist as soon as a class folder contained a nested directory.
    subdirs = sorted(
        entry for entry in os.listdir(dir_origin_path)
        if os.path.isdir(os.path.join(dir_origin_path, entry))
    )
    for sub in tqdm(subdirs, desc="Processing subdirectories"):
        base = os.path.join(dir_origin_path, sub)
        path = os.path.join(dir_save_path, sub)
        if not os.path.exists(path):
            os.makedirs(path)
            print(f'Created directory: {path}')
        # Sort the listing and derive the output index from the position so
        # every input maps to a stable output name. The index now advances
        # for skipped files too, so a re-run resumes instead of skipping the
        # whole folder once the first output exists.
        images = sorted(os.listdir(base))
        for cnt, img in enumerate(tqdm(images, desc=f"Processing images in {sub}")):
            output_img = os.path.join(path, f"{cnt:08}.png")
            img_file = os.path.join(base, img)
            if os.path.isfile(output_img):
                continue
            result = crop(img_file, output_img, mtcnn, crop_size=(128, 128))
            if result:
                success_count += 1
            else:
                fail_count += 1
    print(f'Total processed: {success_count + fail_count}')
    print(f'Success: {success_count}')
    print(f'Failed: {fail_count}')
执行程序。这里介绍两种执行程序的方式:第一种是可视化的方式,右键点击文件并选择“执行”(英文版:Run),如图所示:
第二种方法是通过命令行执行。点开 PyCharm 的 Terminal 窗口(或者按 Alt+F12 快捷键进入终端),输入命令:
python process_mtcnn_128.py --root data/raw --output_dir data/crop128
如图所示:
1.3.4 数据可视化
在根目录下新建 loaddata.py,编写代码如下:
import torchvision
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torchvision.transforms import Compose
import matplotlib.pyplot as plt
import numpy as np
def main():
    """Load the cropped face dataset and display one shuffled batch as a grid."""
    data_root = 'data/crop128/'
    image_size = 128
    batch_size_train = 4

    # Preprocessing: resize, centre-crop, convert to tensor, then normalise
    # each channel with mean 0.5 and std 0.5.
    transforms = torchvision.transforms
    preprocessing = Compose([
        transforms.Resize(image_size),
        transforms.CenterCrop(image_size),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    train_data = ImageFolder(root=data_root, transform=preprocessing)

    # Shuffled loader over the dataset.
    train_loader = DataLoader(
        train_data,
        batch_size=batch_size_train,
        num_workers=4,
        shuffle=True,
    )

    def imshow(img):
        """Undo the normalisation and show a CHW tensor with matplotlib."""
        restored = img / 2 + 0.5  # inverse of Normalize(0.5, 0.5)
        channels_last = np.transpose(restored.numpy(), (1, 2, 0))
        plt.imshow(channels_last)
        plt.axis('off')  # hide the axes
        plt.show()

    # Show the first batch only.
    for images, labels in train_loader:
        print(labels)
        imshow(torchvision.utils.make_grid(images))
        break


if __name__ == '__main__':
    main()
运行结果如下: