我被要求使用 python 将上传的视频转换为动画。期望:
下面是我期望得到的效果样例;但我在这个项目上做了一段时间后仍未能实现它。相反,我得到的是这样的结果:
任何解决方案将不胜感激,谢谢
这是我正在使用的代码:
import sys
import cv2
import tensorflow as tf
import numpy as np
from PyQt5.QtWidgets import (QApplication, QMainWindow, QPushButton, QLabel, QFileDialog, QVBoxLayout,
QWidget, QComboBox, QHBoxLayout, QProgressBar)
from PyQt5.QtCore import Qt
from PyQt5.QtGui import QPixmap, QDragEnterEvent, QDropEvent
import os
# Load the pre-trained style transfer model from the local 'model/' directory
# at import time (so the GUI fails fast if the model is missing).
# NOTE(review): assumes the SavedModel is callable as
# model(content, style) -> [stylized_batch] — confirm against the exported model.
model = tf.saved_model.load('model/')
def preprocess(image):
    """Convert a BGR uint8 image to a batched float32 RGB tensor.

    Steps: BGR -> RGB, scale to [0, 1] float32, resize to 256x256,
    then prepend a batch dimension, yielding shape (1, 256, 256, 3).
    """
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    tensor = tf.image.convert_image_dtype(rgb, tf.float32)
    tensor = tf.image.resize(tensor, (256, 256))
    return tensor[tf.newaxis, ...]
def apply_style(content_image, style_image):
    """Stylize ``content_image`` with ``style_image``.

    Returns a BGR uint8 image with the SAME height/width as the input
    content image, so it can be written straight into a VideoWriter opened
    at the source resolution.

    Bug fixed: the original reassigned ``content_image`` to its
    preprocessed (1, 256, 256, 3) tensor *before* reading
    ``content_image.shape[1]``/``shape[2]``, so every output frame was
    resized to 256x256 instead of the original resolution. Feeding
    256x256 frames to a VideoWriter opened with the source (width, height)
    silently produces a corrupt/empty video.
    """
    # Capture the source frame size before any resizing happens.
    orig_h, orig_w = content_image.shape[:2]
    content = preprocess(content_image)
    style = preprocess(style_image)
    # NOTE(review): assumes model(content, style) returns [stylized_batch].
    stylized = model(tf.constant(content), tf.constant(style))[0]
    # Restore the original frame resolution to match the video writer.
    stylized = tf.image.resize(stylized, (orig_h, orig_w))
    stylized = tf.squeeze(stylized).numpy()
    # Clip before the uint8 cast: model output can slightly exceed [0, 1],
    # and a plain cast would wrap those values around.
    stylized = np.clip(stylized * 255.0, 0, 255).astype(np.uint8)
    return cv2.cvtColor(stylized, cv2.COLOR_RGB2BGR)
class VideoStyleTransferApp(QMainWindow):
    """Main window: drag-and-drop / file-dialog video selection, a style
    picker populated from the 'styles/' folder, and frame-by-frame style
    transfer with a progress bar.
    """

    # Container formats accepted by drag-and-drop and the file dialog.
    VIDEO_EXTS = ('.mp4', '.avi')

    def __init__(self):
        super().__init__()
        self.initUI()

    def initUI(self):
        """Build the widget tree and wire up signals."""
        self.setWindowTitle('Video Style Transfer')
        self.setGeometry(100, 100, 800, 600)

        layout = QVBoxLayout()

        self.label = QLabel('Drag and drop a video or click to upload, then select a style', self)
        self.label.setAlignment(Qt.AlignCenter)
        layout.addWidget(self.label)

        self.drop_area = QLabel('Drag and drop video here', self)
        self.drop_area.setStyleSheet("QLabel { background-color : lightgray; border: 2px dashed gray; }")
        self.drop_area.setAlignment(Qt.AlignCenter)
        self.drop_area.setFixedHeight(100)
        layout.addWidget(self.drop_area)
        # Route the drop area's drag/drop events through this window's handlers.
        self.drop_area.setAcceptDrops(True)
        self.drop_area.dragEnterEvent = self.dragEnterEvent
        self.drop_area.dropEvent = self.dropEvent

        self.upload_video_btn = QPushButton('Upload Video', self)
        self.upload_video_btn.clicked.connect(self.upload_video)
        layout.addWidget(self.upload_video_btn)

        self.style_selector = QComboBox(self)
        self.style_selector.addItems(self.load_styles())
        layout.addWidget(self.style_selector)

        self.process_btn = QPushButton('Process Video', self)
        self.process_btn.clicked.connect(self.process_video)
        self.process_btn.setEnabled(False)  # enabled once a video is chosen
        layout.addWidget(self.process_btn)

        self.progress_bar = QProgressBar(self)
        self.progress_bar.setValue(0)
        self.progress_bar.setTextVisible(True)
        layout.addWidget(self.progress_bar)

        self.video_path = None

        container = QWidget()
        container.setLayout(layout)
        self.setCentralWidget(container)

    def load_styles(self):
        """Return the style image filenames found in 'styles/'.

        Fixed: a missing styles directory used to raise FileNotFoundError
        during startup; it now yields an empty list instead.
        """
        self.styles_dir = 'styles/'
        if not os.path.isdir(self.styles_dir):
            return []
        return [f for f in os.listdir(self.styles_dir) if f.endswith(('.jpg', '.png'))]

    def dragEnterEvent(self, event: QDragEnterEvent):
        """Accept drags that carry file URLs."""
        if event.mimeData().hasUrls():
            event.acceptProposedAction()

    def dropEvent(self, event: QDropEvent):
        """Store the first dropped file that has a supported video extension.

        Fixed: the original stored *any* dropped path in self.video_path and
        only checked the extension for the label text, so dropping e.g. a
        .txt file could still enable processing with a non-video path.
        """
        for url in event.mimeData().urls():
            path = url.toLocalFile()
            if path.endswith(self.VIDEO_EXTS):
                self.video_path = path
                self.label.setText(f'Video selected: {self.video_path}')
                self.check_files_ready()
                break

    def upload_video(self):
        """Pick a video file via the native file dialog."""
        options = QFileDialog.Options()
        self.video_path, _ = QFileDialog.getOpenFileName(
            self, 'Open Video File', '', 'Video Files (*.mp4 *.avi)', options=options)
        if self.video_path:
            self.label.setText(f'Video selected: {self.video_path}')
            self.check_files_ready()

    def check_files_ready(self):
        """Enable the Process button once both a video and a style are chosen."""
        if self.video_path and self.style_selector.currentText():
            self.process_btn.setEnabled(True)

    def process_video(self):
        """Stylize every frame of the selected video and save the result.

        Fixes over the original:
        - validates that the style image and the video actually open before
          creating any output;
        - pumps the Qt event loop each frame so the progress bar repaints
          (the original blocked the GUI thread for the whole run, freezing
          the window and the progress bar);
        - releases the capture/writer in a ``finally`` so a failure mid-run
          does not leak them.

        NOTE(review): this still runs on the GUI thread; for long videos a
        QThread worker is the proper fix.
        """
        if not self.video_path or not self.style_selector.currentText():
            return

        style_image_path = os.path.join(self.styles_dir, self.style_selector.currentText())
        style_image = cv2.imread(style_image_path)
        if style_image is None:
            self.label.setText(f'Could not read style image: {style_image_path}')
            return

        cap = cv2.VideoCapture(self.video_path)
        if not cap.isOpened():
            self.label.setText(f'Could not open video: {self.video_path}')
            return

        self.label.setText('Processing video...')
        self.progress_bar.setValue(0)
        self.repaint()

        output_folder = os.path.join(os.path.expanduser("~"), "Videos", "StyledVideos")
        os.makedirs(output_folder, exist_ok=True)
        output_path = os.path.join(output_folder, 'output_' + os.path.basename(self.video_path))

        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        self.progress_bar.setMaximum(max(frame_count, 1))
        processed_frames = 0
        try:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                out.write(apply_style(frame, style_image))
                processed_frames += 1
                self.progress_bar.setValue(processed_frames)
                # Keep the UI responsive and let the progress bar repaint.
                QApplication.processEvents()
        finally:
            cap.release()
            out.release()

        self.label.setText(f'Processing complete! Output saved as {output_path}')
        self.progress_bar.setValue(self.progress_bar.maximum())
if __name__ == '__main__':
    # Standard Qt bootstrap: create the application, show the main window,
    # and hand control to the event loop until the window is closed.
    app = QApplication(sys.argv)
    window = VideoStyleTransferApp()
    window.show()
    sys.exit(app.exec_())
遇到的问题是,正在对视频的每一帧应用相同的静态样式转换。这会导致输出中出现不一致和“闪烁”的外观,因为它没有考虑到帧之间的时序一致性。此外,代码中还有一个具体错误:`apply_style` 里先把 `content_image` 重新赋值为预处理后的 (1, 256, 256, 3) 张量,之后再读取 `content_image.shape[1]`、`shape[2]` 时得到的是 256×256,于是每一帧输出都被缩放到 256×256;把这种尺寸的帧写入按原始分辨率打开的 `VideoWriter`,会静默产生损坏或空白的视频文件。
为了实现期望的效果(如动画化图像所示),需要使用 视频风格迁移 ,它考虑了帧之间的时间信息。
以下是需要改进的代码,以及实现视频风格迁移的步骤:
1. 使用不同的模型: 当前的模型可能专为图像风格迁移而设计。研究并选择一个专门用于 视频风格迁移 的预训练模型。这些模型通常使用循环神经网络(RNN)或 Transformers 来处理时间信息。一些流行的选项包括:
* **GAN 框架:**例如,可以探索使用 [First Order Motion Model for Image Animation](https://github.com/AliaksandrSiarohin/first-order-model). 这个模型能够将源图像的运动迁移到目标图像,从而实现动画效果。
* **AdaIN (Adaptive Instance Normalization) 方法:** 这种方法也常用于视频风格迁移。
2. 实现时间一致性:
* **光流:** 可以使用光流算法来估计帧之间像素的运动。这将帮助在应用样式时保持时间一致性。
* **循环一致性:** 一些视频风格迁移模型使用循环一致性损失来确保输出视频中的时间平滑度。
3. 优化代码:
* **批处理:** 可以尝试将多个帧批处理在一起,以在保持时间一致性的同时提高性能。
* **GPU 加速:** 确保使用的是 GPU 来加速风格迁移过程,因为这在计算上非常昂贵。
代码示例(使用 First Order Motion Model):
需要安装 First Order Motion Model 的依赖项并下载预训练模型。
# ... import libraries ...
from demo import load_checkpoints
from demo import make_animation
from skimage import img_as_ubyte
from skimage.transform import resize
# ... GUI code ...
def process_video(self):
    # ... load the video and the style image ...
    # Load the First Order Motion Model checkpoints.
    generator, kp_detector = load_checkpoints(config_path='config/vox-256.yaml',
                                              checkpoint_path='vox-cpk.pth.tar')
    # Process the video frame by frame.
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        # Resize the style image to the same size as the frame.
        style_image_resized = resize(style_image, (frame.shape[0], frame.shape[1]), anti_aliasing=True)
        # Apply the First Order Motion Model.
        predictions = make_animation(style_image_resized, frame, generator, kp_detector, relative=True)
        styled_frame = predictions[0]
        # Write the output frame to the video.
        out.write(img_as_ubyte(styled_frame))
    # ... release resources ...
请注意:
- 这只是一个示例,可能需要根据所选的模型和方法调整代码。
- 视频风格迁移是一个计算密集型任务,因此请确保有足够的计算资源。
通过实现这些更改,应该能够生成在帧之间保持时间一致性的更流畅、更像动画的输出视频。
标签:python,qt,tensorflow,machine-learning,artificial-intelligence From: 78830209