首页 > 编程语言 >验证darknet中前处理做图像缩放(双线性内插值法)scale的算法效果

验证darknet中前处理做图像缩放(双线性内插值法)scale的算法效果

时间:2022-12-05 12:03:32浏览次数:51  
标签:name 缩放 插值法 image height int 双线性 file width

​​DARKNET中使用的缩放算法是双线性内插值法,这里就实际验证一把DARKNET 中scale的工作原理与效果:

验证darknet中前处理做图像缩放(双线性内插值法)scale的算法效果_开发语言

首先这是一张原图,画面中的是南京明城墙玄武门,玄武湖的正门。18年国庆带娃回学校的时候留念,一段令人怀念的岁月。

验证darknet中前处理做图像缩放(双线性内插值法)scale的算法效果_#include_02

图像参数如下,可以看到是YUV420格式的,尺寸为1920*1080。

验证darknet中前处理做图像缩放(双线性内插值法)scale的算法效果_2d_03

 首先我们将其转换为RAW YUV444P 格式,目的是为了方便做SCALE,转换工具就用FFMPEG

ffmpeg -i 165823915.jpg -pix_fmt yuv444p xuanwumen.yuv444p.yuv

验证darknet中前处理做图像缩放(双线性内插值法)scale的算法效果_#include_04

转换后的RAW YUV图像,可以通过ffplay命令查看,只是由于这个时候没有PARSER信息,需要手动输入图像的分辨率:

ffplay -pix_fmt yuv444p -f rawvideo -video_size 1920x1080 ./xuanwumen.yuv444p.yuv

验证darknet中前处理做图像缩放(双线性内插值法)scale的算法效果_2d_05

至此,万事俱备,只欠代码了。

我们的目的是对YUV图做SCALE。由于前面已经把图像转换成了YUV444P(三平面)格式,Y、U、V三个平面大小相同,所以需要对Y部分、U部分和V部分分别做SCALE。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <assert.h>
#include <sys/ioctl.h>

#define DBG(fmt, ...) do { printf("%s line %d, "fmt"\n", __func__, __LINE__, ##__VA_ARGS__); } while (0)

/* Planar 8-bit image: c planes, each h rows of w samples. */
typedef struct image {
int w; /* samples per row */
int h; /* rows per plane */
int c; /* number of planes */
unsigned char *data; /* sample buffer; the pixel helpers index it as c*h*w + y*w + x */
} image;

/*
 * Print `len` bytes starting at `buf` as a hex dump on stdout, 16 bytes per
 * row, each row prefixed with the address of its first byte.
 */
static void dump_memory(uint8_t *buf, int32_t len)
{
    int32_t i;

    printf("\n\rdump file memory:");
    for (i = 0; i < len; i++) {
        if ((i % 16) == 0) {
            /* %p requires a void pointer argument; passing uint8_t * is a type mismatch. */
            printf("\n\r%p: ", (void *)(buf + i));
        }
        printf("0x%02x ", buf[i]);
    }

    printf("\n\r");
}

/* Build an image header with the given dimensions and no sample buffer (data is NULL). */
image make_empty_image(int w, int h, int c)
{
    image hdr;

    hdr.w = w;
    hdr.h = h;
    hdr.c = c;
    hdr.data = NULL;
    return hdr;
}

/*
 * Return a deep copy of `p`: same dimensions, freshly allocated sample buffer.
 * The caller owns the returned buffer and releases it with free_image().
 * If the allocation fails the copy's data is NULL.
 */
image copy_image(image p)
{
    image copy = p;
    size_t count = (size_t)p.w * (size_t)p.h * (size_t)p.c;

    /* Samples are unsigned char, so the buffer holds `count` bytes. The former
     * sizeof(float) factor made memcpy read four times past the end of p.data. */
    copy.data = (unsigned char *)calloc(count, sizeof(unsigned char));
    if (copy.data != NULL && p.data != NULL) {
        memcpy(copy.data, p.data, count);
    }
    return copy;
}

/* Allocate a zero-filled image of w x h samples in c planes (buffer comes from calloc). */
image make_image(int w, int h, int c)
{
    image img = make_empty_image(w, h, c);

    img.data = (unsigned char *)calloc(h * w * c, sizeof(char));
    return img;
}

/*
 * Store val at (x, y) of plane c. Coordinates outside the image are ignored.
 * The assignment converts val from float to unsigned char.
 */
static void set_pixel(image m, int x, int y, int c, float val)
{
    int inside = x >= 0 && x < m.w && y >= 0 && y < m.h && c >= 0 && c < m.c;

    if (!inside) {
        return;
    }

    assert(x < m.w && y < m.h && c < m.c);
    m.data[(c * m.h + y) * m.w + x] = val;
}

/* Read the sample at (x, y) of plane c as a float. Only the upper bounds are asserted. */
static float get_pixel(image m, int x, int y, int c)
{
    assert(x < m.w && y < m.h && c < m.c);

    int offset = c * m.h * m.w + y * m.w + x;
    return m.data[offset];
}

/* Release the sample buffer referenced by m; an image without a buffer is a no-op. */
void free_image(image m)
{
    free(m.data); /* free(NULL) does nothing, so no guard is required */
}

/* Add val to the sample at (x, y) of plane c; the sum is stored back as unsigned char. */
static void add_pixel(image m, int x, int y, int c, float val)
{
    assert(x < m.w && y < m.h && c < m.c);

    unsigned char *sample = m.data + (c * m.h * m.w + y * m.w + x);
    *sample += val;
}

/*
 * Resize `im` to w x h samples per plane with bilinear interpolation, done as
 * two separable passes: a horizontal pass into a temporary w x im.h image,
 * then a vertical pass into the result.
 *
 * Returns a newly allocated image that the caller releases with free_image().
 * When the size already matches, the result is copy_image(im). On invalid
 * dimensions, a missing source buffer or allocation failure the returned
 * image has data == NULL.
 *
 * Every store goes through set_pixel()/add_pixel(), which convert the float
 * value to unsigned char.
 */
image resize_image(image im, int w, int h)
{
    if (im.w == w && im.h == h) return copy_image(im);

    if (w <= 0 || h <= 0 || im.w <= 0 || im.h <= 0 || im.data == NULL) {
        return make_empty_image(w, h, im.c);
    }

    image resized = make_image(w, h, im.c);
    image part = make_image(w, im.h, im.c);
    if (resized.data == NULL || part.data == NULL) {
        free_image(part);
        free_image(resized);
        return make_empty_image(w, h, im.c);
    }

    int r, c, k;
    /* A one-sample-wide/high target has no step between samples. Dividing by
     * (w - 1) or (h - 1) there would divide by zero; the scale is not needed
     * for the last column/row anyway. */
    float w_scale = (w > 1) ? (float)(im.w - 1) / (w - 1) : 0.0f;
    float h_scale = (h > 1) ? (float)(im.h - 1) / (h - 1) : 0.0f;

    /* Horizontal pass: im (im.w x im.h) -> part (w x im.h). */
    for(k = 0; k < im.c; ++k){
        for(r = 0; r < im.h; ++r){
            for(c = 0; c < w; ++c){
                float val = 0;
                if(c == w-1 || im.w == 1){
                    /* last output column maps exactly onto the last source column */
                    val = get_pixel(im, im.w-1, r, k);
                } else {
                    float sx = c*w_scale;
                    int ix = (int) sx;
                    float dx = sx - ix;
                    val = (1 - dx) * get_pixel(im, ix, r, k) + dx * get_pixel(im, ix+1, r, k);
                }
                set_pixel(part, c, r, k, val);
            }
        }
    }

    /* Vertical pass: part (w x im.h) -> resized (w x h). */
    for(k = 0; k < im.c; ++k){
        for(r = 0; r < h; ++r){
            int last_row = (r == h-1 || im.h == 1);
            int iy;
            float dy;

            if(last_row){
                /* Pin the last output row to the last source row, mirroring the
                 * horizontal pass. Computing it as (h-1)*h_scale can round just
                 * below im.h-1, which left the row weighted by (1-dy) ~ 0 with
                 * the iy+1 term skipped. */
                iy = im.h - 1;
                dy = 0;
            } else {
                float sy = r*h_scale;
                iy = (int) sy;
                dy = sy - iy;
            }

            for(c = 0; c < w; ++c){
                float val = (1-dy) * get_pixel(part, c, iy, k);
                set_pixel(resized, c, r, k, val);
            }
            if(last_row) continue;
            for(c = 0; c < w; ++c){
                float val = dy * get_pixel(part, c, iy+1, k);
                add_pixel(resized, c, r, k, val);
            }
        }
    }

    free_image(part);
    return resized;
}

/*
 * Usage: program input.yuv width height
 *
 * Loads one raw 1920x1080 YUV444P frame (three full-size planes stored back
 * to back), resizes every plane to width x height with resize_image() and
 * writes the three resized planes, in the same order, to ./output.yuv.
 * Exits with status -1 on any argument, I/O or allocation error.
 */
int main(int argc, char **argv)
{
    enum { SRC_WIDTH = 1920, SRC_HEIGHT = 1080, PLANES = 3 };
    const size_t src_plane = (size_t)SRC_WIDTH * SRC_HEIGHT;
    FILE *file;
    int width, height;
    int i;

    DBG("in");

    if(argc != 4)
    {
        DBG("input error, you should use this program like that: program xxxx.yuv width height.");
        exit(-1);
    }

    width = atoi(argv[2]);
    height = atoi(argv[3]);

    /* atoi() yields 0 for non-numeric text; a non-positive target cannot be scaled to. */
    if(width <= 0 || height <= 0)
    {
        DBG("input error, invalid output size %d x %d.", width, height);
        exit(-1);
    }

    DBG("scale to width %d, height %d.", width, height);

    file = fopen(argv[1], "rb");
    if(file == NULL)
    {
        DBG("fatal error, open file %s failure, please check the file status.", argv[1]);
        exit(-1);
    }

    /* ftell() returns long and -1 on failure: keep the full range and check it. */
    if(fseek(file, 0, SEEK_END) != 0)
    {
        DBG("fatal error, seek file %s failure.", argv[1]);
        exit(-1);
    }
    long filelen = ftell(file);
    if(filelen < 0)
    {
        DBG("fatal error, get size of file %s failure.", argv[1]);
        exit(-1);
    }

    DBG("file %s len %ld byets.", argv[1], filelen);

    /* The planes are copied out of the file buffer below, so all three must be present. */
    if((size_t)filelen < PLANES * src_plane)
    {
        DBG("input error, file %s is smaller than a %dx%d yuv444p frame.", argv[1], SRC_WIDTH, SRC_HEIGHT);
        exit(-1);
    }

    unsigned char *p = malloc((size_t)filelen);
    if(p == NULL)
    {
        DBG("malloc buffer failure for %s len %ld.", argv[1], filelen);
        exit(-1);
    }

    if(fseek(file, 0, SEEK_SET) != 0 || fread(p, 1, (size_t)filelen, file) != (size_t)filelen)
    {
        DBG("read file failure, size wrong.");
        exit(-1);
    }

    fclose(file);

    /* Safe: the size check above guarantees far more than 32 bytes. */
    dump_memory(p, 32);
    dump_memory(p + filelen - 32, 32);

    image resized[PLANES];
    for(i = 0; i < PLANES; i++)
    {
        image plane = make_image(SRC_WIDTH, SRC_HEIGHT, 1);
        if(plane.data == NULL)
        {
            DBG("malloc plane buffer failure.");
            exit(-1);
        }
        memcpy(plane.data, p + (size_t)i * src_plane, src_plane);

        resized[i] = resize_image(plane, width, height);
        free_image(plane);
        if(resized[i].data == NULL)
        {
            DBG("resize plane %d failure.", i);
            exit(-1);
        }
    }
    free(p);

    file = fopen("./output.yuv", "wb+");
    if(file == NULL)
    {
        DBG("fatal error, open output file failure, please check the file status.");
        exit(-1);
    }

    /* Write each resized plane straight from its own buffer. */
    const size_t out_plane = (size_t)width * (size_t)height;
    for(i = 0; i < PLANES; i++)
    {
        if(fwrite(resized[i].data, 1, out_plane, file) != out_plane)
        {
            DBG("write file failure, size wrong.");
            exit(-1);
        }
    }

    fflush(file);
    fsync(fileno(file));
    if(fclose(file) != 0)
    {
        DBG("close output file failure.");
        exit(-1);
    }

    for(i = 0; i < PLANES; i++)
    {
        free_image(resized[i]);
    }

    return 0;
}

执行如下命令:

$gcc main.c
$ffmpeg -i 165823915.jpg -pix_fmt yuv444p xuanwumen.yuv444p.yuv
$./a.out xuanwumen.yuv444p.yuv 352 352
$ffplay -pix_fmt yuv444p -f rawvideo -video_size 352x352 ./output.yuv

SCALE 352X352后输出结果:

验证darknet中前处理做图像缩放(双线性内插值法)scale的算法效果_算法_06

 SCALE到 100*200的大小:

$./a.out xuanwumen.yuv444p.yuv 100 200
$ffplay -pix_fmt yuv444p -f rawvideo -video_size 100x200 ./output.yuv

验证darknet中前处理做图像缩放(双线性内插值法)scale的算法效果_2d_07

scale 32*32大小:

验证darknet中前处理做图像缩放(双线性内插值法)scale的算法效果_算法_08

 scale 16*16大小:

验证darknet中前处理做图像缩放(双线性内插值法)scale的算法效果_python_09

上面在程序中固定写死了输入图像的尺寸,这不是一个好做法,下面的程序将输入尺寸设置为可变的,由程序输入。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <assert.h>
#include <sys/ioctl.h>

#define DBG(fmt, ...) do { printf("%s line %d, "fmt"\n", __func__, __LINE__, ##__VA_ARGS__); } while (0)

/* Planar 8-bit image: c planes, each h rows of w samples. */
typedef struct image {
int w; /* samples per row */
int h; /* rows per plane */
int c; /* number of planes */
unsigned char *data; /* sample buffer; the pixel helpers index it as c*h*w + y*w + x */
} image;

/*
 * Print `len` bytes starting at `buf` as a hex dump on stdout, 16 bytes per
 * row, each row prefixed with the address of its first byte.
 */
static void dump_memory(uint8_t *buf, int32_t len)
{
    int32_t i;

    printf("\n\rdump file memory:");
    for (i = 0; i < len; i++) {
        if ((i % 16) == 0) {
            /* %p requires a void pointer argument; passing uint8_t * is a type mismatch. */
            printf("\n\r%p: ", (void *)(buf + i));
        }
        printf("0x%02x ", buf[i]);
    }

    printf("\n\r");
}

/* Build an image header with the given dimensions and no sample buffer (data is NULL). */
image make_empty_image(int w, int h, int c)
{
    image hdr;

    hdr.w = w;
    hdr.h = h;
    hdr.c = c;
    hdr.data = NULL;
    return hdr;
}

/*
 * Return a deep copy of `p`: same dimensions, freshly allocated sample buffer.
 * The caller owns the returned buffer and releases it with free_image().
 * If the allocation fails the copy's data is NULL.
 */
image copy_image(image p)
{
    image copy = p;
    size_t count = (size_t)p.w * (size_t)p.h * (size_t)p.c;

    /* Samples are unsigned char, so the buffer holds `count` bytes. The former
     * sizeof(float) factor made memcpy read four times past the end of p.data. */
    copy.data = (unsigned char *)calloc(count, sizeof(unsigned char));
    if (copy.data != NULL && p.data != NULL) {
        memcpy(copy.data, p.data, count);
    }
    return copy;
}

/* Allocate a zero-filled image of w x h samples in c planes (buffer comes from calloc). */
image make_image(int w, int h, int c)
{
    image img = make_empty_image(w, h, c);

    img.data = (unsigned char *)calloc(h * w * c, sizeof(char));
    return img;
}

/*
 * Store val at (x, y) of plane c. Coordinates outside the image are ignored.
 * The assignment converts val from float to unsigned char.
 */
static void set_pixel(image m, int x, int y, int c, float val)
{
    int inside = x >= 0 && x < m.w && y >= 0 && y < m.h && c >= 0 && c < m.c;

    if (!inside) {
        return;
    }

    assert(x < m.w && y < m.h && c < m.c);
    m.data[(c * m.h + y) * m.w + x] = val;
}

/* Read the sample at (x, y) of plane c as a float. Only the upper bounds are asserted. */
static float get_pixel(image m, int x, int y, int c)
{
    assert(x < m.w && y < m.h && c < m.c);

    int offset = c * m.h * m.w + y * m.w + x;
    return m.data[offset];
}

/* Release the sample buffer referenced by m; an image without a buffer is a no-op. */
void free_image(image m)
{
    free(m.data); /* free(NULL) does nothing, so no guard is required */
}

/* Add val to the sample at (x, y) of plane c; the sum is stored back as unsigned char. */
static void add_pixel(image m, int x, int y, int c, float val)
{
    assert(x < m.w && y < m.h && c < m.c);

    unsigned char *sample = m.data + (c * m.h * m.w + y * m.w + x);
    *sample += val;
}

/*
 * Resize `im` to w x h samples per plane with bilinear interpolation, done as
 * two separable passes: a horizontal pass into a temporary w x im.h image,
 * then a vertical pass into the result.
 *
 * Returns a newly allocated image that the caller releases with free_image().
 * When the size already matches, the result is copy_image(im). On invalid
 * dimensions, a missing source buffer or allocation failure the returned
 * image has data == NULL.
 *
 * Every store goes through set_pixel()/add_pixel(), which convert the float
 * value to unsigned char.
 */
image resize_image(image im, int w, int h)
{
    if (im.w == w && im.h == h) return copy_image(im);

    if (w <= 0 || h <= 0 || im.w <= 0 || im.h <= 0 || im.data == NULL) {
        return make_empty_image(w, h, im.c);
    }

    image resized = make_image(w, h, im.c);
    image part = make_image(w, im.h, im.c);
    if (resized.data == NULL || part.data == NULL) {
        free_image(part);
        free_image(resized);
        return make_empty_image(w, h, im.c);
    }

    int r, c, k;
    /* A one-sample-wide/high target has no step between samples. Dividing by
     * (w - 1) or (h - 1) there would divide by zero; the scale is not needed
     * for the last column/row anyway. */
    float w_scale = (w > 1) ? (float)(im.w - 1) / (w - 1) : 0.0f;
    float h_scale = (h > 1) ? (float)(im.h - 1) / (h - 1) : 0.0f;

    /* Horizontal pass: im (im.w x im.h) -> part (w x im.h). */
    for(k = 0; k < im.c; ++k){
        for(r = 0; r < im.h; ++r){
            for(c = 0; c < w; ++c){
                float val = 0;
                if(c == w-1 || im.w == 1){
                    /* last output column maps exactly onto the last source column */
                    val = get_pixel(im, im.w-1, r, k);
                } else {
                    float sx = c*w_scale;
                    int ix = (int) sx;
                    float dx = sx - ix;
                    val = (1 - dx) * get_pixel(im, ix, r, k) + dx * get_pixel(im, ix+1, r, k);
                }
                set_pixel(part, c, r, k, val);
            }
        }
    }

    /* Vertical pass: part (w x im.h) -> resized (w x h). */
    for(k = 0; k < im.c; ++k){
        for(r = 0; r < h; ++r){
            int last_row = (r == h-1 || im.h == 1);
            int iy;
            float dy;

            if(last_row){
                /* Pin the last output row to the last source row, mirroring the
                 * horizontal pass. Computing it as (h-1)*h_scale can round just
                 * below im.h-1, which left the row weighted by (1-dy) ~ 0 with
                 * the iy+1 term skipped. */
                iy = im.h - 1;
                dy = 0;
            } else {
                float sy = r*h_scale;
                iy = (int) sy;
                dy = sy - iy;
            }

            for(c = 0; c < w; ++c){
                float val = (1-dy) * get_pixel(part, c, iy, k);
                set_pixel(resized, c, r, k, val);
            }
            if(last_row) continue;
            for(c = 0; c < w; ++c){
                float val = dy * get_pixel(part, c, iy+1, k);
                add_pixel(resized, c, r, k, val);
            }
        }
    }

    free_image(part);
    return resized;
}

/*
 * Usage: program input.yuv width height input_width input_height
 *
 * Loads one raw YUV444P frame of input_width x input_height (three full-size
 * planes stored back to back), resizes every plane to width x height with
 * resize_image() and writes the three resized planes, in the same order, to
 * ./output.yuv. Exits with status -1 on any argument, I/O or allocation error.
 */
int main(int argc, char **argv)
{
    enum { PLANES = 3 };
    FILE *file;
    int width, height, input_width, input_height;
    int i;

    DBG("in");

    if(argc != 6)
    {
        /* The program takes five arguments; the former usage text listed only three. */
        DBG("input error, you should use this program like that: program xxxx.yuv width height input_width input_height.");
        exit(-1);
    }

    width = atoi(argv[2]);
    height = atoi(argv[3]);
    input_width = atoi(argv[4]);
    input_height = atoi(argv[5]);

    /* atoi() yields 0 for non-numeric text; non-positive sizes are unusable. */
    if(width <= 0 || height <= 0 || input_width <= 0 || input_height <= 0)
    {
        DBG("input error, invalid size: output %d x %d, input %d x %d.", width, height, input_width, input_height);
        exit(-1);
    }

    DBG("scale to width %d, height %d.", width, height);

    file = fopen(argv[1], "rb");
    if(file == NULL)
    {
        DBG("fatal error, open file %s failure, please check the file status.", argv[1]);
        exit(-1);
    }

    /* ftell() returns long and -1 on failure: keep the full range and check it. */
    if(fseek(file, 0, SEEK_END) != 0)
    {
        DBG("fatal error, seek file %s failure.", argv[1]);
        exit(-1);
    }
    long filelen = ftell(file);
    if(filelen < 0)
    {
        DBG("fatal error, get size of file %s failure.", argv[1]);
        exit(-1);
    }

    DBG("file %s len %ld byets.", argv[1], filelen);

    /* The planes are copied out of the file buffer below, so all three must be present. */
    const size_t src_plane = (size_t)input_width * (size_t)input_height;
    if(src_plane > (size_t)filelen / PLANES)
    {
        DBG("input error, file %s is smaller than a %dx%d yuv444p frame.", argv[1], input_width, input_height);
        exit(-1);
    }

    unsigned char *p = malloc((size_t)filelen);
    if(p == NULL)
    {
        DBG("malloc buffer failure for %s len %ld.", argv[1], filelen);
        exit(-1);
    }

    if(fseek(file, 0, SEEK_SET) != 0 || fread(p, 1, (size_t)filelen, file) != (size_t)filelen)
    {
        DBG("read file failure, size wrong.");
        exit(-1);
    }

    fclose(file);

    /* The tail dump needs at least 32 bytes of file data. */
    dump_memory(p, filelen < 32 ? (int32_t)filelen : 32);
    if(filelen >= 32)
    {
        dump_memory(p + filelen - 32, 32);
    }

    image resized[PLANES];
    for(i = 0; i < PLANES; i++)
    {
        image plane = make_image(input_width, input_height, 1);
        if(plane.data == NULL)
        {
            DBG("malloc plane buffer failure.");
            exit(-1);
        }
        memcpy(plane.data, p + (size_t)i * src_plane, src_plane);

        resized[i] = resize_image(plane, width, height);
        free_image(plane);
        if(resized[i].data == NULL)
        {
            DBG("resize plane %d failure.", i);
            exit(-1);
        }
    }
    free(p);

    file = fopen("./output.yuv", "wb+");
    if(file == NULL)
    {
        DBG("fatal error, open output file failure, please check the file status.");
        exit(-1);
    }

    /* Write each resized plane straight from its own buffer. */
    const size_t out_plane = (size_t)width * (size_t)height;
    for(i = 0; i < PLANES; i++)
    {
        if(fwrite(resized[i].data, 1, out_plane, file) != out_plane)
        {
            DBG("write file failure, size wrong.");
            exit(-1);
        }
    }

    fflush(file);
    fsync(fileno(file));
    if(fclose(file) != 0)
    {
        DBG("close output file failure.");
        exit(-1);
    }

    for(i = 0; i < PLANES; i++)
    {
        free_image(resized[i]);
    }

    return 0;
}

拓展

ffmpeg是图像处理领域的瑞士军刀,总是在最关键的地方发挥关键的作用。上面我们用FFMPEG命令对图像的像素格式进行了转换,那么FFMPEG支持哪些像素格式之间的转换呢?HELP命令或者帮助文档中似乎没有给出明确的信息,只能看代码了。根据关键词"YUV444P",我们在pixdesc.c中找到了AVPixFmtDescriptor这个结构体的定义,从名字来看,我们找对了地方:

验证darknet中前处理做图像缩放(双线性内插值法)scale的算法效果_python_10

验证darknet中前处理做图像缩放(双线性内插值法)scale的算法效果_#include_11

经过整理后,得到的像素格式结果如下,竟然有这么多。

.name = "yuv420p",
.name = "yuyv422",
.name = "yvyu422",
.name = "y210le",
.name = "y210be",
.name = "rgb24",
.name = "bgr24",
.name = "x2rgb10le",
.name = "x2rgb10be",
.name = "x2bgr10le",
.name = "x2bgr10be",
.name = "yuv422p",
.name = "yuv444p",
.name = "yuv410p",
.name = "yuv411p",
.name = "yuvj411p",
.name = "gray",
.name = "monow",
.name = "monob",
.name = "pal8",
.name = "yuvj420p",
.name = "yuvj422p",
.name = "yuvj444p",
.name = "xvmc",
.name = "uyvy422",
.name = "uyyvyy411",
.name = "bgr8",
.name = "bgr4",
.name = "bgr4_byte",
.name = "rgb8",
.name = "rgb4",
.name = "rgb4_byte",
.name = "nv12",
.name = "nv21",
.name = "argb",
.name = "rgba",
.name = "abgr",
.name = "bgra",
.name = "0rgb",
.name = "rgb0",
.name = "0bgr",
.name = "bgr0",
.name = "gray9be",
.name = "gray9le",
.name = "gray10be",
.name = "gray10le",
.name = "gray12be",
.name = "gray12le",
.name = "gray14be",
.name = "gray14le",
.name = "gray16be",
.name = "gray16le",
.name = "yuv440p",
.name = "yuvj440p",
.name = "yuv440p10le",
.name = "yuv440p10be",
.name = "yuv440p12le",
.name = "yuv440p12be",
.name = "yuva420p",
.name = "yuva422p",
.name = "yuva444p",
.name = "yuva420p9be",
.name = "yuva420p9le",
.name = "yuva422p9be",
.name = "yuva422p9le",
.name = "yuva444p9be",
.name = "yuva444p9le",
.name = "yuva420p10be",
.name = "yuva420p10le",
.name = "yuva422p10be",
.name = "yuva422p10le",
.name = "yuva444p10be",
.name = "yuva444p10le",
.name = "yuva420p16be",
.name = "yuva420p16le",
.name = "yuva422p16be",
.name = "yuva422p16le",
.name = "yuva444p16be",
.name = "yuva444p16le",
.name = "rgb48be",
.name = "rgb48le",
.name = "rgba64be",
.name = "rgba64le",
.name = "rgb565be",
.name = "rgb565le",
.name = "rgb555be",
.name = "rgb555le",
.name = "rgb444be",
.name = "rgb444le",
.name = "bgr48be",
.name = "bgr48le",
.name = "bgra64be",
.name = "bgra64le",
.name = "bgr565be",
.name = "bgr565le",
.name = "bgr555be",
.name = "bgr555le",
.name = "bgr444be",
.name = "bgr444le",
.name = "vaapi",
.name = "yuv420p9le",
.name = "yuv420p9be",
.name = "yuv420p10le",
.name = "yuv420p10be",
.name = "yuv420p12le",
.name = "yuv420p12be",
.name = "yuv420p14le",
.name = "yuv420p14be",
.name = "yuv420p16le",
.name = "yuv420p16be",
.name = "yuv422p9le",
.name = "yuv422p9be",
.name = "yuv422p10le",
.name = "yuv422p10be",
.name = "yuv422p12le",
.name = "yuv422p12be",
.name = "yuv422p14le",
.name = "yuv422p14be",
.name = "yuv422p16le",
.name = "yuv422p16be",
.name = "yuv444p16le",
.name = "yuv444p16be",
.name = "yuv444p10le",
.name = "yuv444p10be",
.name = "yuv444p9le",
.name = "yuv444p9be",
.name = "yuv444p12le",
.name = "yuv444p12be",
.name = "yuv444p14le",
.name = "yuv444p14be",
.name = "d3d11va_vld",
.name = "dxva2_vld",
.name = "ya8",
.name = "ya16le",
.name = "ya16be",
.name = "videotoolbox_vld",
.name = "gbrp",
.name = "gbrp9le",
.name = "gbrp9be",
.name = "gbrp10le",
.name = "gbrp10be",
.name = "gbrp12le",
.name = "gbrp12be",
.name = "gbrp14le",
.name = "gbrp14be",
.name = "gbrp16le",
.name = "gbrp16be",
.name = "gbrap",
.name = "gbrap16le",
.name = "gbrap16be",
.name = "vdpau",
.name = "xyz12le",
.name = "xyz12be",
.name = "bayer_bggr8",
.name = "bayer_bggr16le",
.name = "bayer_bggr16be",
.name = "bayer_rggb8",
.name = "bayer_rggb16le",
.name = "bayer_rggb16be",
.name = "bayer_gbrg8",
.name = "bayer_gbrg16le",
.name = "bayer_gbrg16be",
.name = "bayer_grbg8",
.name = "bayer_grbg16le",
.name = "bayer_grbg16be",
.name = "nv16",
.name = "nv20le",
.name = "nv20be",
.name = "qsv",
.name = "mediacodec",
.name = "mmal",
.name = "cuda",
.name = "ayuv64le",
.name = "ayuv64be",
.name = "p010le",
.name = "p010be",
.name = "p016le",
.name = "p016be",
.name = "gbrap12le",
.name = "gbrap12be",
.name = "gbrap10le",
.name = "gbrap10be",
.name = "d3d11",
.name = "gbrpf32be",
.name = "gbrpf32le",
.name = "gbrapf32be",
.name = "gbrapf32le",
.name = "drm_prime",
.name = "grayf32be",
.name = "grayf32le",
.name = "yuva422p12be",
.name = "yuva422p12le",
.name = "yuva444p12be",
.name = "yuva444p12le",
.name = "nv24",
.name = "nv42",
.name = "vulkan",
.name = "p210be",
.name = "p210le",
.name = "p410be",
.name = "p410le",
.name = "p216be",
.name = "p216le",
.name = "p416be",
.name = "p416le",

以ARGB为例:

ffmpeg -i 165823915.jpg -pix_fmt argb xuanwumen.rgb
ffplay -pix_fmt argb -f rawvideo -video_size 1920x1080 xuanwumen.rgb

验证darknet中前处理做图像缩放(双线性内插值法)scale的算法效果_python_12

NV12格式缩放

前面验证的都是Y、U、V三个平面(plane)数据长度相等的格式,下面验证一下NV12。

首先,将一张图像转为NV12格式:

ffmpeg -i e253d6bd7ac64bde9e7f1fb09dc908b2.png -vf scale=1920:1080 output.jpg
ffmpeg -i output.jpg -pix_fmt nv12 sousuo.nv12.yuv

原图如下:

验证darknet中前处理做图像缩放(双线性内插值法)scale的算法效果_2d_13

转为1920X1080的YUV后如下:

ffplay -pix_fmt nv12 -f rawvideo -video_size 1920x1080 ./sousuo.nv12.yuv

验证darknet中前处理做图像缩放(双线性内插值法)scale的算法效果_#include_14

运行NV12缩放程序,将1920X1080的NV12转换为1280X720的NV12

./a.out sousuo.nv12.yuv 1280 720 1920 1080
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <assert.h>
#include <sys/ioctl.h>

#define DBG(fmt, ...) do { printf("%s line %d, "fmt"\n", __func__, __LINE__, ##__VA_ARGS__); } while (0)

/* Planar 8-bit image: c planes, each h rows of w samples. */
typedef struct image {
int w; /* samples per row */
int h; /* rows per plane */
int c; /* number of planes */
unsigned char *data; /* sample buffer; the pixel helpers index it as c*h*w + y*w + x */
} image;

/*
 * Print `len` bytes starting at `buf` as a hex dump on stdout, 16 bytes per
 * row, each row prefixed with the address of its first byte.
 */
static void dump_memory(uint8_t *buf, int32_t len)
{
    int32_t i;

    printf("\n\rdump file memory:");
    for (i = 0; i < len; i++) {
        if ((i % 16) == 0) {
            /* %p requires a void pointer argument; passing uint8_t * is a type mismatch. */
            printf("\n\r%p: ", (void *)(buf + i));
        }
        printf("0x%02x ", buf[i]);
    }

    printf("\n\r");
}

/* Build an image header with the given dimensions and no sample buffer (data is NULL). */
image make_empty_image(int w, int h, int c)
{
    image hdr;

    hdr.w = w;
    hdr.h = h;
    hdr.c = c;
    hdr.data = NULL;
    return hdr;
}

/*
 * Return a deep copy of `p`: same dimensions, freshly allocated sample buffer.
 * The caller owns the returned buffer and releases it with free_image().
 * If the allocation fails the copy's data is NULL.
 */
image copy_image(image p)
{
    image copy = p;
    size_t count = (size_t)p.w * (size_t)p.h * (size_t)p.c;

    /* Samples are unsigned char, so the buffer holds `count` bytes. The former
     * sizeof(float) factor made memcpy read four times past the end of p.data. */
    copy.data = (unsigned char *)calloc(count, sizeof(unsigned char));
    if (copy.data != NULL && p.data != NULL) {
        memcpy(copy.data, p.data, count);
    }
    return copy;
}

/* Allocate a zero-filled image of w x h samples in c planes (buffer comes from calloc). */
image make_image(int w, int h, int c)
{
    image img = make_empty_image(w, h, c);

    img.data = (unsigned char *)calloc(h * w * c, sizeof(char));
    return img;
}

/*
 * Store val at (x, y) of plane c. Coordinates outside the image are ignored.
 * The assignment converts val from float to unsigned char.
 */
static void set_pixel(image m, int x, int y, int c, float val)
{
    int inside = x >= 0 && x < m.w && y >= 0 && y < m.h && c >= 0 && c < m.c;

    if (!inside) {
        return;
    }

    assert(x < m.w && y < m.h && c < m.c);
    m.data[(c * m.h + y) * m.w + x] = val;
}

/* Read the sample at (x, y) of plane c as a float. Only the upper bounds are asserted. */
static float get_pixel(image m, int x, int y, int c)
{
    assert(x < m.w && y < m.h && c < m.c);

    int offset = c * m.h * m.w + y * m.w + x;
    return m.data[offset];
}

/* Release the sample buffer referenced by m; an image without a buffer is a no-op. */
void free_image(image m)
{
    free(m.data); /* free(NULL) does nothing, so no guard is required */
}

/* Add val to the sample at (x, y) of plane c; the sum is stored back as unsigned char. */
static void add_pixel(image m, int x, int y, int c, float val)
{
    assert(x < m.w && y < m.h && c < m.c);

    unsigned char *sample = m.data + (c * m.h * m.w + y * m.w + x);
    *sample += val;
}

/*
 * Resize `im` to w x h samples per plane with bilinear interpolation, done as
 * two separable passes: a horizontal pass into a temporary w x im.h image,
 * then a vertical pass into the result.
 *
 * Returns a newly allocated image that the caller releases with free_image().
 * When the size already matches, the result is copy_image(im). On invalid
 * dimensions, a missing source buffer or allocation failure the returned
 * image has data == NULL.
 *
 * Every store goes through set_pixel()/add_pixel(), which convert the float
 * value to unsigned char.
 */
image resize_image(image im, int w, int h)
{
    if (im.w == w && im.h == h) return copy_image(im);

    if (w <= 0 || h <= 0 || im.w <= 0 || im.h <= 0 || im.data == NULL) {
        return make_empty_image(w, h, im.c);
    }

    image resized = make_image(w, h, im.c);
    image part = make_image(w, im.h, im.c);
    if (resized.data == NULL || part.data == NULL) {
        free_image(part);
        free_image(resized);
        return make_empty_image(w, h, im.c);
    }

    int r, c, k;
    /* A one-sample-wide/high target has no step between samples. Dividing by
     * (w - 1) or (h - 1) there would divide by zero; the scale is not needed
     * for the last column/row anyway. */
    float w_scale = (w > 1) ? (float)(im.w - 1) / (w - 1) : 0.0f;
    float h_scale = (h > 1) ? (float)(im.h - 1) / (h - 1) : 0.0f;

    /* Horizontal pass: im (im.w x im.h) -> part (w x im.h). */
    for(k = 0; k < im.c; ++k){
        for(r = 0; r < im.h; ++r){
            for(c = 0; c < w; ++c){
                float val = 0;
                if(c == w-1 || im.w == 1){
                    /* last output column maps exactly onto the last source column */
                    val = get_pixel(im, im.w-1, r, k);
                } else {
                    float sx = c*w_scale;
                    int ix = (int) sx;
                    float dx = sx - ix;
                    val = (1 - dx) * get_pixel(im, ix, r, k) + dx * get_pixel(im, ix+1, r, k);
                }
                set_pixel(part, c, r, k, val);
            }
        }
    }

    /* Vertical pass: part (w x im.h) -> resized (w x h). */
    for(k = 0; k < im.c; ++k){
        for(r = 0; r < h; ++r){
            int last_row = (r == h-1 || im.h == 1);
            int iy;
            float dy;

            if(last_row){
                /* Pin the last output row to the last source row, mirroring the
                 * horizontal pass. Computing it as (h-1)*h_scale can round just
                 * below im.h-1, which left the row weighted by (1-dy) ~ 0 with
                 * the iy+1 term skipped. */
                iy = im.h - 1;
                dy = 0;
            } else {
                float sy = r*h_scale;
                iy = (int) sy;
                dy = sy - iy;
            }

            for(c = 0; c < w; ++c){
                float val = (1-dy) * get_pixel(part, c, iy, k);
                set_pixel(resized, c, r, k, val);
            }
            if(last_row) continue;
            for(c = 0; c < w; ++c){
                float val = dy * get_pixel(part, c, iy+1, k);
                add_pixel(resized, c, r, k, val);
            }
        }
    }

    free_image(part);
    return resized;
}

/*
 * Usage: program input.yuv width height input_width input_height
 *
 * Loads one raw NV12 frame of input_width x input_height (a full-size Y plane
 * followed by a half-size plane of interleaved chroma byte pairs), splits the
 * chroma into two planes, resizes Y to width x height and each chroma plane
 * to width/2 x height/2 with resize_image(), re-interleaves the chroma and
 * writes the NV12 result to ./output.yuv.
 * Exits with status -1 on any argument, I/O or allocation error.
 */
int main(int argc, char **argv)
{
    FILE *file;
    int width, height, input_width, input_height;
    size_t n;

    DBG("in");

    if(argc != 6)
    {
        /* The program takes five arguments; the former usage text listed only three. */
        DBG("input error, you should use this program like that: program xxxx.yuv width height input_width input_height.");
        exit(-1);
    }

    width = atoi(argv[2]);
    height = atoi(argv[3]);
    input_width = atoi(argv[4]);
    input_height = atoi(argv[5]);

    /* atoi() yields 0 for non-numeric text; non-positive sizes are unusable. */
    if(width <= 0 || height <= 0 || input_width <= 0 || input_height <= 0)
    {
        DBG("input error, invalid size: output %d x %d, input %d x %d.", width, height, input_width, input_height);
        exit(-1);
    }

    /* Chroma planes are (w/2) x (h/2). With an odd dimension the interleaved
     * chroma size w*h/2 no longer equals 2*(w/2)*(h/2) and the copy loops
     * would run past the chroma buffers. */
    if(((width | height | input_width | input_height) & 1) != 0)
    {
        DBG("input error, nv12 needs even sizes: output %d x %d, input %d x %d.", width, height, input_width, input_height);
        exit(-1);
    }

    DBG("scale to width %d, height %d.", width, height);

    file = fopen(argv[1], "rb");
    if(file == NULL)
    {
        DBG("fatal error, open file %s failure, please check the file status.", argv[1]);
        exit(-1);
    }

    /* ftell() returns long and -1 on failure: keep the full range and check it. */
    if(fseek(file, 0, SEEK_END) != 0)
    {
        DBG("fatal error, seek file %s failure.", argv[1]);
        exit(-1);
    }
    long filelen = ftell(file);
    if(filelen < 0)
    {
        DBG("fatal error, get size of file %s failure.", argv[1]);
        exit(-1);
    }

    DBG("file %s len %ld byets.", argv[1], filelen);

    const size_t src_luma = (size_t)input_width * (size_t)input_height;
    const size_t src_chroma = src_luma / 4; /* samples in each chroma plane */

    /* The frame is read out of the file buffer below: Y plus interleaved chroma must be present. */
    if(src_luma + 2 * src_chroma > (size_t)filelen)
    {
        DBG("input error, file %s is smaller than a %dx%d nv12 frame.", argv[1], input_width, input_height);
        exit(-1);
    }

    unsigned char *p = malloc((size_t)filelen);
    if(p == NULL)
    {
        DBG("malloc buffer failure for %s len %ld.", argv[1], filelen);
        exit(-1);
    }

    if(fseek(file, 0, SEEK_SET) != 0 || fread(p, 1, (size_t)filelen, file) != (size_t)filelen)
    {
        DBG("read file failure, size wrong.");
        exit(-1);
    }

    fclose(file);

    /* The tail dump needs at least 32 bytes of file data. */
    dump_memory(p, filelen < 32 ? (int32_t)filelen : 32);
    if(filelen >= 32)
    {
        dump_memory(p + filelen - 32, 32);
    }

    image orig_Y = make_image(input_width, input_height, 1);
    image orig_U = make_image(input_width/2, input_height/2, 1);
    image orig_V = make_image(input_width/2, input_height/2, 1);
    if(orig_Y.data == NULL || orig_U.data == NULL || orig_V.data == NULL)
    {
        DBG("malloc plane buffer failure.");
        exit(-1);
    }

    memcpy(orig_Y.data, p, src_luma);

    /* De-interleave the chroma pairs: in NV12 the first byte of each pair is U,
     * the second is V. Indexing is relative to the start of the chroma plane. */
    const unsigned char *uv_in = p + src_luma;
    for(n = 0; n < src_chroma; n++)
    {
        orig_U.data[n] = uv_in[2 * n];
        orig_V.data[n] = uv_in[2 * n + 1];
    }
    free(p);

    image outputy = resize_image(orig_Y, width, height);
    image outputu = resize_image(orig_U, width/2, height/2);
    image outputv = resize_image(orig_V, width/2, height/2);
    if(outputy.data == NULL || outputu.data == NULL || outputv.data == NULL)
    {
        DBG("resize plane failure.");
        exit(-1);
    }

    free_image(orig_Y);
    free_image(orig_U);
    free_image(orig_V);

    const size_t out_luma = (size_t)width * (size_t)height;
    const size_t out_chroma = out_luma / 4; /* samples in each resized chroma plane */

    unsigned char *uv = malloc(2 * out_chroma);
    if(uv == NULL)
    {
        DBG("malloc output buffer failure.");
        exit(-1);
    }

    /* Re-interleave in the same order the pairs were split. */
    for(n = 0; n < out_chroma; n++)
    {
        uv[2 * n] = outputu.data[n];
        uv[2 * n + 1] = outputv.data[n];
    }

    file = fopen("./output.yuv", "wb+");
    if(file == NULL)
    {
        DBG("fatal error, open output file failure, please check the file status.");
        exit(-1);
    }

    if(fwrite(outputy.data, 1, out_luma, file) != out_luma)
    {
        DBG("write file failure, size wrong.");
        exit(-1);
    }

    if(fwrite(uv, 1, 2 * out_chroma, file) != 2 * out_chroma)
    {
        DBG("write file failure, size wrong.");
        exit(-1);
    }

    fflush(file);
    fsync(fileno(file));
    if(fclose(file) != 0)
    {
        DBG("close output file failure.");
        exit(-1);
    }

    free(uv);
    free_image(outputy);
    free_image(outputu);
    free_image(outputv);

    return 0;
}

 输出output.yuv,执行如下命令查看,缩放效果真的不错!

ffplay -pix_fmt nv12 -f rawvideo -video_size 1280x720 ./output.yuv

480/270->352/198

ffmpeg -i e253d6bd7ac64bde9e7f1fb09dc908b2.png -vf scale=480:270 output.jpg
ffmpeg -i output.jpg -pix_fmt nv12 sousuo.nv12.yuv
ffplay -pix_fmt nv12 -f rawvideo -video_size 480x270 ./sousuo.nv12.yuv
./a.out sousuo.nv12.yuv 352 198 480 270
ffplay -pix_fmt nv12 -f rawvideo -video_size 352x198 ./output.yuv

验证darknet中前处理做图像缩放(双线性内插值法)scale的算法效果_2d_15

352/198->480/270

ffmpeg -i e253d6bd7ac64bde9e7f1fb09dc908b2.png -vf scale=352:198 output.jpg
ffmpeg -i output.jpg -pix_fmt nv12 sousuo.nv12.yuv
ffplay -pix_fmt nv12 -f rawvideo -video_size 352x198 ./sousuo.nv12.yuv
./a.out sousuo.nv12.yuv 480 270 352 198
ffplay -pix_fmt nv12 -f rawvideo -video_size 480x270 ./output.yuv

验证darknet中前处理做图像缩放(双线性内插值法)scale的算法效果_算法_16

 在某款嵌入式平台上成功应用的scale算法

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <assert.h>
#include <sys/ioctl.h>

#define DBG(fmt, ...) do { printf("%s line %d, "fmt"\n", __func__, __LINE__, ##__VA_ARGS__); } while (0)

/* Planar 8-bit image: c planes, each h rows of w samples. */
typedef struct image {
int w; /* samples per row */
int h; /* rows per plane */
int c; /* number of planes */
unsigned char *data; /* sample buffer; the pixel helpers index it as c*h*w + y*w + x */
} image;

/*
 * Print `len` bytes starting at `buf` as a hex dump on stdout, 16 bytes per
 * row, each row prefixed with the address of its first byte.
 */
static void dump_memory(uint8_t *buf, int32_t len)
{
    int32_t i;

    printf("\n\rdump file memory:");
    for (i = 0; i < len; i++) {
        if ((i % 16) == 0) {
            /* %p requires a void pointer argument; passing uint8_t * is a type mismatch. */
            printf("\n\r%p: ", (void *)(buf + i));
        }
        printf("0x%02x ", buf[i]);
    }

    printf("\n\r");
}

/* Build an image header with the given dimensions and no sample buffer (data is NULL). */
image make_empty_image(int w, int h, int c)
{
    image hdr;

    hdr.w = w;
    hdr.h = h;
    hdr.c = c;
    hdr.data = NULL;
    return hdr;
}

/*
 * Return a deep copy of `p`: same dimensions, freshly allocated sample buffer.
 * The caller owns the returned buffer and releases it with free_image().
 * If the allocation fails the copy's data is NULL.
 */
image copy_image(image p)
{
    image copy = p;
    size_t count = (size_t)p.w * (size_t)p.h * (size_t)p.c;

    /* Samples are unsigned char, so the buffer holds `count` bytes. The former
     * sizeof(float) factor made memcpy read four times past the end of p.data. */
    copy.data = (unsigned char *)calloc(count, sizeof(unsigned char));
    if (copy.data != NULL && p.data != NULL) {
        memcpy(copy.data, p.data, count);
    }
    return copy;
}

/* Allocate a zero-filled image of w x h samples in c planes (buffer comes from calloc). */
image make_image(int w, int h, int c)
{
    image img = make_empty_image(w, h, c);

    img.data = (unsigned char *)calloc(h * w * c, sizeof(char));
    return img;
}

/*
 * Store val at (x, y) of plane c. Coordinates outside the image are ignored.
 * The assignment converts val from float to unsigned char.
 */
static void set_pixel(image m, int x, int y, int c, float val)
{
    int inside = x >= 0 && x < m.w && y >= 0 && y < m.h && c >= 0 && c < m.c;

    if (!inside) {
        return;
    }

    assert(x < m.w && y < m.h && c < m.c);
    m.data[(c * m.h + y) * m.w + x] = val;
}

/* Read the sample at (x, y) of plane c as a float. Only the upper bounds are asserted. */
static float get_pixel(image m, int x, int y, int c)
{
    assert(x < m.w && y < m.h && c < m.c);

    int offset = c * m.h * m.w + y * m.w + x;
    return m.data[offset];
}

/* Release the sample buffer referenced by m; an image without a buffer is a no-op. */
void free_image(image m)
{
    free(m.data); /* free(NULL) does nothing, so no guard is required */
}

/* Add val to the sample at (x, y) of plane c; the sum is stored back as unsigned char. */
static void add_pixel(image m, int x, int y, int c, float val)
{
    assert(x < m.w && y < m.h && c < m.c);

    unsigned char *sample = m.data + (c * m.h * m.w + y * m.w + x);
    *sample += val;
}

/*
 * Resize `im` to w x h samples per plane with bilinear interpolation, done as
 * two separable passes: a horizontal pass into a temporary w x im.h image,
 * then a vertical pass into the result.
 *
 * Returns a newly allocated image that the caller releases with free_image().
 * When the size already matches, the result is copy_image(im). On invalid
 * dimensions, a missing source buffer or allocation failure the returned
 * image has data == NULL.
 *
 * Every store goes through set_pixel()/add_pixel(), which convert the float
 * value to unsigned char.
 */
image resize_image(image im, int w, int h)
{
    if (im.w == w && im.h == h) return copy_image(im);

    if (w <= 0 || h <= 0 || im.w <= 0 || im.h <= 0 || im.data == NULL) {
        return make_empty_image(w, h, im.c);
    }

    image resized = make_image(w, h, im.c);
    image part = make_image(w, im.h, im.c);
    if (resized.data == NULL || part.data == NULL) {
        free_image(part);
        free_image(resized);
        return make_empty_image(w, h, im.c);
    }

    int r, c, k;
    /* A one-sample-wide/high target has no step between samples. Dividing by
     * (w - 1) or (h - 1) there would divide by zero; the scale is not needed
     * for the last column/row anyway. */
    float w_scale = (w > 1) ? (float)(im.w - 1) / (w - 1) : 0.0f;
    float h_scale = (h > 1) ? (float)(im.h - 1) / (h - 1) : 0.0f;

    /* Horizontal pass: im (im.w x im.h) -> part (w x im.h). */
    for(k = 0; k < im.c; ++k){
        for(r = 0; r < im.h; ++r){
            for(c = 0; c < w; ++c){
                float val = 0;
                if(c == w-1 || im.w == 1){
                    /* last output column maps exactly onto the last source column */
                    val = get_pixel(im, im.w-1, r, k);
                } else {
                    float sx = c*w_scale;
                    int ix = (int) sx;
                    float dx = sx - ix;
                    val = (1 - dx) * get_pixel(im, ix, r, k) + dx * get_pixel(im, ix+1, r, k);
                }
                set_pixel(part, c, r, k, val);
            }
        }
    }

    /* Vertical pass: part (w x im.h) -> resized (w x h). */
    for(k = 0; k < im.c; ++k){
        for(r = 0; r < h; ++r){
            int last_row = (r == h-1 || im.h == 1);
            int iy;
            float dy;

            if(last_row){
                /* Pin the last output row to the last source row, mirroring the
                 * horizontal pass. Computing it as (h-1)*h_scale can round just
                 * below im.h-1, which left the row weighted by (1-dy) ~ 0 with
                 * the iy+1 term skipped. */
                iy = im.h - 1;
                dy = 0;
            } else {
                float sy = r*h_scale;
                iy = (int) sy;
                dy = sy - iy;
            }

            for(c = 0; c < w; ++c){
                float val = (1-dy) * get_pixel(part, c, iy, k);
                set_pixel(resized, c, r, k, val);
            }
            if(last_row) continue;
            for(c = 0; c < w; ++c){
                float val = dy * get_pixel(part, c, iy+1, k);
                add_pixel(resized, c, r, k, val);
            }
        }
    }

    free_image(part);
    return resized;
}

/*
 * Usage: program input.yuv width height input_width input_height
 *
 * Loads one raw NV12 frame of input_width x input_height (a full-size Y plane
 * followed by a half-size plane of interleaved chroma byte pairs), splits the
 * chroma into two planes, resizes Y to width x height and each chroma plane
 * to width/2 x height/2 with resize_image(), re-interleaves the chroma and
 * writes the NV12 result to ./output.yuv.
 * Exits with status -1 on any argument, I/O or allocation error.
 */
int main(int argc, char **argv)
{
    FILE *file;
    int width, height, input_width, input_height;
    size_t n;

    DBG("in");

    if(argc != 6)
    {
        /* The program takes five arguments; the former usage text listed only three. */
        DBG("input error, you should use this program like that: program xxxx.yuv width height input_width input_height.");
        exit(-1);
    }

    width = atoi(argv[2]);
    height = atoi(argv[3]);
    input_width = atoi(argv[4]);
    input_height = atoi(argv[5]);

    /* atoi() yields 0 for non-numeric text; non-positive sizes are unusable. */
    if(width <= 0 || height <= 0 || input_width <= 0 || input_height <= 0)
    {
        DBG("input error, invalid size: output %d x %d, input %d x %d.", width, height, input_width, input_height);
        exit(-1);
    }

    /* Chroma planes are (w/2) x (h/2). With an odd dimension the interleaved
     * chroma size w*h/2 no longer equals 2*(w/2)*(h/2) and the copy loops
     * would run past the chroma buffers. */
    if(((width | height | input_width | input_height) & 1) != 0)
    {
        DBG("input error, nv12 needs even sizes: output %d x %d, input %d x %d.", width, height, input_width, input_height);
        exit(-1);
    }

    DBG("scale to width %d, height %d.", width, height);

    file = fopen(argv[1], "rb");
    if(file == NULL)
    {
        DBG("fatal error, open file %s failure, please check the file status.", argv[1]);
        exit(-1);
    }

    /* ftell() returns long and -1 on failure: keep the full range and check it. */
    if(fseek(file, 0, SEEK_END) != 0)
    {
        DBG("fatal error, seek file %s failure.", argv[1]);
        exit(-1);
    }
    long filelen = ftell(file);
    if(filelen < 0)
    {
        DBG("fatal error, get size of file %s failure.", argv[1]);
        exit(-1);
    }

    DBG("file %s len %ld byets.", argv[1], filelen);

    const size_t src_luma = (size_t)input_width * (size_t)input_height;
    const size_t src_chroma = src_luma / 4; /* samples in each chroma plane */

    /* The frame is read out of the file buffer below: Y plus interleaved chroma must be present. */
    if(src_luma + 2 * src_chroma > (size_t)filelen)
    {
        DBG("input error, file %s is smaller than a %dx%d nv12 frame.", argv[1], input_width, input_height);
        exit(-1);
    }

    unsigned char *p = malloc((size_t)filelen);
    if(p == NULL)
    {
        DBG("malloc buffer failure for %s len %ld.", argv[1], filelen);
        exit(-1);
    }

    if(fseek(file, 0, SEEK_SET) != 0 || fread(p, 1, (size_t)filelen, file) != (size_t)filelen)
    {
        DBG("read file failure, size wrong.");
        exit(-1);
    }

    fclose(file);

    /* The tail dump needs at least 32 bytes of file data. */
    dump_memory(p, filelen < 32 ? (int32_t)filelen : 32);
    if(filelen >= 32)
    {
        dump_memory(p + filelen - 32, 32);
    }

    image orig_Y = make_image(input_width, input_height, 1);
    image orig_U = make_image(input_width/2, input_height/2, 1);
    image orig_V = make_image(input_width/2, input_height/2, 1);
    if(orig_Y.data == NULL || orig_U.data == NULL || orig_V.data == NULL)
    {
        DBG("malloc plane buffer failure.");
        exit(-1);
    }

    memcpy(orig_Y.data, p, src_luma);

    /* De-interleave the chroma pairs: in NV12 the first byte of each pair is U,
     * the second is V. Indexing is relative to the start of the chroma plane. */
    const unsigned char *uv_in = p + src_luma;
    for(n = 0; n < src_chroma; n++)
    {
        orig_U.data[n] = uv_in[2 * n];
        orig_V.data[n] = uv_in[2 * n + 1];
    }
    free(p);

    image outputy = resize_image(orig_Y, width, height);
    image outputu = resize_image(orig_U, width/2, height/2);
    image outputv = resize_image(orig_V, width/2, height/2);
    if(outputy.data == NULL || outputu.data == NULL || outputv.data == NULL)
    {
        DBG("resize plane failure.");
        exit(-1);
    }

    free_image(orig_Y);
    free_image(orig_U);
    free_image(orig_V);

    const size_t out_luma = (size_t)width * (size_t)height;
    const size_t out_chroma = out_luma / 4; /* samples in each resized chroma plane */

    unsigned char *uv = malloc(2 * out_chroma);
    if(uv == NULL)
    {
        DBG("malloc output buffer failure.");
        exit(-1);
    }

    /* Re-interleave in the same order the pairs were split. */
    for(n = 0; n < out_chroma; n++)
    {
        uv[2 * n] = outputu.data[n];
        uv[2 * n + 1] = outputv.data[n];
    }

    file = fopen("./output.yuv", "wb+");
    if(file == NULL)
    {
        DBG("fatal error, open output file failure, please check the file status.");
        exit(-1);
    }

    if(fwrite(outputy.data, 1, out_luma, file) != out_luma)
    {
        DBG("write file failure, size wrong.");
        exit(-1);
    }

    if(fwrite(uv, 1, 2 * out_chroma, file) != 2 * out_chroma)
    {
        DBG("write file failure, size wrong.");
        exit(-1);
    }

    fflush(file);
    fsync(fileno(file));
    if(fclose(file) != 0)
    {
        DBG("close output file failure.");
        exit(-1);
    }

    free(uv);
    free_image(outputy);
    free_image(outputu);
    free_image(outputv);

    return 0;
}

基于G2D硬件加速的SUNXI平台缩放实现代码:

下面代码已经应用在人脸检测模型算法中。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <assert.h>
#include <sys/ioctl.h>
#include <memoryAdapter.h>
#include <ion_memmanager.h>
#include <plat_math.h>
#include <plat_log.h>
#include <utils/PIXEL_FORMAT_E_g2d_format_convert.h>
#include <linux/g2d_driver.h>
#include "hw_scale.h"

/*
 * Addresses of one two-plane image buffer pair: a Y buffer and a UV buffer,
 * each recorded both as the pointer returned by the allocator and as the
 * value returned by g2d_getphyaddr_by_viraddr() for it.
 */
typedef struct
{
unsigned long *p_virtaddr_y; /* Y buffer as returned by g2d_alloc_mem() */
unsigned long *p_virtaddr_uv; /* UV buffer as returned by g2d_alloc_mem() */
unsigned long *p_physaddr_y; /* address handed to the blit as laddr[0] */
unsigned long *p_physaddr_uv; /* address handed to the blit as laddr[1] */
}ion_img_buff_t;

/* Forwards to ion_memOpen() and returns its result unchanged. */
int g2d_mem_open(void)
{
return ion_memOpen();
}

/* Forwards to ion_memClose() and returns its result unchanged. */
int g2d_mem_close(void)
{
return ion_memClose();
}

/*
 * Request `size` bytes from ion_allocMem_extend() and return its result.
 * The request uses heap type IonHeapType_IOMMU, alignment 0 and
 * mbSupportCache = 0; all other attribute fields are zeroed.
 * NOTE(review): callers in this file treat a NULL result as failure — confirm
 * against the ion_memmanager API.
 */
static unsigned char* g2d_alloc_mem(unsigned int size)
{
IonAllocAttr alloc_attr;
memset(&alloc_attr, 0, sizeof(IonAllocAttr));

alloc_attr.mLen = size;
alloc_attr.mAlign = 0;
alloc_attr.mIonHeapType = IonHeapType_IOMMU;
alloc_attr.mbSupportCache = 0;
return ion_allocMem_extend(&alloc_attr);
}

/* Forwards vir_ptr to ion_freeMem() and returns its result unchanged. */
static int g2d_free_mem(void *vir_ptr)
{
return ion_freeMem(vir_ptr);
}

/*
 * Forwards vir_ptr to ion_getMemPhyAddr() and returns its result as
 * unsigned int.
 */
static unsigned int g2d_getphyaddr_by_viraddr(void *vir_ptr)
{
return ion_getMemPhyAddr(vir_ptr);
}

/* Forwards (vir_ptr, size) to ion_flushCache() and returns its result unchanged. */
static int g2d_flush_cache(void *vir_ptr, unsigned int size)
{
return ion_flushCache(vir_ptr, size);
}

static ion_img_buff_t src;
static ion_img_buff_t dst;
int scale_picture_nv12_to_nv12_byg2d(const unsigned char *srcpic, int src_width, int src_height, unsigned char *dstpic, int dst_width, int dst_height, int g2d_fd)
{
int ret = 0;
static int mem_inited = 0;
int size;
g2d_blt_h blit;
g2d_fmt_enh src_fmt, dst_fmt;

if(mem_inited == 0)
{
size = ALIGN(src_width, 16) * ALIGN(src_height, 16);
src.p_virtaddr_y = (void*)g2d_alloc_mem(size);
src.p_virtaddr_uv = (void*)g2d_alloc_mem(size/2);

if(!src.p_virtaddr_y || !src.p_virtaddr_uv )
{
aloge("fatal error, ion malloc for src image failure.");
goto EXIT;
}

src.p_physaddr_y = (void *)g2d_getphyaddr_by_viraddr(src.p_virtaddr_y);
src.p_physaddr_uv = (void *)g2d_getphyaddr_by_viraddr(src.p_virtaddr_uv);

size = ALIGN(dst_width, 16) * ALIGN(dst_height, 16);
dst.p_virtaddr_y = (void*)g2d_alloc_mem(size);
dst.p_virtaddr_uv = (void*)g2d_alloc_mem(size/2);

if(!dst.p_virtaddr_y || !dst.p_virtaddr_uv )
{
aloge("fatal error, ion malloc for dst image failure.");
goto EXIT;
}

dst.p_physaddr_y = (void *)g2d_getphyaddr_by_viraddr(dst.p_virtaddr_y);
dst.p_physaddr_uv = (void *)g2d_getphyaddr_by_viraddr(dst.p_virtaddr_uv);
mem_inited = 1;
}

if(!src.p_virtaddr_y || !src.p_virtaddr_uv || !dst.p_virtaddr_y || !dst.p_virtaddr_uv)
{
aloge("src or dst buffer addr corrupted?");
goto EXIT;
}

size = src_width*src_height;
memcpy(src.p_virtaddr_y, srcpic, size);
memcpy(src.p_virtaddr_uv, srcpic + size , size / 2);
g2d_flush_cache(src.p_virtaddr_y, size);
g2d_flush_cache(src.p_virtaddr_uv, size / 2);

// MM_PIXEL_FORMAT_YUV_SEMIPLANAR_420. nv12 format.
ret = convert_PIXEL_FORMAT_E_to_g2d_fmt_enh(MM_PIXEL_FORMAT_YUV_SEMIPLANAR_420 , &src_fmt);
if(ret != SUCCESS)
{
aloge("fatal error, convert pixel fmt from video to g2d failure.");
goto EXIT;
}

ret = convert_PIXEL_FORMAT_E_to_g2d_fmt_enh(MM_PIXEL_FORMAT_YUV_SEMIPLANAR_420, &dst_fmt);
if(ret != SUCCESS)
{
aloge("fatal error, convert pixel fmt from video to g2d failure.");
goto EXIT;
}

memset(&blit, 0, sizeof(g2d_blt_h));
blit.flag_h = G2D_BLT_NONE_H;

blit.src_image_h.format = src_fmt;
blit.src_image_h.laddr[0] = (unsigned long)src.p_physaddr_y;
blit.src_image_h.laddr[1] = (unsigned long)src.p_physaddr_uv;
blit.src_image_h.laddr[2] = (unsigned long)0;

blit.src_image_h.width = src_width;
blit.src_image_h.height = src_height;
blit.src_image_h.align[0] = 0;
blit.src_image_h.align[1] = 0;
blit.src_image_h.align[2] = 0;
blit.src_image_h.clip_rect.x = 0;
blit.src_image_h.clip_rect.y = 0;
blit.src_image_h.clip_rect.w = src_width;
blit.src_image_h.clip_rect.h = src_height;
blit.src_image_h.gamut = G2D_BT601;
blit.src_image_h.bpremul = 0;

blit.src_image_h.mode = G2D_PIXEL_ALPHA;
blit.src_image_h.fd = -1;
blit.src_image_h.use_phy_addr = 1;

blit.dst_image_h.format = dst_fmt;
blit.dst_image_h.laddr[0] = (unsigned long)dst.p_physaddr_y;
blit.dst_image_h.laddr[1] = (unsigned long)dst.p_physaddr_uv;
blit.dst_image_h.laddr[2] = (unsigned long)0;

blit.dst_image_h.width = dst_width;
blit.dst_image_h.height = dst_height;
blit.dst_image_h.align[0] = 0;
blit.dst_image_h.align[1] = 0;
blit.dst_image_h.align[2] = 0;
blit.dst_image_h.clip_rect.x = 0;
blit.dst_image_h.clip_rect.y = 0;
blit.dst_image_h.clip_rect.w = dst_width;
blit.dst_image_h.clip_rect.h = dst_height;
blit.dst_image_h.gamut = G2D_BT601;
blit.dst_image_h.bpremul = 0;

blit.dst_image_h.mode = G2D_PIXEL_ALPHA;
blit.dst_image_h.fd = -1;
blit.dst_image_h.use_phy_addr = 1;

ret = ioctl(g2d_fd, G2D_CMD_BITBLT_H, (unsigned long)&blit);
if(ret < 0)
{
aloge("fatal error! bit-block(image) transfer failed[%d]", ret);
goto EXIT;
}

size = dst_width*dst_height;

g2d_flush_cache(dst.p_virtaddr_y, size);
g2d_flush_cache(dst.p_virtaddr_uv, size / 2);

memcpy(dstpic, dst.p_virtaddr_y, size);
memcpy(dstpic + size, dst.p_virtaddr_uv, size / 2);

return 0;

EXIT:
if(src.p_virtaddr_y)
{
g2d_free_mem(src.p_virtaddr_y);
}
if(src.p_virtaddr_uv)
{
g2d_free_mem(src.p_virtaddr_uv);
}
if(dst.p_virtaddr_y)
{
g2d_free_mem(dst.p_virtaddr_y);
}
if(dst.p_virtaddr_uv)
{
g2d_free_mem(dst.p_virtaddr_uv);
}

memset(&src, 0x00, sizeof(src));
memset(&dst, 0x00, sizeof(dst));

return -1;
}

// Placeholder with an empty body: it performs no work and returns immediately.
void scale_create_buffer(void)
{
return;
}

/*
 * Release the four staging buffers recorded in src/dst (source Y, source UV,
 * destination Y, destination UV, in that order) and zero both records.
 * Buffers that are not set are skipped.
 */
void scale_destory_buffer(void)
{
    void *buffers[4];
    unsigned int idx;

    buffers[0] = src.p_virtaddr_y;
    buffers[1] = src.p_virtaddr_uv;
    buffers[2] = dst.p_virtaddr_y;
    buffers[3] = dst.p_virtaddr_uv;

    for(idx = 0; idx < sizeof(buffers) / sizeof(buffers[0]); idx++)
    {
        if(buffers[idx])
        {
            g2d_free_mem(buffers[idx]);
        }
    }

    memset(&src, 0x00, sizeof(src));
    memset(&dst, 0x00, sizeof(dst));
}

名词释义:

像素格式中可以看到很多格式名后面紧跟着字母P或SP,它们是什么意思呢?P代表planar,也就是平面;SP就是Semi-Planar,翻译过来就是半平面。具体代表什么呢?

以YUV444P为例,P的意思是Y、U、V三个分量各占用一个平面,也就是一共三个平面,几何布局和内存中的布局如下:

验证darknet中前处理做图像缩放(双线性内插值法)scale的算法效果_#include_17

而SP代表的半平面,顾名思义,就是平面不到3个,但也不能只有1个,因为YUV毕竟包含不同的分量,所以实际上就是2个平面:Y占用一个,UV合起来占用一个。

​YUV420SP​​​格式的图像阵列,首先是所有​​Y值​​​,然后是​​UV​​​或者​​VU​​​交替存储,​​NV12和NV21属于YUV420SP​​​格式,是一种​​two-plane模式​​​,即​​Y和UV分为两个plane​​​,但是​​UV(CbCr)​​​为交错存储,而不是分为三个平面,我们常用的NV12或者NV21都属于​​YUV420SP。​

布局如下图所示:

验证darknet中前处理做图像缩放(双线性内插值法)scale的算法效果_2d_18

验证darknet中前处理做图像缩放(双线性内插值法)scale的算法效果_算法_19

我们最常见的YUV420P和YUV420SP都是基于4:2:0采样的,所以如果图片的宽为width,高为height,在内存中占用的空间就是width * height * 3 / 2字节。

​YUV420P​​​又叫​​plane平面模式​​​,​​Y , U , V​​​分别在不同平面,也就是有三个平面,它是​​YUV标准格式4:2:0​​​,主要分为:​​YU12和YV12​​, YUV420P其中前width * height的空间存放Y分量,接着width * height / 4存放U或者V分量,最后width * height / 4存放V或者U分量,根据UV顺序的不同,YUV420P又可以分为YV12和YU12

验证darknet中前处理做图像缩放(双线性内插值法)scale的算法效果_#include_20

神经网络的输入一般是RGB24格式,而非ARGB格式,这点要注意。

参考:

​​图解YU12、I420、YV12、NV12、NV21、YUV420P、YUV420SP、YUV422P、YUV444P的区别_handy周的博客-CSDN博客_yv12​​


结束!

标签:name,缩放,插值法,image,height,int,双线性,file,width
From: https://blog.51cto.com/u_15899439/5911843

相关文章