Preface
Facial landmark localization is an important task in computer vision. This article describes how to use Unity Sentis and a Compute Shader, together with the det_10g.onnx model, to locate the five facial landmarks efficiently. Each step is explained in detail, and a complete code example is provided.
Model analysis
Input:
The input shape chosen here is 1x3x640x640.
Outputs: three groups of tensors, one each for strides 8, 16, and 32
(448, 451, 454): the stride-8 group. Output 448 is the score tensor, 12800x1 (equivalently 1x80x80x2): with a 640x640 input and stride 8, 640/8 = 80, so the grid has 80 predictions per row and 80 rows. From the insightface source, num_anchors = 2, so each grid position carries two anchors; since they share the same position, the scores can be read as two 80x80 score maps.
451: bboxes, 1x8x80x80. Each score has four values that, after decoding, give the box (x1, y1, x2, y2); they are stored as distances from the anchor center and still need to be converted into absolute coordinates. Of the 8 channels, channels 1-4 are the box for the first anchor and channels 5-8 the box for the second.
454: kps, 1x20x80x80. Each score has five landmark (x, y) pairs, again given as offsets from the anchor center that still need decoding. Channels 1-10 belong to the first anchor and channels 11-20 to the second; the two anchors are decoded separately (a small decoding sketch follows this list).
(471, 474, 477): the stride-16 group; same structure as above.
(494, 497, 500): the stride-32 group; same structure as above.
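To make the decoding concrete, here is a minimal CPU-side sketch of how one anchor turns its predicted distances into an absolute box. DecodeBox is an illustrative helper and not part of the pipeline below; it simply mirrors the anchor-center and distance2bbox logic used in the code example:
// Illustrative CPU-side decode for one anchor (not used by the Unity pipeline below).
// k is the flat row index into the (num_anchors * h * w, 4) bbox output of one stride;
// l, t, r, b are that row's four distances, already multiplied by the stride.
static UnityEngine.Vector4 DecodeBox(int k, float l, float t, float r, float b, int stride, int gridW)
{
    int cell = k / 2;                    // num_anchors = 2: two consecutive rows share one grid cell
    float cx = (cell % gridW) * stride;  // anchor center x
    float cy = (cell / gridW) * stride;  // anchor center y
    return new UnityEngine.Vector4(cx - l, cy - t, cx + r, cy + b); // (x1, y1, x2, y2) in 640x640 pixels
}
// For stride 8, gridW = 640 / 8 = 80; rows k = 0 and k = 1 are the two anchors of the top-left cell.
// The landmarks decode the same way: each (x, y) offset is added to the anchor center.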
Code example (to be optimized):
using System;
using System.Collections.Generic;
using Unity.Mathematics;
using Unity.Sentis;
using UnityEngine;
using System.Linq;
public class Retinaface :MonoBehaviour
{
public ModelAsset modelAsset;
public Model model;
private IWorker worker;
private GPUComputeBackend gpu;
public int textureWidth=640, textureHeight =640;
private int[] feat_stride_fpn = new int[] {8, 16, 32};
public Dictionary<(int,int,int),FunctionalTensor> center_cache = new Dictionary<(int, int, int), FunctionalTensor>() ;
private int _num_anchors = 2;
private FunctionalTensor anchor_centers;
private float det_scale = 2.5f;
public ComputeShader postprocess1;
private RenderTexture scoresRT;
private RenderTexture boxesRT;
private RenderTexture kpssRT1;
private RenderTexture kpssRT2;
private RenderTexture kpssRT3;
private RenderTexture kpssRT4;
private RenderTexture kpssRT5;
private ComputeBuffer post1;
public ComputeShader postprocess2;
private ComputeBuffer post2;
private ComputeBuffer counter;
public Texture t2d;                // source image handed to Detect()
public Shader _visualizer;         // shader used to visualize the detections
private Material _material;
private ComputeBuffer _drawArgs;   // indirect draw arguments for the visualizer
private void Start()
{
InitBuffer();
model = ModelLoader.Load(modelAsset);
gpu = new GPUComputeBackend();
var model2 = Functional.Compile(input =>
{
List<FunctionalTensor> scores_list = new List<FunctionalTensor>();
List<FunctionalTensor> bboxes_list = new List<FunctionalTensor>();
List<FunctionalTensor> kpss_list = new List<FunctionalTensor>();
var outputs = model.Forward(input);
//iterate over the three strides
for (int i = 0; i < feat_stride_fpn.Length; i++)
{
var scores = outputs[i*3];
var bbox_preds = outputs[i * 3+1]* feat_stride_fpn[i];
var kps_preds = outputs[i*3+2] * feat_stride_fpn[i];
int height = 640 / feat_stride_fpn[i];
int width = 640 / feat_stride_fpn[i];
var key = (height, width, feat_stride_fpn[i]);
if (center_cache.ContainsKey(key) )
{
anchor_centers = center_cache[key];
}
else
{
//build the grid of anchor centers
var range_X = Functional.ARange(0, height);
var range_640_x = range_X.Unsqueeze(-1).BroadcastTo(new[] {height});
var range_640_y = range_640_x.Transpose(0, 1);
//(n,n,2)
anchor_centers =Functional.Stack(new[] {range_640_y, range_640_x}, 2);
//(n*n,2)
anchor_centers = (anchor_centers * feat_stride_fpn[i]).Reshape(new[] {-1, 2});
//(n*n*2,2) (12800,2)(3200,2)(800,2)
anchor_centers = Functional.Concat(new []{anchor_centers,anchor_centers},1).Reshape(new[] {-1, 2});
if (center_cache.Count<100 )
{
center_cache[key] = anchor_centers;
}
}
//bbox_preds: per-anchor distances from the center (left, top, right, bottom)
//(n,4) absolute box: top-left (x1,y1), bottom-right (x2,y2)
var bboxes = distance2bbox(anchor_centers, bbox_preds);
scores_list.Add(scores);
bboxes_list.Add(bboxes);
//shape (n,10) (12800,10)
var kpss = distance2kps(anchor_centers, kps_preds);
kpss_list.Add(kpss);
}
//(n,1)
var scores_vstack = Functional.Concat(scores_list.ToArray(), 0);
//(n,4)
var boxes_vstack = Functional.Concat(bboxes_list.ToArray(), 0)/det_scale;
//(n,10)
var kpss_vstack = Functional.Concat(kpss_list.ToArray(), 0)/det_scale;
//non-maximum suppression; for some reason it had no effect here
/*var indices = Functional.NMS(boxes_vstack, scores_vstack.Transpose(0, 1), 0.5f);
var output_scores = Functional.Gather(scores_vstack ,0,indices);
var output_boxes = Functional.Gather(boxes_vstack,0,indices);
var output_kpss = Functional.Gather(kpss_vstack,0,indices); */
return (scores_vstack,boxes_vstack, kpss_vstack );
},
InputDef.FromModel(model)[0]
);
worker = WorkerFactory.CreateWorker(BackendType.GPUCompute, model2);
Detect(t2d);
}
private void Update()
{
Detect(t2d);
}
void InitBuffer()
{
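// 210 x 80 = 16800 anchors in total: 12800 (stride 8) + 3200 (stride 16) + 800 (stride 32)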
scoresRT = new RenderTexture(210, 80, 0);
boxesRT = new RenderTexture(210, 80, 0, RenderTextureFormat.ARGBFloat);
kpssRT1 = new RenderTexture(210, 80, 0,RenderTextureFormat.RGFloat);
kpssRT2 = new RenderTexture(210, 80, 0,RenderTextureFormat.RGFloat);
kpssRT3 = new RenderTexture(210, 80, 0,RenderTextureFormat.RGFloat);
kpssRT4 = new RenderTexture(210, 80, 0,RenderTextureFormat.RGFloat);
kpssRT5 = new RenderTexture(210, 80, 0,RenderTextureFormat.RGFloat);
post1 = new ComputeBuffer(512, 4*15);
post2 = new ComputeBuffer(512, 4*15, ComputeBufferType.Append);
counter = new ComputeBuffer(1, sizeof(uint), ComputeBufferType.Counter);
_material = new Material(_visualizer);
_drawArgs = new ComputeBuffer(4, sizeof(uint),
ComputeBufferType.IndirectArguments);
_drawArgs.SetData(new int [] {6, 0, 0, 0});
}
FunctionalTensor distance2bbox(FunctionalTensor points,FunctionalTensor distance)
{
FunctionalTensor x1 = points[.., 0] - distance[.., 0];
FunctionalTensor y1 = points[.., 1] - distance[.., 1];
FunctionalTensor x2 = points[.., 0] + distance[.., 2];
FunctionalTensor y2 = points[.., 1] + distance[.., 3];
return Functional.Stack(new[] {x1, y1, x2, y2}, -1);
}
FunctionalTensor distance2kps(FunctionalTensor points, FunctionalTensor distance)
{
List<FunctionalTensor> preds = new List<FunctionalTensor>();
int[] range = new[] {0, 2, 4, 6, 8};
foreach (var i in range)
{
FunctionalTensor px = points[.., i % 2] + distance[.., i];
FunctionalTensor py = points[.., i % 2 + 1] + distance[.., i + 1];
preds.Add(px);
preds.Add(py);
}
return Functional.Stack(preds.ToArray(), -1);
}
private float[] tempBox = new float[15];
public Material testMat;
void Detect(Texture source)
{
using (var input = TextureConverter.ToTensor(source, 640, 640, 3))
{
worker.Execute(input);
}
using var scores = worker.PeekOutput("output_0") as TensorFloat;
using var boxes = worker.PeekOutput("output_1") as TensorFloat;
using var kpss = worker.PeekOutput("output_2") as TensorFloat;
scores.Reshape( new TensorShape(1,210,80,1));
boxes.Reshape( new TensorShape(1,210,80,4));
kpss.Reshape( new TensorShape(1,210,80,10));
// kpss.Reshape( new TensorShape(1,210,400,2));
TensorFloat kpss_1 = TensorFloat.AllocNoData(new TensorShape(1,210,80,2));
TensorFloat kpss_2 = TensorFloat.AllocNoData(new TensorShape(1,210,80,2));
TensorFloat kpss_3 = TensorFloat.AllocNoData(new TensorShape(1,210,80,2));
TensorFloat kpss_4 = TensorFloat.AllocNoData(new TensorShape(1,210,80,2));
TensorFloat kpss_5 = TensorFloat.AllocNoData(new TensorShape(1,210,80,2));
gpu.Slice(kpss,kpss_1,new [] {0},new [] {3},new [] {1});
gpu.Slice(kpss,kpss_2,new [] {2},new [] {3},new [] {1});
gpu.Slice(kpss,kpss_3,new [] {4},new [] {3},new [] {1});
gpu.Slice(kpss,kpss_4,new [] {6},new [] {3},new [] {1});
gpu.Slice(kpss,kpss_5,new [] {8},new [] {3},new [] {1});
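// bring the channel dimension forward so each tensor can be written out as a 210 x 80 texture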
using TensorFloat tagetT = TensorFloat.AllocNoData(new TensorShape(1, 1, 80,210));
gpu.Transpose(scores,tagetT,new int[] {0, 3, 1, 2});
using TensorFloat tagetT1 = TensorFloat.AllocNoData(new TensorShape(1, 4, 80,210));
gpu.Transpose(boxes,tagetT1,new int[] {0, 3, 1, 2});
using TensorFloat tagetT2 = TensorFloat.AllocNoData(new TensorShape(1, 2, 80,210));
gpu.Transpose(kpss_1,tagetT2,new int[] {0, 3, 1, 2});
using TensorFloat tagetT3 = TensorFloat.AllocNoData(new TensorShape(1, 2, 80,210));
gpu.Transpose(kpss_2,tagetT3,new int[] {0, 3, 1, 2});
using TensorFloat tagetT4 = TensorFloat.AllocNoData(new TensorShape(1, 2, 80,210));
gpu.Transpose(kpss_3,tagetT4,new int[] {0, 3, 1, 2});
using TensorFloat tagetT5 = TensorFloat.AllocNoData(new TensorShape(1, 2, 80,210));
gpu.Transpose(kpss_4,tagetT5,new int[] {0, 3, 1, 2});
using TensorFloat tagetT6 = TensorFloat.AllocNoData(new TensorShape(1, 2, 80,210));
gpu.Transpose(kpss_5,tagetT6,new int[] {0, 3, 1, 2});
scoresRT = TextureConverter.ToTexture(tagetT, 210, 80, 1);
boxesRT = TextureConverter.ToTexture(tagetT1 , 210, 80, 4);
kpssRT1 = TextureConverter.ToTexture(tagetT2 , 210, 80, 2);
kpssRT2 = TextureConverter.ToTexture(tagetT3 , 210, 80, 2);
kpssRT3 = TextureConverter.ToTexture(tagetT4 , 210, 80, 2);
kpssRT4 = TextureConverter.ToTexture(tagetT5 , 210, 80, 2);
kpssRT5 = TextureConverter.ToTexture(tagetT6 , 210, 80, 2);
post2.SetCounterValue(0);
counter.SetCounterValue(0);
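// first post-process pass (compute shader not shown here): scan the score texture and
// append every candidate above Threshold, with its box and landmarks, to post1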
postprocess1.SetTexture(0, "Scores", scoresRT);
postprocess1.SetTexture(0, "Boxes", boxesRT);
postprocess1.SetTexture(0, "kpss_1", kpssRT1);
postprocess1.SetTexture(0, "kpss_2", kpssRT2);
postprocess1.SetTexture(0, "kpss_3", kpssRT3);
postprocess1.SetTexture(0, "kpss_4", kpssRT4);
postprocess1.SetTexture(0, "kpss_5", kpssRT5);
postprocess1.SetInts("InputSize", 210,80);
postprocess1.SetFloat("Threshold", 0.3f);
postprocess1.SetBuffer(0, "Output", post1);
postprocess1.SetBuffer(0, "OutputCount", counter);
postprocess1.Dispatch (0, (boxesRT.width+14 )/16,boxesRT.height/4,1);
postprocess2.SetFloat ("Threshold", 0.5f);
postprocess2.SetBuffer(0, "Input", post1);
postprocess2.SetBuffer(0, "InputCount", counter);
postprocess2.SetBuffer(0, "Output", post2);
postprocess2.Dispatch (0, 1, 1, 1);
post2.GetData(tempBox);
testMat.SetTexture("_MainTex",t2d);
testMat.SetVector("_leftPos",new Vector2(tempBox[0]/256, 1-tempBox[1]/256));
testMat.SetVector("_rightPos",new Vector2(tempBox[2]/256, 1-tempBox[3]/256));
testMat.SetVector("_rightEye",new Vector2(tempBox[5]/256, 1-tempBox[6]/256));
testMat.SetVector("_leftEye",new Vector2(tempBox[7]/256, 1-tempBox[8]/256));
testMat.SetVector("_nose",new Vector2(tempBox[9]/256, 1-tempBox[10]/256));
testMat.SetVector("_rightMouse",new Vector2(tempBox[11]/256, 1-tempBox[12]/256));
testMat.SetVector("_leftMouse",new Vector2(tempBox[13]/256, 1-tempBox[14]/256));
}
private void OnDestroy()
{
gpu.Dispose();
worker.Dispose();
}
}
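For reference, the 4*15-byte stride used for post1 and post2 corresponds to one 15-float record per detection. A possible C# mirror of that layout, inferred from how tempBox is indexed in Detect() (the field names are assumptions; the exact order is defined by the compute shaders, which are not shown here):
// Assumed per-detection layout matching the ComputeBuffer stride of 4 * 15 bytes.
// Floats 0-3: box, float 4: score (skipped when tempBox is read), floats 5-14: five (x, y) landmarks.
struct Detection
{
    public UnityEngine.Vector4 box;   // x1, y1, x2, y2
    public float score;               // detection score
    public UnityEngine.Vector2 kp1, kp2, kp3, kp4, kp5; // mapped above to right eye, left eye, nose, right and left mouth corner
}
post2.GetData(tempBox) as written copies only the first record. If more than one face can be present, the number of appended records can be read back with ComputeBuffer.CopyCount on post2 before fetching the data.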