Delta E 是评估色彩准确度的重要测量指标。摄影师、影片编辑和平面设计师等创意专业人士都应重视这项标准,因其是选择专业级显示器的重要考虑因素。
常见的找色算法都是基于颜色RGB上的数值差,这种方法虽然快捷,但是和人眼视觉上的色彩并不相同。这里采用Delta E的评估标准找色更符合人眼的直观感觉。
上文使用CPU计算,采用了一些优化方法但是都不尽如人意,这里使用cuda加速提高这个算法的可用度。
//计算颜色之间的Delta E
//<= 1.0:人眼无法感知差异
//1 - 2:仔细观察可以感知差异
//2 - 10:随意一看便可以感知差异
//11 - 49:色彩的相似程度大于相反程度
//100:色彩完全失真
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <cmath>
#include <ctime>
cudaError_t addWithCuda(int *c, const int *a, const int *b, unsigned int size);
struct Color_BGR
{
int B, G, R;
};
struct Color_Lab
{
float L, a, b;
};
Color_Lab BGR2Lab(Color_BGR x)
{
#define gamma(x) (((x) > 0.04045) ? std::pow(((x)+0.055f) / 1.055f, 2.4f) : ((x) / 12.92));
const float param_13 = 1.0f / 3.0f;
const float param_16116 = 16.0f / 116.0f;
const float Xn = 0.950456f;
const float Yn = 1.0f;
const float Zn = 1.088754f;
float RR = gamma(x.R / 255.0);
float GG = gamma(x.G / 255.0);
float BB = gamma(x.B / 255.0);
float X, Y, Z, fX, fY, fZ;
X = 0.4124564f * RR + 0.3575761f * GG + 0.1804375f * BB;
Y = 0.2126729f * RR + 0.7151522f * GG + 0.0721750f * BB;
Z = 0.0193339f * RR + 0.1191920f * GG + 0.9503041f * BB;
X /= (Xn);
Y /= (Yn);
Z /= (Zn);
if (Y > 0.008856f)
fY = std::pow(Y, param_13);
else
fY = 7.787f * Y + param_16116;
if (X > 0.008856f)
fX = std::pow(X, param_13);
else
fX = 7.787f * X + param_16116;
if (Z > 0.008856)
fZ = std::pow(Z, param_13);
else
fZ = 7.787f * Z + param_16116;
float L, a, b;
L = 116.0f * fY - 16.0f;
L = L > 0.0f ? L : 0.0f;
a = 500.0f * (fX - fY);
b = 200.0f * (fY - fZ);
return { L,a,b };
}
cudaError_t FindColorCuda(Color_BGR *src, float *ret,Color_Lab target,unsigned int size);
__global__ void FindColorCudaKernel(Color_BGR *src, float* ret, Color_Lab target)
{
int i = blockIdx.x * 256 + threadIdx.x;
#define gamma(x) (((x) > 0.04045) ? pow(((x)+0.055f) / 1.055f, 2.4f) : ((x) / 12.92));
const float param_13 = 1.0f / 3.0f;
const float param_16116 = 16.0f / 116.0f;
const float Xn = 0.950456f;
const float Yn = 1.0f;
const float Zn = 1.088754f;
float RR = gamma(src[i].R / 255.0);
float GG = gamma(src[i].G / 255.0);
float BB = gamma(src[i].B / 255.0);
float X, Y, Z, fX, fY, fZ;
X = 0.4124564f * RR + 0.3575761f * GG + 0.1804375f * BB;
Y = 0.2126729f * RR + 0.7151522f * GG + 0.0721750f * BB;
Z = 0.0193339f * RR + 0.1191920f * GG + 0.9503041f * BB;
X /= (Xn);
Y /= (Yn);
Z /= (Zn);
if (Y > 0.008856f)
fY = pow(Y, param_13);
else
fY = 7.787f * Y + param_16116;
if (X > 0.008856f)
fX = pow(X, param_13);
else
fX = 7.787f * X + param_16116;
if (Z > 0.008856)
fZ = pow(Z, param_13);
else
fZ = 7.787f * Z + param_16116;
float L, a, b;
L = 116.0f * fY - 16.0f;
L = L > 0.0f ? L : 0.0f;
a = 500.0f * (fX - fY);
b = 200.0f * (fY - fZ);
ret[i] = sqrt((L - target.L) * (L - target.L) + (a - target.a) * (a - target.a) + (b - target.b) * (b - target.b));
}
Color_BGR src_mat[1024 * 1024];
float ret_mat[1024 * 1024];
int main()
{
for (int i = 0; i < 1024 * 1024; i++)
{
src_mat[i] = { std::rand() % 256,std::rand() % 256, std::rand() % 256 };
}
//Pre Run for Best Speed
cudaError_t cudaStatus = FindColorCuda(src_mat, ret_mat, BGR2Lab({ 190,35,41 }), 1024 * 1024);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "FindColorCuda failed!");
return 1;
}
int st = clock();
// Add vectors in parallel.
cudaStatus = FindColorCuda(src_mat, ret_mat, BGR2Lab({190,35,41}), 1024 * 1024);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "FindColorCuda failed!");
return 1;
}
printf("Cost: %d\n", clock() - st);
int count = 0;
for (int i = 0; i < 1024*1024 ; i++)
{
if (ret_mat[i] < 2)
count++;
}
printf("%d", count);
// cudaDeviceReset must be called before exiting in order for profiling and
// tracing tools such as Nsight and Visual Profiler to show complete traces.
cudaStatus = cudaDeviceReset();
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaDeviceReset failed!");
return 1;
}
return 0;
}
//Helper
cudaError_t FindColorCuda(Color_BGR* src, float* ret, Color_Lab target, unsigned int size)
{
Color_BGR* dev_src = nullptr;
float* dev_ret = nullptr;
cudaError cudaStatus;
// Choose which GPU to run on, change this on a multi-GPU system.
cudaStatus = cudaSetDevice(0);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaSetDevice failed! Do you have a CUDA-capable GPU installed?");
goto Error;
}
// Allocate GPU buffers for three vectors (two input, one output) .
cudaStatus = cudaMalloc((void**)&dev_src, size * sizeof(Color_BGR));
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMalloc failed!");
goto Error;
}
cudaStatus = cudaMalloc((void**)&dev_ret, size * sizeof(float));
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMalloc failed!");
goto Error;
}
// Copy input vectors from host memory to GPU buffers.
cudaStatus = cudaMemcpy(dev_src, src, size * sizeof(Color_BGR), cudaMemcpyHostToDevice);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMemcpy failed!");
goto Error;
}
FindColorCudaKernel <<<size/256, 256 >>> (dev_src,dev_ret,target);
// Check for any errors launching the kernel
cudaStatus = cudaGetLastError();
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "FindColorCuda launch failed: %s\n", cudaGetErrorString(cudaStatus));
goto Error;
}
// cudaDeviceSynchronize waits for the kernel to finish, and returns
// any errors encountered during the launch.
cudaStatus = cudaDeviceSynchronize();
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching addKernel!\n", cudaStatus);
goto Error;
}
cudaStatus = cudaMemcpy( ret, dev_ret, size * sizeof(float), cudaMemcpyDeviceToHost);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMemcpy failed!");
goto Error;
}
Error:
cudaFree(dev_ret);
cudaFree(dev_src);
return cudaStatus;
}
在4060 LapTop 上取得 8ms(1024*1024)的成绩
标签:src,CIE,1024,1976,找色,float,cudaStatus,param,ret From: https://www.cnblogs.com/Icys/p/DeltaECuda.html