首页 > 其他分享 >CPU指令集——bayer抽取r、g、b三通道(含镜像)-宽度为16或32整数倍版本

CPU指令集——bayer抽取r、g、b三通道(含镜像)-宽度为16或32整数倍版本

时间:2024-08-01 13:29:00浏览次数:13  
标签:__ index shuffle 16 mm 32 unsigned bayer m128i

#include <intrin.h>    //for sse
#include <string.h>    //for memcpy

// Bayer mosaic layout. The two letters name the colours of the first two
// pixels of the first row (e.g. bayerRG: row 0 starts R,G and row 1 starts G,B).
enum BayerFormat
{
    bayerRG = 0,
    bayerGR = 1,
    bayerBG = 2,
    bayerGB = 3
};

// Mirroring applied to the extracted colour planes.
enum Mirror
{
    mirrorNo = 0,     // no mirroring
    mirrorTB = 1,     // top-bottom mirror
    mirrorLR = 2,     // left-right mirror
    mirrorAll = 3     // mirror in both directions
};

// Splits 32 consecutive bytes into the 16 bytes at even positions (0,2,4,...)
// and the 16 bytes at odd positions (1,3,5,...).
static inline void bayerSplitEvenOdd32(__m256i packed, __m128i& evenBytes, __m128i& oddBytes)
{
    const __m256i evenThenOdd = _mm256_setr_epi8(
        0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15,
        0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15);
    // The byte shuffle works inside each 128-bit lane, so the four 64-bit
    // quarters are now [even(lane0) | odd(lane0) | even(lane1) | odd(lane1)].
    const __m256i perLane = _mm256_shuffle_epi8(packed, evenThenOdd);
    // 0xD8 picks quarters 0,2,1,3 -> [even(lane0) | even(lane1) | odd(lane0) | odd(lane1)].
    const __m256i grouped = _mm256_permute4x64_epi64(perLane, 0xD8);
    evenBytes = _mm256_castsi256_si128(grouped);
    oddBytes = _mm256_extracti128_si256(grouped, 1);
}

// Chooses which of the four 2x2-cell positions feed the two non-green planes
// and averages the remaining two positions into the green plane.
// pRC holds the samples at (row R, column C) of each 2x2 cell.
static inline void bayerPickPlanes(bool greenOnDiagonal,
                                   __m128i p00, __m128i p01, __m128i p10, __m128i p11,
                                   __m128i& first, __m128i& second, __m128i& green)
{
    if (greenOnDiagonal)
    {
        first = p01;
        second = p10;
        green = _mm_avg_epu8(p00, p11);    // per-byte (a + b + 1) >> 1
    }
    else
    {
        first = p00;
        second = p11;
        green = _mm_avg_epu8(p01, p10);    // per-byte (a + b + 1) >> 1
    }
}

// Extracts half-resolution R, G and B planes from an 8-bit Bayer image,
// optionally mirroring the output.
//
// Every 2x2 Bayer cell yields one R byte, one B byte and one G byte, where G is
// the rounded-up average of the cell's two green samples.
//
// Requirements: nWidth is a multiple of 16 and nHeight is a multiple of 2.
//
// pBayer        input image, nHeight rows of nWidth bytes, rows tightly packed
//               (row stride == nWidth). No alignment is required.
// nBayerFormat  colour layout of the 2x2 cell.
// nMirror       mirroring applied to all three output planes.
// pR, pG, pB    output planes; each must hold (nWidth/2) * (nHeight/2) bytes.
//
// Returns 0 on success, -1 when a pointer is null, a dimension is negative or
// violates the requirements above, or nBayerFormat / nMirror is not a known value.
int bayer2rgb_CPU(const unsigned char* pBayer, int nWidth, int nHeight, BayerFormat nBayerFormat, Mirror nMirror, unsigned char* pR, unsigned char* pG, unsigned char* pB)
{
    if (pBayer == NULL || pR == NULL || pG == NULL || pB == NULL)
        return -1;
    if (nWidth < 0 || nHeight < 0 || nWidth % 16 != 0 || nHeight % 2 != 0)
        return -1;

    // R and B always sit on one diagonal of the 2x2 cell and the two greens on
    // the other. pFirst receives the top-row sample of the R/B diagonal and
    // pSecond the bottom-row sample.
    bool greenOnDiagonal = false;
    unsigned char* pFirst = NULL;
    unsigned char* pSecond = NULL;
    switch (nBayerFormat)
    {
    case bayerRG: greenOnDiagonal = false; pFirst = pR; pSecond = pB; break;
    case bayerBG: greenOnDiagonal = false; pFirst = pB; pSecond = pR; break;
    case bayerGR: greenOnDiagonal = true;  pFirst = pR; pSecond = pB; break;
    case bayerGB: greenOnDiagonal = true;  pFirst = pB; pSecond = pR; break;
    default:
        return -1;
    }

    bool flipRows = false;
    bool flipCols = false;
    switch (nMirror)
    {
    case mirrorNo:  break;
    case mirrorTB:  flipRows = true; break;
    case mirrorLR:  flipCols = true; break;
    case mirrorAll: flipRows = true; flipCols = true; break;
    default:
        return -1;
    }

    // All offsets are computed in size_t so large frames cannot overflow int.
    const size_t srcStride = (size_t)nWidth;
    const size_t outWidth = srcStride / 2;
    const int outHeight = nHeight / 2;
    const size_t fullChunks = srcStride / 32;      // 32 input bytes -> 16 output bytes
    const bool hasTail = (nWidth % 32) != 0;       // 16 input bytes left -> 8 output bytes

    const __m128i reverse16 = _mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
    // Reverses the low 8 bytes; the high 8 bytes are zeroed (index bit 7 set) and never stored.
    const __m128i reverseLow8 = _mm_setr_epi8(7, 6, 5, 4, 3, 2, 1, 0,
        (char)0x80, (char)0x80, (char)0x80, (char)0x80, (char)0x80, (char)0x80, (char)0x80, (char)0x80);
    const __m128i evenThenOdd16 = _mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15);

    for (int row = 0; row < outHeight; row++)
    {
        const unsigned char* srcTop = pBayer + srcStride * (size_t)(2 * row);
        const unsigned char* srcBottom = srcTop + srcStride;
        const size_t dstRow = (size_t)(flipRows ? outHeight - 1 - row : row) * outWidth;

        for (size_t chunk = 0; chunk < fullChunks; chunk++)
        {
            __m128i p00, p01, p10, p11;
            // Unaligned loads: the caller's buffer is not guaranteed to be 32-byte aligned.
            bayerSplitEvenOdd32(_mm256_loadu_si256((const __m256i*)(srcTop + 32 * chunk)), p00, p01);
            bayerSplitEvenOdd32(_mm256_loadu_si256((const __m256i*)(srcBottom + 32 * chunk)), p10, p11);

            __m128i first, second, green;
            bayerPickPlanes(greenOnDiagonal, p00, p01, p10, p11, first, second, green);

            size_t dstCol = 16 * chunk;
            if (flipCols)
            {
                // Reverse the bytes inside the chunk and place the chunk
                // counted from the right edge of the output row.
                first = _mm_shuffle_epi8(first, reverse16);
                second = _mm_shuffle_epi8(second, reverse16);
                green = _mm_shuffle_epi8(green, reverse16);
                dstCol = outWidth - 16 * (chunk + 1);
            }

            // Destinations are byte offsets, so add before casting to __m128i*.
            _mm_storeu_si128((__m128i*)(pFirst + dstRow + dstCol), first);
            _mm_storeu_si128((__m128i*)(pSecond + dstRow + dstCol), second);
            _mm_storeu_si128((__m128i*)(pG + dstRow + dstCol), green);
        }

        if (hasTail)
        {
            // Last 16 input bytes of the row: low 8 bytes of each shuffled
            // register are the even columns, high 8 bytes the odd columns.
            const size_t srcCol = 32 * fullChunks;
            const __m128i top = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i*)(srcTop + srcCol)), evenThenOdd16);
            const __m128i bottom = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i*)(srcBottom + srcCol)), evenThenOdd16);

            __m128i first, second, green;
            bayerPickPlanes(greenOnDiagonal,
                            top, _mm_srli_si128(top, 8),
                            bottom, _mm_srli_si128(bottom, 8),
                            first, second, green);

            size_t dstCol = 16 * fullChunks;
            if (flipCols)
            {
                // When mirrored left-right the tail becomes the first 8 bytes of the output row.
                first = _mm_shuffle_epi8(first, reverseLow8);
                second = _mm_shuffle_epi8(second, reverseLow8);
                green = _mm_shuffle_epi8(green, reverseLow8);
                dstCol = 0;
            }

            // Only the low 8 bytes are valid, so store exactly 8 bytes.
            _mm_storel_epi64((__m128i*)(pFirst + dstRow + dstCol), first);
            _mm_storel_epi64((__m128i*)(pSecond + dstRow + dstCol), second);
            _mm_storel_epi64((__m128i*)(pG + dstRow + dstCol), green);
        }
    }

    return 0;
}

更好理解的版本:

不含镜像版本 CPU指令集——bayer抽取r、g、b三通道 - 夕西行 - 博客园 (cnblogs.com)

宽度为32整数倍版本 CPU指令集——bayer抽取r、g、b三通道(含镜像)-宽度为32整数倍版本 - 夕西行 - 博客园 (cnblogs.com)

测试:

#include<opencv2\opencv.hpp>
using namespace cv;
int main()
{
    String path = "E:/偏振相机采图/偏振相机2448_512/*.bmp";
    std::vector<String> paths;
    glob(path, paths);
    Mat src, srcValid, src0, src90;
    Mat dst90_0;

    for (int i = 0; i < paths.size(); i++)
    {
        src = imread(paths[i], 0);
        //宽度2448不是32的整数倍,用2432,扔掉右侧的18个像素
        srcValid = src(Rect(0, 0, 2432, 512)).clone();    //必须深拷贝,否则得到的图像有问题。
        //srcValid = src(Rect(0, 0,16,4)).clone();
        //srcValid = src.clone();
        int width = srcValid.cols;
        int height = srcValid.rows;

        const unsigned char* pBayerRG = srcValid.data;
        unsigned char* pR = new unsigned char[width*height / 4];
        unsigned char* pG = new unsigned char[width*height / 4];
        unsigned char* pB = new unsigned char[width*height / 4];        

        bayer2rgb_CPU(pBayerRG, width, height, BayerFormat::bayerRG, Mirror::mirrorAll, pR, pG, pB);

        src0 = Mat(height /2, width /2, CV_8UC1, pR);
        src90 = Mat(height /2, width /2, CV_8UC1, pB);
        Mat srcG = Mat(height / 2, width / 2, CV_8UC1, pG);

        absdiff(src90, src0, dst90_0);

    }

    return 0;
}

 

标签:__,index,shuffle,16,mm,32,unsigned,bayer,m128i
From: https://www.cnblogs.com/xixixing/p/18336479

相关文章

  • 代码随想录day16 || 513 树左下角值,112 路径之和,116 中序后序遍历构造二叉树
    切片传递问题question:什么情况下传递切片,什么情况下传递切片指针,为什么有时候会修改原始副本,有时候又不会呢?typesli[]intfuncmain(){ slice:=[]int{1} fmt.Printf("slice:%p\n",slice) change1(slice) fmt.Println("=================================") s2:=......
  • Day16 二叉树Part4 常规递归和遍历法的综合应用(二叉树相关)
    目录任务112.路径总和思路113.路径总和II思路106.从中序与后序遍历序列构造二叉树思路105.从前序与中序遍历序列构造二叉树思路心得体会任务112.路径总和给你二叉树的根节点root和一个表示目标和的整数targetSum。判断该树中是否存在根节点到叶子节点的路径,这条路......
  • STM32F1基于HAL库的学习记录实用使用教程分享(二、GPIO_Input 按键)
    往期内容STM32F1基于HAL库的学习记录实用使用教程分享(一、GPIO_Output)文章目录往期内容前言一、GPIO_Input1.浮空输入(GPIO_Mode_IN_FLOATING)2.上拉输入(GPIO_Mode_IPU)3.下拉输入(GPIO_Mode_IPD)4.上拉和下拉的区分原因二、配置1.RCC2.SYS(1).Debug(2).SystemWa......
  • 嵌入式文件系统介绍合集:FAT32、exFAT、ext3、ext4、jffs2、yaffs2、ubifs、squashf
    liwen012024.07.21前言在嵌入式Linux系统中常用的文件系统有:FAT32、exFAT、ext3、ext4、jffs2、yaffs2、ubifs、squashfs。不管是在文件系统类型选型还是嵌入式应用软件面试中,关于文件系统的问题都会经常被提起。就目前而言,并没有哪款文件系统可以适用于所有应用场景,它们各......
  • 出现错误:[WinError 193] %1 不是有效的 Win32 应用程序
    运行python脚本时出现以下错误FailedtoinitializeWebDriverorloadconfiguration:[WinError193]%1isnotavalidWin32applicationTraceback(mostrecentcalllast):File"C:\Users\gourav\Downloads\Automation_Tool\Automation_Tool\demo.py",l......
  • P2119 [NOIP2016 普及组] 魔法阵
    P2119[NOIP2016普及组]魔法阵传送门1我们可以先写出\(O(m^4)\)的暴力#include<bits/stdc++.h>#defineintlonglong#definePIIpair<int,int>usingnamespacestd;constintinf=0x3f3f3f3f;constintMOD=1e9+7,N=4e4+5;intn,m,ans[N......
  • 基于N32L40x CmBacktrace mdk5平台下的移植测试
    首先感谢大神提供的开源库CmBacktrace开源地址:https://github.com/armink/CmBacktrace/releases/latesthttps://gitee.com/Armink/CmBacktraceCmBacktrace是什么CmBacktrace一款针对ARMCortex-M系列MCU的错误代码自动追踪、定位,错误原因自动分析的开源库CmBac......
  • 基于N32L406MB EasyFlash参数(key-value)记录库移植
    EasyFlash感谢作者的分享https://github.com/armink/EasyFlashEasyFlash是一款开源的轻量级嵌入式Flash存储器库,方便开发者更加轻松的实现基于Flash存储器的常见应用开发三大实用功能ENV快速保存产品参数(key-value),支持写平衡(磨损平衡)及掉电保护功能EasyFlash不仅......
  • L1-016 查验身份证 分数 15
    小错不断,简直灾难//14'52"#include<bits/stdc++.h>usingnamespacestd;#defineintlonglongintarr[17]={7,9,10,5,8,4,2,1,6,3,7,9,10,5,8,4,2};signedmain(){intn;cin>>n;map<int,char>map;map.insert({0,......
  • STM32下载程序的方式
            STM32下载程序的方式多种多样,每种方式都有其特点和适用场景。以下是一些常见的STM32下载程序的方式:1.SWD接口下载特点:        SWD(SerialWireDebug)是STM32特有的一种串行调试接口,通过两根线(SWDIO串行数据输入输出和SWCLK串行时钟输入)实现程序的下......