首页 > 其他分享 >11、Android Studio的ARM Neon学习笔记

11、Android Studio的ARM Neon学习笔记

时间:2022-09-28 14:06:47浏览次数:88  
标签:11 0.01328125037252903 0.02421874925494194 0.01171875 Neon 0.04921875149011612 S


基本思想:先深入地学习一下ARM Neon的基本原理,再开始测试NCNN大佬&ZZ大佬贡献的源码~

学习大佬博客:​​https://www.yuque.com/docs/share/3eff70c4-c70f-40df-b0af-df9fa7365d3c?#​​  mmp研究中 

(1)、在Android studio测试一下文章结尾的代码

这样写

arguments '-DANDROID=c++_shared',"-DANDROID_ARM_NEON=TRUE", "-DANDROID_TOOLCHAIN=clang"//"-mfloat-abi=hard"

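这样写好像没有区别(原因:下面的 -O3、-mcpu、-mfloat-abi 被写进了 -DANDROID_TOOLCHAIN 这个 CMake 变量的值里,并不会传给编译器;要让编译选项生效,应写到 cppFlags 或 CMAKE_CXX_FLAGS 中)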

arguments '-DANDROID=c++_shared',"-DANDROID_ARM_NEON=TRUE", "-DANDROID_TOOLCHAIN=clang  -O3 -mcpu=cortex-a8 -mfloat-abi=soft"//"-mfloat-abi=hard"

或者

arguments '-DANDROID=c++_shared',"-DANDROID_ARM_NEON=TRUE", "-DANDROID_TOOLCHAIN=clang  -O3 -mcpu=cortex-a8 -mfloat-abi=hard"//"-mfloat-abi=hard"

测试

#include <jni.h>
#include <android/log.h>
#include <arm_neon.h>

#include <chrono>
#include <cmath>
#include <ctime>
#include <string>

#define LOG_TAG "TEST_NEON"
#define LOGD(...) __android_log_print(ANDROID_LOG_DEBUG, LOG_TAG, __VA_ARGS__)
#define LOGI(...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, __VA_ARGS__)
// Reference for every NEON intrinsic: https://developer.arm.com/architectures/instruction-sets/intrinsics/

/**
 * Scalar reference dot product.
 *
 * Writes sum(A[i] * B[i]) for i in [0, count) to *C. The running sum is kept
 * in *C itself, so it is accumulated in single precision.
 *
 * @param A      first input vector, at least `count` elements
 * @param B      second input vector, at least `count` elements
 * @param C      output; set to 0 when `count` is 0
 * @param count  number of elements to process
 */
void vector_dot_c(float32_t const *A, float32_t const *B, float32_t *C, uint32_t count)
{
    // `count` is unsigned, so no "negative count" guard is needed; the loop
    // simply does not run for count == 0.
    *C = 0.0;
    // The index has the same unsigned type as `count`, so the comparison is
    // well defined for the whole uint32_t range.
    for (uint32_t idx = 0; idx < count; idx++) {
        *C += A[idx] * B[idx];
    }
}



// AArch64 has 31 general-purpose 64-bit registers (X0-X30) plus one special
// register; the same registers can be addressed as 32-bit W0-W30.
// AArch64 also has 32 128-bit NEON registers (V0-V31), which can additionally
// be accessed through narrower views (e.g. 32-bit or 16-bit).

/**
 * Dot product using NEON intrinsics.
 *
 * Processes the inputs four floats at a time with a fused multiply-add into a
 * 4-lane accumulator, sums the four lanes, then adds the remaining
 * `count % 4` elements with scalar code.
 *
 * @param src1   first input vector, at least `count` elements
 * @param src2   second input vector, at least `count` elements
 * @param dst    output; receives the scalar dot product
 * @param count  number of elements to process (any value, including 0)
 */
void vector_dot_intrinsics(float32_t const *src1, float32_t const *src2, float32_t *dst, uint32_t count)
{
    // sizeof(float32x4_t) is 16 bytes; sizeof(float32_t) is 4 bytes.
    const uint32_t blocks = count / 4;

    // All four accumulator lanes start at 0.
    float32x4_t acc = vmovq_n_f32(0.0);

    for (uint32_t blk = 0; blk < blocks; blk++) {
        // vld1q_f32 loads four consecutive floats (16 bytes) into a register.
        const float32x4_t a = vld1q_f32(src1 + blk * 4);
        const float32x4_t b = vld1q_f32(src2 + blk * 4);
        // acc = acc + a * b, lane by lane.
        acc = vfmaq_f32(acc, a, b);
    }

    // Spill the four lanes to memory and reduce them in a fixed order.
    float32_t lanes[4] = {0.0};
    vst1q_f32(lanes, acc);
    float32_t sum = lanes[0] + lanes[1] + lanes[2] + lanes[3];

    // Tail: elements that did not fill a whole 4-float block.
    for (uint32_t idx = blocks * 4; idx < count; idx++) {
        sum += src1[idx] * src2[idx];
    }

    *dst = sum;
}

/**
 * Dot product with hand-written AArch64 NEON assembly.
 *
 * Unlike the other variants this one does NOT reduce to a scalar: it writes
 * four per-lane partial sums to dst[0..3], and the caller adds them up.
 * Lane k holds the sum of src1[i] * src2[i] over all i with i % 4 == k inside
 * the whole 4-float blocks; products of the trailing `count % 4` elements are
 * added to dst[0], so dst[0] + dst[1] + dst[2] + dst[3] is always the full
 * dot product.
 *
 * @param src1   first input vector, at least `count` elements
 * @param src2   second input vector, at least `count` elements
 * @param dst    output, must have room for 4 floats
 * @param count  number of elements to process (any value, including 0)
 */
void vector_dot_assembly(float32_t const* src1, float32_t const * src2, float32_t *dst, uint32_t count)
{
    const uint32_t blocks = count / 4;

    // Defined result even when there is no full block to process.
    dst[0] = 0.0f;
    dst[1] = 0.0f;
    dst[2] = 0.0f;
    dst[3] = 0.0f;

#if defined(__aarch64__)
    if (blocks != 0) {
        // The post-indexed loads and the counter update modify these
        // registers, so they are local copies bound as read-write operands.
        float32_t const *a = src1;
        float32_t const *b = src2;
        uint32_t remaining = blocks;

        __asm__ __volatile__ (
        "movi v3.4s, #0 \n"                  // accumulator = {0, 0, 0, 0}
        "1: \n"
        "ld1 {v0.4s}, [%[a]], #16 \n"        // load 4 floats, advance a by 16 bytes
        "ld1 {v1.4s}, [%[b]], #16 \n"        // load 4 floats, advance b by 16 bytes
        "fmul v2.4s, v0.4s, v1.4s \n"
        "fadd v3.4s, v3.4s, v2.4s \n"
        "subs %w[n], %w[n], #1 \n"           // 32-bit block counter
        "b.ne 1b \n"
        "st1 {v3.4s}, [%[dst]] \n"
        : [a] "+r" (a), [b] "+r" (b), [n] "+r" (remaining)
        : [dst] "r" (dst)
        : "memory", "cc", "v0", "v1", "v2", "v3"
        );
    }
#else
    // Portable fallback with the same lane layout, so the file still builds
    // on targets where the AArch64 mnemonics above are not available.
    for (uint32_t blk = 0; blk < blocks; blk++) {
        for (uint32_t lane = 0; lane < 4; lane++) {
            dst[lane] += src1[blk * 4 + lane] * src2[blk * 4 + lane];
        }
    }
#endif

    // Tail: elements that did not fill a whole 4-float block.
    for (uint32_t idx = blocks * 4; idx < count; idx++) {
        dst[0] += src1[idx] * src2[idx];
    }
}





void test()
{

const float32_t arrayA[1024] = {-74.0, -79.0, -84.0, -85.0, -80.0, -71.0, -65.0, -64.0, -65.0, -63.0, -65.0, -64.0, -62.0, -61.0, -63.0, -60.0, -59.0, -59.0, -62.0, -66.0, -68.0, -75.0, -78.0, -82.0, -82.0, -73.0, -67.0, -68.0, -75.0, -81.0, -86.0, -85.0, -82.0, -76.0, -73.0, -68.0, -68.0, -69.0, -68.0, -68.0, -70.0, -71.0, -70.0, -68.0, -62.0, -59.0, -59.0, -59.0, -59.0, -55.0, -53.0, -53.0, -53.0, -52.0, -48.0, -48.0, -51.0, -57.0, -60.0, -61.0, -64.0, -60.0, -47.0, -7.0, 37.0, 48.0, 33.0, 22.0, 8.0, -15.0, -46.0, -59.0, -66.0, -68.0, -69.0, -70.0, -68.0, -73.0, -73.0, -73.0, -73.0, -75.0, -76.0, -75.0, -76.0, -75.0, -77.0, -78.0, -79.0, -78.0, -77.0, -77.0, -76.0, -76.0, -76.0, -77.0, -77.0, -80.0, -79.0, -75.0, -70.0, -66.0, -61.0, -57.0, -53.0, -52.0, -52.0, -53.0, -56.0, -54.0, -50.0, -47.0, -48.0, -46.0, -44.0, -45.0, -44.0, -44.0, -44.0, -45.0, -46.0, -47.0, -48.0, -48.0, -48.0, -48.0, -50.0, -51.0, -50.0, -48.0, -45.0, -43.0, -40.0, -37.0, -36.0, -34.0, -32.0, -32.0, -33.0, -34.0, -36.0, -39.0, -45.0, -47.0, -49.0, -47.0, -41.0, -21.0, -4.0, -2.0, -1.0, 8.0, 14.0, 4.0, -11.0, -19.0, -25.0, -30.0, -36.0, -36.0, -34.0, -32.0, -28.0, -26.0, -25.0, -26.0, -25.0, -26.0, -28.0, -30.0, -34.0, -34.0, -37.0, -38.0, -34.0, -35.0, -38.0, -32.0, -14.0, -4.0, -3.0, -5.0, -3.0, -4.0, -7.0, -16.0, -25.0, -32.0, -37.0, -36.0, -31.0, -33.0, -42.0, -43.0, -38.0, -33.0, -34.0, -38.0, -39.0, -37.0, -39.0, -44.0, -44.0, -42.0, -39.0, -41.0, -39.0, -33.0, -22.0, -23.0, -30.0, -29.0, -17.0, -1.0, -2.0, -18.0, -23.0, -21.0, -20.0, -28.0, -36.0, -42.0, -43.0, -41.0, -37.0, -35.0, -32.0, -29.0, -29.0, -30.0, -30.0, -30.0, -29.0, -29.0, -25.0, -19.0, -11.0, -2.0, 4.0, 13.0, 22.0, 30.0, 30.0, 26.0, 23.0, 11.0, -1.0, -6.0, -17.0, -30.0, -30.0, -27.0, -28.0, -28.0, -27.0, -27.0, -28.0, -30.0, -29.0, -28.0, -25.0, -26.0, -29.0, -36.0, -41.0, -44.0, -36.0, -35.0, -43.0, -39.0, -23.0, -16.0, -21.0, -33.0, -38.0, -28.0, -23.0, -30.0, -41.0, -41.0, -36.0, -30.0, -27.0, -28.0, -31.0, -35.0, 
-35.0, -37.0, -41.0, -42.0, -36.0, -29.0, -27.0, -35.0, -37.0, -34.0, -33.0, -33.0, -34.0, -33.0, -32.0, -31.0, -31.0, -34.0, -36.0, -36.0, -34.0, -42.0, -46.0, -50.0, -52.0, -48.0, -42.0, -36.0, -38.0, -39.0, -37.0, -33.0, -32.0, -35.0, -40.0, -45.0, -46.0, -49.0, -52.0, -53.0, -56.0, -57.0, -61.0, -57.0, -49.0, -42.0, -32.0, -18.0, -5.0, 6.0, 7.0, 3.0, -4.0, -13.0, -31.0, -51.0, -51.0, -50.0, -39.0, -25.0, -10.0, 3.0, 18.0, 38.0, 36.0, 32.0, 26.0, 15.0, 3.0, -6.0, -12.0, -24.0, -41.0, -56.0, -63.0, -66.0, -65.0, -63.0, -62.0, -60.0, -52.0, -42.0, -34.0, -29.0, -28.0, -26.0, -29.0, -37.0, -44.0, -52.0, -62.0, -60.0, -58.0, -56.0, -59.0, -61.0, -60.0, -57.0, -58.0, -62.0, -65.0, -60.0, -52.0, -48.0, -47.0, -43.0, -47.0, -48.0, -49.0, -46.0, -51.0, -60.0, -61.0, -60.0, -59.0, -60.0, -64.0, -69.0, -72.0, -71.0, -71.0, -71.0, -70.0, -68.0, -66.0, -64.0, -65.0, -64.0, -64.0, -65.0, -67.0, -68.0, -71.0, -71.0, -73.0, -75.0, -75.0, -75.0, -75.0, -74.0, -72.0, -71.0, -71.0, -70.0, -71.0, -71.0, -72.0, -76.0, -76.0, -75.0, -75.0, -74.0, -74.0, -74.0, -74.0, -71.0, -69.0, -70.0, -70.0, -69.0, -67.0, -65.0, -66.0, -64.0, -65.0, -71.0, -74.0, -76.0, -82.0, -83.0, -82.0, -79.0, -73.0, -64.0, -62.0, -61.0, -74.0, -86.0, -92.0, -95.0, -79.0, -36.0, 35.0, 87.0, 125.0, 147.0, 151.0, 146.0, 151.0, 150.0, 151.0, 150.0, 150.0, 149.0, -75.0, -80.0, -86.0, -86.0, -80.0, -72.0, -65.0, -64.0, -64.0, -63.0, -64.0, -64.0, -61.0, -61.0, -62.0, -61.0, -59.0, -59.0, -62.0, -64.0, -67.0, -75.0, -78.0, -82.0, -81.0, -73.0, -68.0, -69.0, -70.0, -74.0, -78.0, -83.0, -84.0, -79.0, -75.0, -75.0, -75.0, -75.0, -74.0, -74.0, -75.0, -78.0, -79.0, -75.0, -68.0, -63.0, -62.0, -63.0, -63.0, -57.0, -55.0, -54.0, -54.0, -53.0, -51.0, -49.0, -52.0, -56.0, -59.0, -60.0, -63.0, -59.0, -45.0, -4.0, 38.0, 48.0, 32.0, 22.0, 8.0, -15.0, -46.0, -58.0, -65.0, -65.0, -67.0, -68.0, -66.0, -70.0, -69.0, -69.0, -71.0, -72.0, -73.0, -72.0, -73.0, -73.0, -74.0, -75.0, -76.0, -75.0, -74.0, -74.0, -73.0, -72.0, -73.0, 
-74.0, -74.0, -75.0, -72.0, -68.0, -64.0, -60.0, -57.0, -52.0, -50.0, -50.0, -50.0, -51.0, -54.0, -53.0, -50.0, -47.0, -46.0, -44.0, -42.0, -42.0, -41.0, -41.0, -41.0, -43.0, -43.0, -44.0, -45.0, -45.0, -46.0, -46.0, -48.0, -48.0, -47.0, -46.0, -43.0, -40.0, -39.0, -37.0, -35.0, -33.0, -32.0, -32.0, -33.0, -33.0, -36.0, -39.0, -44.0, -47.0, -49.0, -47.0, -42.0, -25.0, -13.0, -9.0, -6.0, 1.0, 1.0, -9.0, -20.0, -26.0, -31.0, -34.0, -39.0, -40.0, -37.0, -34.0, -29.0, -28.0, -27.0, -28.0, -27.0, -27.0, -29.0, -31.0, -34.0, -34.0, -37.0, -37.0, -34.0, -33.0, -33.0, -28.0, -13.0, -5.0, -3.0, -5.0, -5.0, -5.0, -7.0, -17.0, -26.0, -33.0, -39.0, -39.0, -35.0, -36.0, -42.0, -42.0, -38.0, -32.0, -31.0, -34.0, -37.0, -36.0, -38.0, -43.0, -43.0, -42.0, -40.0, -41.0, -38.0, -32.0, -21.0, -20.0, -24.0, -23.0, -14.0, -3.0, -5.0, -16.0, -24.0, -26.0, -27.0, -36.0, -44.0, -49.0, -49.0, -45.0, -42.0, -39.0, -36.0, -32.0, -31.0, -32.0, -32.0, -32.0, -29.0, -29.0, -24.0, -18.0, -10.0, -2.0, 3.0, 12.0, 20.0, 30.0, 29.0, 26.0, 24.0, 12.0, -1.0, -6.0, -16.0, -27.0, -26.0, -22.0, -22.0, -24.0, -24.0, -22.0, -21.0, -21.0, -23.0, -23.0, -21.0, -19.0, -21.0, -30.0, -40.0, -43.0, -36.0, -35.0, -41.0, -36.0, -23.0, -18.0, -21.0, -27.0, -30.0, -24.0, -24.0, -32.0, -40.0, -37.0, -33.0, -31.0, -31.0, -34.0, -37.0, -40.0, -40.0, -41.0, -42.0, -42.0, -39.0, -35.0, -36.0, -39.0, -41.0, -36.0, -36.0, -36.0, -36.0, -35.0, -35.0, -32.0, -30.0, -32.0, -36.0, -38.0, -39.0, -44.0, -44.0, -45.0, -44.0, -41.0, -36.0, -33.0, -36.0, -38.0, -37.0, -36.0, -35.0, -40.0, -45.0, -46.0, -47.0, -49.0, -51.0, -53.0, -55.0, -56.0, -59.0, -54.0, -47.0, -39.0, -30.0, -15.0, -1.0, 9.0, 11.0, 7.0, 1.0, -8.0, -26.0, -47.0, -50.0, -49.0, -39.0, -24.0, -11.0, 2.0, 16.0, 31.0, 31.0, 29.0, 25.0, 16.0, 5.0, -2.0, -10.0, -16.0, -31.0, -48.0, -61.0, -66.0, -65.0, -66.0, -61.0, -53.0, -43.0, -31.0, -21.0, -16.0, -14.0, -13.0, -18.0, -27.0, -35.0, -45.0, -57.0, -63.0, -60.0, -56.0, -57.0, -59.0, -58.0, -56.0, -58.0, -61.0, -61.0, 
-55.0, -46.0, -39.0, -40.0, -41.0, -43.0, -44.0, -46.0, -45.0, -51.0, -60.0, -58.0, -56.0, -55.0, -56.0, -60.0, -65.0, -67.0, -69.0, -68.0, -69.0, -68.0, -66.0, -64.0, -62.0, -62.0, -62.0, -62.0, -63.0, -65.0, -67.0, -70.0, -71.0, -73.0, -74.0, -75.0, -75.0, -74.0, -74.0, -72.0, -71.0, -71.0, -70.0, -71.0, -71.0, -72.0, -75.0, -74.0, -74.0, -74.0, -73.0, -73.0, -73.0, -73.0, -71.0, -69.0, -69.0, -70.0, -69.0, -67.0, -66.0, -71.0, -71.0, -70.0, -72.0, -75.0, -75.0, -72.0, -74.0, -75.0, -77.0, -75.0, -71.0, -66.0, -66.0, -76.0, -88.0, -92.0, -96.0, -82.0, -38.0, 32.0, 84.0, 124.0, 147.0, 151.0, 146.0, 150.0, 150.0, 151.0, 151.0, 150.0, 149.0, -75.0, -81.0, -86.0, -87.0, -80.0, -72.0, -65.0, -62.0, -64.0, -62.0, -63.0, -63.0, -60.0, -60.0, -61.0, -60.0, -58.0, -59.0, -61.0, -62.0, -65.0, -74.0, -77.0, -80.0, -79.0, -70.0, -63.0, -64.0, -62.0, -61.0, -66.0, -76.0, -83.0, -80.0, -75.0, -80.0, -80.0, -81.0, -82.0, -82.0, -84.0, -86.0, -85.0, -80.0, -72.0, -67.0, -64.0, -65.0, -65.0, -60.0, -58.0, -56.0, -56.0, -56.0, -53.0, -52.0, -53.0, -56.0, -58.0, -59.0, -61.0, -58.0, -43.0, -1.0};
const float32_t arrayB[1024] = {-0.03229166567325592, -0.02421874925494194, 0.03229166567325592, 0.02421874925494194, -0.015625, -0.02421874925494194, 0.04895833507180214, 0.02421874925494194, 0.0010416667209938169, -0.02421874925494194, 0.06562499701976776, 0.02421874925494194, 0.01770833320915699, -0.02421874925494194, 0.08229167014360428, 0.02421874925494194, -0.03229166567325592, -0.01171875, 0.03229166567325592, 0.03671874850988388, -0.015625, -0.01171875, 0.04895833507180214, 0.03671874850988388, 0.0010416667209938169, -0.01171875, 0.06562499701976776, 0.03671874850988388, 0.01770833320915699, -0.01171875, 0.08229167014360428, 0.03671874850988388, -0.03229166567325592, 0.0007812500116415322, 0.03229166567325592, 0.04921875149011612, -0.015625, 0.0007812500116415322, 0.04895833507180214, 0.04921875149011612, 0.0010416667209938169, 0.0007812500116415322, 0.06562499701976776, 0.04921875149011612, 0.01770833320915699, 0.0007812500116415322, 0.08229167014360428, 0.04921875149011612, -0.03229166567325592, 0.01328125037252903, 0.03229166567325592, 0.06171875074505806, -0.015625, 0.01328125037252903, 0.04895833507180214, 0.06171875074505806, 0.0010416667209938169, 0.01328125037252903, 0.06562499701976776, 0.06171875074505806, 0.01770833320915699, 0.01328125037252903, 0.08229167014360428, 0.06171875074505806, -0.06562499701976776, -0.04921875149011612, 0.06562499701976776, 0.04921875149011612, -0.03229166567325592, -0.04921875149011612, 0.0989583358168602, 0.04921875149011612, -0.06562499701976776, -0.02421874925494194, 0.06562499701976776, 0.07421875, -0.03229166567325592, -0.02421874925494194, 0.0989583358168602, 0.07421875, -0.0989583358168602, -0.07421875, 0.16562500596046448, 0.12421874701976776, 0.03437500074505806, -0.02421874925494194, 0.0989583358168602, 0.02421874925494194, 0.05104166641831398, -0.02421874925494194, 0.11562500149011612, 0.02421874925494194, 0.0677083358168602, -0.02421874925494194, 0.13229165971279144, 0.02421874925494194, 
0.08437500149011612, -0.02421874925494194, 0.14895834028720856, 0.02421874925494194, 0.03437500074505806, -0.01171875, 0.0989583358168602, 0.03671874850988388, 0.05104166641831398, -0.01171875, 0.11562500149011612, 0.03671874850988388, 0.0677083358168602, -0.01171875, 0.13229165971279144, 0.03671874850988388, 0.08437500149011612, -0.01171875, 0.14895834028720856, 0.03671874850988388, 0.03437500074505806, 0.0007812500116415322, 0.0989583358168602, 0.04921875149011612, 0.05104166641831398, 0.0007812500116415322, 0.11562500149011612, 0.04921875149011612, 0.0677083358168602, 0.0007812500116415322, 0.13229165971279144, 0.04921875149011612, 0.08437500149011612, 0.0007812500116415322, 0.14895834028720856, 0.04921875149011612, 0.03437500074505806, 0.01328125037252903, 0.0989583358168602, 0.06171875074505806, 0.05104166641831398, 0.01328125037252903, 0.11562500149011612, 0.06171875074505806, 0.0677083358168602, 0.01328125037252903, 0.13229165971279144, 0.06171875074505806, 0.08437500149011612, 0.01328125037252903, 0.14895834028720856, 0.06171875074505806, 0.0010416667209938169, -0.04921875149011612, 0.13229165971279144, 0.04921875149011612, 0.03437500074505806, -0.04921875149011612, 0.16562500596046448, 0.04921875149011612, 0.0010416667209938169, -0.02421874925494194, 0.13229165971279144, 0.07421875, 0.03437500074505806, -0.02421874925494194, 0.16562500596046448, 0.07421875, -0.03229166567325592, -0.07421875, 0.23229166865348816, 0.12421874701976776, 0.10104166716337204, -0.02421874925494194, 0.16562500596046448, 0.02421874925494194, 0.11770833283662796, -0.02421874925494194, 0.1822916716337204, 0.02421874925494194, 0.13437500596046448, -0.02421874925494194, 0.19895833730697632, 0.02421874925494194, 0.1510416716337204, -0.02421874925494194, 0.21562500298023224, 0.02421874925494194, 0.10104166716337204, -0.01171875, 0.16562500596046448, 0.03671874850988388, 0.11770833283662796, -0.01171875, 0.1822916716337204, 0.03671874850988388, 0.13437500596046448, -0.01171875, 
0.19895833730697632, 0.03671874850988388, 0.1510416716337204, -0.01171875, 0.21562500298023224, 0.03671874850988388, 0.10104166716337204, 0.0007812500116415322, 0.16562500596046448, 0.04921875149011612, 0.11770833283662796, 0.0007812500116415322, 0.1822916716337204, 0.04921875149011612, 0.13437500596046448, 0.0007812500116415322, 0.19895833730697632, 0.04921875149011612, 0.1510416716337204, 0.0007812500116415322, 0.21562500298023224, 0.04921875149011612, 0.10104166716337204, 0.01328125037252903, 0.16562500596046448, 0.06171875074505806, 0.11770833283662796, 0.01328125037252903, 0.1822916716337204, 0.06171875074505806, 0.13437500596046448, 0.01328125037252903, 0.19895833730697632, 0.06171875074505806, 0.1510416716337204, 0.01328125037252903, 0.21562500298023224, 0.06171875074505806, 0.0677083358168602, -0.04921875149011612, 0.19895833730697632, 0.04921875149011612, 0.10104166716337204, -0.04921875149011612, 0.23229166865348816, 0.04921875149011612, 0.0677083358168602, -0.02421874925494194, 0.19895833730697632, 0.07421875, 0.10104166716337204, -0.02421874925494194, 0.23229166865348816, 0.07421875, 0.03437500074505806, -0.07421875, 0.29895833134651184, 0.12421874701976776, 0.16770833730697632, -0.02421874925494194, 0.23229166865348816, 0.02421874925494194, 0.18437500298023224, -0.02421874925494194, 0.24895833432674408, 0.02421874925494194, 0.20104166865348816, -0.02421874925494194, 0.265625, 0.02421874925494194, 0.21770833432674408, -0.02421874925494194, 0.2822916805744171, 0.02421874925494194, 0.16770833730697632, -0.01171875, 0.23229166865348816, 0.03671874850988388, 0.18437500298023224, -0.01171875, 0.24895833432674408, 0.03671874850988388, 0.20104166865348816, -0.01171875, 0.265625, 0.03671874850988388, 0.21770833432674408, -0.01171875, 0.2822916805744171, 0.03671874850988388, 0.16770833730697632, 0.0007812500116415322, 0.23229166865348816, 0.04921875149011612, 0.18437500298023224, 0.0007812500116415322, 0.24895833432674408, 0.04921875149011612, 
0.20104166865348816, 0.0007812500116415322, 0.265625, 0.04921875149011612, 0.21770833432674408, 0.0007812500116415322, 0.2822916805744171, 0.04921875149011612, 0.16770833730697632, 0.01328125037252903, 0.23229166865348816, 0.06171875074505806, 0.18437500298023224, 0.01328125037252903, 0.24895833432674408, 0.06171875074505806, 0.20104166865348816, 0.01328125037252903, 0.265625, 0.06171875074505806, 0.21770833432674408, 0.01328125037252903, 0.2822916805744171, 0.06171875074505806, 0.13437500596046448, -0.04921875149011612, 0.265625, 0.04921875149011612, 0.16770833730697632, -0.04921875149011612, 0.29895833134651184, 0.04921875149011612, 0.13437500596046448, -0.02421874925494194, 0.265625, 0.07421875, 0.16770833730697632, -0.02421874925494194, 0.29895833134651184, 0.07421875, 0.10104166716337204, -0.07421875, 0.3656249940395355, 0.12421874701976776, 0.234375, -0.02421874925494194, 0.29895833134651184, 0.02421874925494194, 0.2510416805744171, -0.02421874925494194, 0.31562501192092896, 0.02421874925494194, 0.26770833134651184, -0.02421874925494194, 0.3322916626930237, 0.02421874925494194, 0.28437501192092896, -0.02421874925494194, 0.3489583432674408, 0.02421874925494194, 0.234375, -0.01171875, 0.29895833134651184, 0.03671874850988388, 0.2510416805744171, -0.01171875, 0.31562501192092896, 0.03671874850988388, 0.26770833134651184, -0.01171875, 0.3322916626930237, 0.03671874850988388, 0.28437501192092896, -0.01171875, 0.3489583432674408, 0.03671874850988388, 0.234375, 0.0007812500116415322, 0.29895833134651184, 0.04921875149011612, 0.2510416805744171, 0.0007812500116415322, 0.31562501192092896, 0.04921875149011612, 0.26770833134651184, 0.0007812500116415322, 0.3322916626930237, 0.04921875149011612, 0.28437501192092896, 0.0007812500116415322, 0.3489583432674408, 0.04921875149011612, 0.234375, 0.01328125037252903, 0.29895833134651184, 0.06171875074505806, 0.2510416805744171, 0.01328125037252903, 0.31562501192092896, 0.06171875074505806, 0.26770833134651184, 
0.01328125037252903, 0.3322916626930237, 0.06171875074505806, 0.28437501192092896, 0.01328125037252903, 0.3489583432674408, 0.06171875074505806, 0.20104166865348816, -0.04921875149011612, 0.3322916626930237, 0.04921875149011612, 0.234375, -0.04921875149011612, 0.3656249940395355, 0.04921875149011612, 0.20104166865348816, -0.02421874925494194, 0.3322916626930237, 0.07421875, 0.234375, -0.02421874925494194, 0.3656249940395355, 0.07421875, 0.16770833730697632, -0.07421875, 0.4322916567325592, 0.12421874701976776, 0.3010416626930237, -0.02421874925494194, 0.3656249940395355, 0.02421874925494194, 0.3177083432674408, -0.02421874925494194, 0.38229167461395264, 0.02421874925494194, 0.3343749940395355, -0.02421874925494194, 0.39895832538604736, 0.02421874925494194, 0.35104167461395264, -0.02421874925494194, 0.4156250059604645, 0.02421874925494194, 0.3010416626930237, -0.01171875, 0.3656249940395355, 0.03671874850988388, 0.3177083432674408, -0.01171875, 0.38229167461395264, 0.03671874850988388, 0.3343749940395355, -0.01171875, 0.39895832538604736, 0.03671874850988388, 0.35104167461395264, -0.01171875, 0.4156250059604645, 0.03671874850988388, 0.3010416626930237, 0.0007812500116415322, 0.3656249940395355, 0.04921875149011612, 0.3177083432674408, 0.0007812500116415322, 0.38229167461395264, 0.04921875149011612, 0.3343749940395355, 0.0007812500116415322, 0.39895832538604736, 0.04921875149011612, 0.35104167461395264, 0.0007812500116415322, 0.4156250059604645, 0.04921875149011612, 0.3010416626930237, 0.01328125037252903, 0.3656249940395355, 0.06171875074505806, 0.3177083432674408, 0.01328125037252903, 0.38229167461395264, 0.06171875074505806, 0.3343749940395355, 0.01328125037252903, 0.39895832538604736, 0.06171875074505806, 0.35104167461395264, 0.01328125037252903, 0.4156250059604645, 0.06171875074505806, 0.26770833134651184, -0.04921875149011612, 0.39895832538604736, 0.04921875149011612, 0.3010416626930237, -0.04921875149011612, 0.4322916567325592, 0.04921875149011612, 
0.26770833134651184, -0.02421874925494194, 0.39895832538604736, 0.07421875, 0.3010416626930237, -0.02421874925494194, 0.4322916567325592, 0.07421875, 0.234375, -0.07421875, 0.4989583194255829, 0.12421874701976776, 0.36770832538604736, -0.02421874925494194, 0.4322916567325592, 0.02421874925494194, 0.3843750059604645, -0.02421874925494194, 0.4489583373069763, 0.02421874925494194, 0.4010416567325592, -0.02421874925494194, 0.46562498807907104, 0.02421874925494194, 0.4177083373069763, -0.02421874925494194, 0.48229166865348816, 0.02421874925494194, 0.36770832538604736, -0.01171875, 0.4322916567325592, 0.03671874850988388, 0.3843750059604645, -0.01171875, 0.4489583373069763, 0.03671874850988388, 0.4010416567325592, -0.01171875, 0.46562498807907104, 0.03671874850988388, 0.4177083373069763, -0.01171875, 0.48229166865348816, 0.03671874850988388, 0.36770832538604736, 0.0007812500116415322, 0.4322916567325592, 0.04921875149011612, 0.3843750059604645, 0.0007812500116415322, 0.4489583373069763, 0.04921875149011612, 0.4010416567325592, 0.0007812500116415322, 0.46562498807907104, 0.04921875149011612, 0.4177083373069763, 0.0007812500116415322, 0.48229166865348816, 0.04921875149011612, 0.36770832538604736, 0.01328125037252903, 0.4322916567325592, 0.06171875074505806, 0.3843750059604645, 0.01328125037252903, 0.4489583373069763, 0.06171875074505806, 0.4010416567325592, 0.01328125037252903, 0.46562498807907104, 0.06171875074505806, 0.4177083373069763, 0.01328125037252903, 0.48229166865348816, 0.06171875074505806, 0.3343749940395355, -0.04921875149011612, 0.46562498807907104, 0.04921875149011612, 0.36770832538604736, -0.04921875149011612, 0.4989583194255829, 0.04921875149011612, 0.3343749940395355, -0.02421874925494194, 0.46562498807907104, 0.07421875, 0.36770832538604736, -0.02421874925494194, 0.4989583194255829, 0.07421875, 0.3010416626930237, -0.07421875, 0.565625011920929, 0.12421874701976776, 0.43437498807907104, -0.02421874925494194, 0.4989583194255829, 0.02421874925494194, 
0.45104166865348816, -0.02421874925494194, 0.515625, 0.02421874925494194, 0.4677083194255829, -0.02421874925494194, 0.5322916507720947, 0.02421874925494194, 0.484375, -0.02421874925494194, 0.5489583611488342, 0.02421874925494194, 0.43437498807907104, -0.01171875, 0.4989583194255829, 0.03671874850988388, 0.45104166865348816, -0.01171875, 0.515625, 0.03671874850988388, 0.4677083194255829, -0.01171875, 0.5322916507720947, 0.03671874850988388, 0.484375, -0.01171875, 0.5489583611488342, 0.03671874850988388, 0.43437498807907104, 0.0007812500116415322, 0.4989583194255829, 0.04921875149011612, 0.45104166865348816, 0.0007812500116415322, 0.515625, 0.04921875149011612, 0.4677083194255829, 0.0007812500116415322, 0.5322916507720947, 0.04921875149011612, 0.484375, 0.0007812500116415322, 0.5489583611488342, 0.04921875149011612, 0.43437498807907104, 0.01328125037252903, 0.4989583194255829, 0.06171875074505806, 0.45104166865348816, 0.01328125037252903, 0.515625, 0.06171875074505806, 0.4677083194255829, 0.01328125037252903, 0.5322916507720947, 0.06171875074505806, 0.484375, 0.01328125037252903, 0.5489583611488342, 0.06171875074505806, 0.4010416567325592, -0.04921875149011612, 0.5322916507720947, 0.04921875149011612, 0.43437498807907104, -0.04921875149011612, 0.565625011920929, 0.04921875149011612, 0.4010416567325592, -0.02421874925494194, 0.5322916507720947, 0.07421875, 0.43437498807907104, -0.02421874925494194, 0.565625011920929, 0.07421875, 0.36770832538604736, -0.07421875, 0.6322916746139526, 0.12421874701976776, 0.5010416507720947, -0.02421874925494194, 0.565625011920929, 0.02421874925494194, 0.5177083611488342, -0.02421874925494194, 0.5822916626930237, 0.02421874925494194, 0.534375011920929, -0.02421874925494194, 0.5989583134651184, 0.02421874925494194, 0.5510416626930237, -0.02421874925494194, 0.6156250238418579, 0.02421874925494194, 0.5010416507720947, -0.01171875, 0.565625011920929, 0.03671874850988388, 0.5177083611488342, -0.01171875, 0.5822916626930237, 
0.03671874850988388, 0.534375011920929, -0.01171875, 0.5989583134651184, 0.03671874850988388, 0.5510416626930237, -0.01171875, 0.6156250238418579, 0.03671874850988388, 0.5010416507720947, 0.0007812500116415322, 0.565625011920929, 0.04921875149011612, 0.5177083611488342, 0.0007812500116415322, 0.5822916626930237, 0.04921875149011612, 0.534375011920929, 0.0007812500116415322, 0.5989583134651184, 0.04921875149011612, 0.5510416626930237, 0.0007812500116415322, 0.6156250238418579, 0.04921875149011612, 0.5010416507720947, 0.01328125037252903, 0.565625011920929, 0.06171875074505806, 0.5177083611488342, 0.01328125037252903, 0.5822916626930237, 0.06171875074505806, 0.534375011920929, 0.01328125037252903, 0.5989583134651184, 0.06171875074505806, 0.5510416626930237, 0.01328125037252903, 0.6156250238418579, 0.06171875074505806, 0.4677083194255829, -0.04921875149011612, 0.5989583134651184, 0.04921875149011612, 0.5010416507720947, -0.04921875149011612, 0.6322916746139526, 0.04921875149011612, 0.4677083194255829, -0.02421874925494194, 0.5989583134651184, 0.07421875, 0.5010416507720947, -0.02421874925494194, 0.6322916746139526, 0.07421875, 0.43437498807907104, -0.07421875, 0.6989583373069763, 0.12421874701976776, 0.5677083134651184, -0.02421874925494194, 0.6322916746139526, 0.02421874925494194, 0.5843750238418579, -0.02421874925494194, 0.6489583253860474, 0.02421874925494194, 0.6010416746139526, -0.02421874925494194, 0.6656249761581421, 0.02421874925494194, 0.6177083253860474, -0.02421874925494194, 0.6822916865348816, 0.02421874925494194, 0.5677083134651184, -0.01171875, 0.6322916746139526, 0.03671874850988388, 0.5843750238418579, -0.01171875, 0.6489583253860474, 0.03671874850988388, 0.6010416746139526, -0.01171875, 0.6656249761581421, 0.03671874850988388, 0.6177083253860474, -0.01171875, 0.6822916865348816, 0.03671874850988388, 0.5677083134651184, 0.0007812500116415322, 0.6322916746139526, 0.04921875149011612, 0.5843750238418579, 0.0007812500116415322, 0.6489583253860474, 
0.04921875149011612, 0.6010416746139526, 0.0007812500116415322, 0.6656249761581421, 0.04921875149011612, 0.6177083253860474, 0.0007812500116415322, 0.6822916865348816, 0.04921875149011612, 0.5677083134651184, 0.01328125037252903, 0.6322916746139526, 0.06171875074505806, 0.5843750238418579, 0.01328125037252903, 0.6489583253860474, 0.06171875074505806, 0.6010416746139526, 0.01328125037252903, 0.6656249761581421, 0.06171875074505806, 0.6177083253860474, 0.01328125037252903, 0.6822916865348816, 0.06171875074505806, 0.534375011920929, -0.04921875149011612, 0.6656249761581421, 0.04921875149011612, 0.5677083134651184, -0.04921875149011612, 0.6989583373069763, 0.04921875149011612, 0.534375011920929, -0.02421874925494194, 0.6656249761581421, 0.07421875, 0.5677083134651184, -0.02421874925494194, 0.6989583373069763, 0.07421875, 0.5010416507720947, -0.07421875, 0.765625, 0.12421874701976776, 0.6343749761581421, -0.02421874925494194, 0.6989583373069763, 0.02421874925494194, 0.6510416865348816, -0.02421874925494194, 0.715624988079071, 0.02421874925494194, 0.6677083373069763, -0.02421874925494194, 0.7322916388511658, 0.02421874925494194, 0.684374988079071, -0.02421874925494194, 0.7489583492279053, 0.02421874925494194, 0.6343749761581421, -0.01171875, 0.6989583373069763, 0.03671874850988388, 0.6510416865348816, -0.01171875, 0.715624988079071, 0.03671874850988388, 0.6677083373069763, -0.01171875, 0.7322916388511658, 0.03671874850988388, 0.684374988079071, -0.01171875, 0.7489583492279053, 0.03671874850988388, 0.6343749761581421, 0.0007812500116415322, 0.6989583373069763, 0.04921875149011612, 0.6510416865348816, 0.0007812500116415322, 0.715624988079071, 0.04921875149011612, 0.6677083373069763, 0.0007812500116415322, 0.7322916388511658, 0.04921875149011612, 0.684374988079071, 0.0007812500116415322, 0.7489583492279053, 0.04921875149011612, 0.6343749761581421, 0.01328125037252903, 0.6989583373069763, 0.06171875074505806, 0.6510416865348816, 0.01328125037252903, 0.715624988079071, 
0.06171875074505806, 0.6677083373069763, 0.01328125037252903, 0.7322916388511658, 0.06171875074505806, 0.684374988079071, 0.01328125037252903, 0.7489583492279053, 0.06171875074505806, 0.6010416746139526, -0.04921875149011612, 0.7322916388511658, 0.04921875149011612, 0.6343749761581421, -0.04921875149011612, 0.765625, 0.04921875149011612, 0.6010416746139526, -0.02421874925494194, 0.7322916388511658, 0.07421875, 0.6343749761581421, -0.02421874925494194, 0.765625, 0.07421875, 0.5677083134651184, -0.07421875, 0.8322916626930237, 0.12421874701976776, 0.7010416388511658, -0.02421874925494194, 0.765625, 0.02421874925494194, 0.7177083492279053, -0.02421874925494194, 0.7822916507720947, 0.02421874925494194, 0.734375, -0.02421874925494194, 0.7989583611488342, 0.02421874925494194, 0.7510416507720947, -0.02421874925494194, 0.815625011920929, 0.02421874925494194, 0.7010416388511658, -0.01171875, 0.765625, 0.03671874850988388, 0.7177083492279053, -0.01171875, 0.7822916507720947, 0.03671874850988388, 0.734375, -0.01171875, 0.7989583611488342, 0.03671874850988388, 0.7510416507720947, -0.01171875, 0.815625011920929, 0.03671874850988388, 0.7010416388511658, 0.0007812500116415322, 0.765625, 0.04921875149011612, 0.7177083492279053, 0.0007812500116415322, 0.7822916507720947, 0.04921875149011612, 0.734375, 0.0007812500116415322, 0.7989583611488342, 0.04921875149011612, 0.7510416507720947, 0.0007812500116415322, 0.815625011920929, 0.04921875149011612, 0.7010416388511658, 0.01328125037252903, 0.765625, 0.06171875074505806, 0.7177083492279053, 0.01328125037252903, 0.7822916507720947, 0.06171875074505806, 0.734375, 0.01328125037252903, 0.7989583611488342, 0.06171875074505806, 0.7510416507720947, 0.01328125037252903, 0.815625011920929, 0.06171875074505806, 0.6677083373069763, -0.04921875149011612, 0.7989583611488342, 0.04921875149011612, 0.7010416388511658, -0.04921875149011612, 0.8322916626930237, 0.04921875149011612, 0.6677083373069763, -0.02421874925494194, 0.7989583611488342, 0.07421875, 
0.7010416388511658, -0.02421874925494194, 0.8322916626930237, 0.07421875, 0.6343749761581421, -0.07421875, 0.8989583253860474, 0.12421874701976776, 0.7677083611488342, -0.02421874925494194, 0.8322916626930237, 0.02421874925494194, 0.784375011920929, -0.02421874925494194, 0.8489583134651184, 0.02421874925494194, 0.8010416626930237, -0.02421874925494194, 0.8656250238418579, 0.02421874925494194, 0.8177083134651184, -0.02421874925494194, 0.8822916746139526, 0.02421874925494194};

float32_t result = 0.0;
clock_t start = clock();
for(int i=0; i<1024; i++){
vector_dot_c(arrayA, arrayB, &result, 1024); //简单的两个矩阵相乘 然后进行累加运算
}
clock_t end = clock();
LOGD("c spend time :%ld\n", end-start);

float32_t result1 = 0.0;
start = clock();
for(int i=0; i<1024; i++){
vector_dot_intrinsics(arrayA, arrayB, &result1, 1024);
}
end = clock();
LOGD("intrinsics spend time :%ld\n", end-start);

float32_t result2 = 0.0;
start = clock();
float32_t assembly_result[5] = {0.0};
for(int i=0; i<1024; i++){
vector_dot_assembly(arrayA, arrayB, assembly_result, 1024);
result2 = assembly_result[0] + assembly_result[1] + assembly_result[2] + assembly_result[3];
}
end = clock();
LOGD("assembly spend time :%ld\n\n", end-start);

LOGD("intrinsics result compare with c result diff:%f\n", abs(result-result1));
LOGD("assembly result compare with c result diff:%f\n", abs(result-result2));

}
// JNI entry point: runs the dot-product benchmark (results go to logcat via
// LOGD) and returns a fixed greeting string to the Java caller.
extern "C" JNIEXPORT jstring JNICALL
Java_com_example_neon_MainActivity_stringFromJNI(
JNIEnv* env,
jobject /* this */) {
    test();

    static const char kGreeting[] = "Hello from C++";
    return env->NewStringUTF(kGreeting);
}

测试结果

D/TAG: isCPU64 mProcessor = AArch64 Processor rev 4 (aarch64)
YES
D/TEST_NEON: c spend time :22476
D/TEST_NEON: intrinsics spend time :11470
D/TEST_NEON: assembly spend time :1594
intrinsics result compare with c result diff:0.001953
assembly result compare with c result diff:0.999023
I/Adreno: QUALCOMM build : e0ff253, I1b6e53de78
Build Date : 02/16/18
OpenGL ES Shader Compiler Version: XE031.09.00.04
Local Branch :
Remote Branch : quic/gfx-adreno.lnx.1.0.c15-rel
Remote Branch : NONE
Reconstruct Branch : NOTHING
I/OpenGLRenderer: Initialized EGL, version 1.4
D/OpenGLRenderer: Swap behavior 1
I/ViewConfigCompat: Could not find method getScaledScrollFactor() on ViewConfiguration

(2)在海思 hi3559a 直接编译 不添加任何编译选项 

root@hi3559a:~/sxj_test# gcc neon.c
neon.c: In function ‘main’:
neon.c:106:62: warning: implicit declaration of function ‘abs’ [-Wimplicit-function-declaration]
printf("intrinsics result compare with c result diff:%f\n", abs(result-result1));
^
neon.c:106:9: warning: format ‘%f’ expects argument of type ‘double’, but argument 2 has type ‘int’ [-Wformat=]
printf("intrinsics result compare with c result diff:%f\n", abs(result-result1));
^
neon.c:107:9: warning: format ‘%f’ expects argument of type ‘double’, but argument 2 has type ‘int’ [-Wformat=]
printf("assembly result compare with c result diff:%f\n", abs(result-result2));
^
root@hi3559a:~/sxj_test# ./a.out
c spend time :10614
intrinsics spend time :10654
assembly spend time :675

intrinsics result compare with c result diff:0.001953
assembly result compare with c result diff:-3.999023

不优化编译似乎并没有达到 文章提到的“使用neon汇编优化效率最高!是c语言的27倍!”

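(3)、尴尬的是,构建的海思芯片ubuntu系统,貌似不支持neon指令(更正:AArch64 内核在 /proc/cpuinfo 中用 asimd(Advanced SIMD)表示 NEON,下面的 Features 里有 asimd,说明其实是支持 NEON 的;上面(2)中 intrinsics 版本能正常编译运行也印证了这一点)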

root@hi3559a:~/sxj_test# cat /proc/cpuinfo
processor : 0
BogoMIPS : 100.00
Features : fp asimd evtstrm aes pmull sha1 sha2 crc32
CPU implementer : 0x41
CPU architecture: 8
CPU variant : 0x0
CPU part : 0xd03
CPU revision : 4

如果支持的话 大概会这样显示

cat /proc/cpuinfo
Features : swp half thumb fastmult vfp edsp thumbee vfpv3vfpv3d16

(4)找硬件小组的同事借了一个树莓派raspberry2 可以使用neon

root@raspberrypi:~#  cat /proc/cpuinfo
processor : 0
model name : ARMv7 Processor rev 5 (v7l)
BogoMIPS : 38.40
Features : half thumb fastmult vfp edsp neon vfpv3 tls vfpv4 idiva idivt vfpd32 lpae evtstrm
CPU implementer : 0x41
CPU architecture: 7
CPU variant : 0x0
CPU part : 0xc07
CPU revision : 5

因为测试代码的汇编指令是armv8指令,树莓派是armv7,因此需要修改一下,修改参照语法 (这语法真是挣扎的学习 资料好少) 记录一下~

未完成,还有问题:指令需要改写成 ARMv7 版本,还在学习中,待标号10 抄袭完成

g++ -O3 -mcpu=cortex-a7 -mfloat-abi=hard -mfpu=neon-vfpv4 -ffast-math -S neon.c

主要参考:

​https://developer.arm.com/architectures/instruction-sets/intrinsics/​

​https://www.yuque.com/docs/share/3eff70c4-c70f-40df-b0af-df9fa7365d3c?#​

标签:11,0.01328125037252903,0.02421874925494194,0.01171875,Neon,0.04921875149011612,S
From: https://blog.51cto.com/u_12504263/5719208

相关文章