首页 > 其他分享 >7、在Android Studio中调用腾讯开源框架NCNN去实现Yolo-fastest(CPU+GPU)

7、在Android Studio中调用腾讯开源框架NCNN去实现Yolo-fastest(CPU+GPU)

时间:2022-09-28 12:11:19浏览次数:85  
标签:scale convolution max Yolo Studio NCNN ubuntu threshold ncnn


基本思想:想测试一下yolo-fast在Android的运行时间,因为不太喜欢(​​https://github.com/dog-qiuqiu/Yolo-Fastest​​​)的源码移植 ,使用腾讯企鹅的第三方包​​https://github.com/Tencent/ncnn/releases​

本人使用笔记本DELL进行环境构建,测试平板使用咸鱼网淘的~ 双十一~

第一步:创建Android Studio工程,导入opencv4.4模块(java端和c++端,具体参考:6、Android配置opencv4.5及C++ Native Yolo4.0模型检测_sxj731533730)

然后去官网​​https://github.com/Tencent/ncnn/releases​​ 下载ncnn-android-lib.zip,解压;

ubuntu@ubuntu:$ tree -L 2
.
├── arm64-v8a
│ └── libncnn.a
├── armeabi-v7a
│ └── libncnn.a
├── include
│ └── ncnn
├── x86
│ └── libncnn.a
└── x86_64
└── libncnn.a

6 directories, 4 files

然后导入Android Studio工程中,本人按照opencv4.4的文件结构导入,未修改文件结构,只做复制操作

7、在Android Studio中调用腾讯开源框架NCNN去实现Yolo-fastest(CPU+GPU)_#include

ubuntu@ubuntu:~/AndroidStudioProjects/ncnn/app/src/main$ tree -L 3
.
├── AndroidManifest.xml
├── cpp
│ ├── CMakeLists.txt
│ ├── include
│ │ ├── glslang
│ │ ├── ncnn
│ │ ├── opencv2
│ │ └── SPIRV
│ └── native-lib.cpp
├── java
│ └── com
│ └── yolofastest
├── jniLibs
│ └── libs
│ ├── arm64-v8a
│ ├── armeabi-v7a
│ ├── x86
│ └── x86_64
└── res
├── drawable
│ └── ic_launcher_background.xml
├── drawable-v24
│ └── ic_launcher_foreground.xml
├── layout
│ └── activity_main.xml
├── mipmap-anydpi-v26
│ ├── ic_launcher_round.xml
│ └── ic_launcher.xml
├── mipmap-hdpi
│ ├── ic_launcher.png
│ └── ic_launcher_round.png
├── mipmap-mdpi
│ ├── ic_launcher.png
│ └── ic_launcher_round.png
├── mipmap-xhdpi
│ ├── ic_launcher.png
│ └── ic_launcher_round.png
├── mipmap-xxhdpi
│ ├── ic_launcher.png
│ └── ic_launcher_round.png
├── mipmap-xxxhdpi
│ ├── ic_launcher.png
│ └── ic_launcher_round.png
└── values
├── colors.xml
├── strings.xml
└── styles.xml

26 directories, 21 files

对应的jniLibs文件夹结构为:

ubuntu@ubuntu:~/AndroidStudioProjects/ncnn/app/src/main/jniLibs$ tree -L 3
.
└── libs
├── arm64-v8a
│ ├── libglslang.a
│ ├── libncnn.a
│ ├── libOGLCompiler.a
│ ├── libopencv_java4.so
│ ├── libOSDependent.a
│ └── libSPIRV.a
├── armeabi-v7a
│ ├── libglslang.a
│ ├── libncnn.a
│ ├── libOGLCompiler.a
│ ├── libopencv_java4.so
│ ├── libOSDependent.a
│ └── libSPIRV.a
├── x86
│ ├── libglslang.a
│ ├── libncnn.a
│ ├── libOGLCompiler.a
│ ├── libopencv_java4.so
│ ├── libOSDependent.a
│ └── libSPIRV.a
└── x86_64
├── libglslang.a
├── libncnn.a
├── libOGLCompiler.a
├── libopencv_java4.so
├── libOSDependent.a
└── libSPIRV.a

5 directories, 24 files

修改对应build.gradle文件

7、在Android Studio中调用腾讯开源框架NCNN去实现Yolo-fastest(CPU+GPU)_java_02

     externalNativeBuild {
cmake {
cppFlags "-std=c++11"
//arguments '-DANDROID=c++_shared'
//abiFilters 'armeabi-v7a','arm64-v8a','x86','x86_64'
}
ndk{
abiFilters 'armeabi-v7a' // x86 armeabi arm64-v8a x86_64

}
}
}
sourceSets{
main{
jniLibs.srcDirs=["src/main/jniLibs/libs"]
}
}

然后配置对应的 CMakeLists.txt 文件,参考了 GitHub 中 ncnn 官方的教程和 example 测试所得~

# For more information about using CMake with Android Studio, read the
# documentation: https://d.android.com/studio/projects/add-native-code.html

# Sets the minimum version of CMake required to build the native library.
cmake_minimum_required(VERSION 3.4.1)

# JNI bridge library that wraps ncnn + OpenCV inference for the app.
add_library(native-lib SHARED native-lib.cpp)

# Header search path for the ncnn, glslang/SPIRV and OpenCV headers copied
# under cpp/include — target-scoped instead of directory-wide
# include_directories(), so it does not leak to unrelated targets.
target_include_directories(native-lib PRIVATE ${CMAKE_SOURCE_DIR}/include)

# Import the prebuilt OpenCV shared library for the ABI being built.
add_library(libopencv_java4 SHARED IMPORTED)
set_target_properties(libopencv_java4 PROPERTIES IMPORTED_LOCATION
    ${CMAKE_SOURCE_DIR}/../jniLibs/libs/${ANDROID_ABI}/libopencv_java4.so)

# Import the prebuilt ncnn static library for the ABI being built.
add_library(libncnn STATIC IMPORTED)
set_target_properties(libncnn PROPERTIES IMPORTED_LOCATION
    ${CMAKE_SOURCE_DIR}/../jniLibs/libs/${ANDROID_ABI}/libncnn.a)

# ncnn uses OpenMP internally, so the wrapper must be compiled with it
# (target-scoped instead of mutating CMAKE_CXX_FLAGS globally).
target_compile_options(native-lib PRIVATE -fopenmp)

# NDK r21+ ships OpenMP only as a static runtime; request it at link time.
# Kept as a global linker flag because the declared CMake minimum (3.4.1)
# predates target_link_options (3.13).
if(DEFINED ANDROID_NDK_MAJOR AND ${ANDROID_NDK_MAJOR} GREATER 20)
    set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -static-openmp")
endif()

# System NDK libraries: android provides the AAssetManager_*/AAsset_* symbols
# that ncnn's net/datareader code references; log provides __android_log_*.
find_library(android-lib android)
find_library(log-lib log)

# Explicit PRIVATE visibility — the keyword-less signature has legacy
# semantics and should not be mixed with keyworded calls.
target_link_libraries(native-lib
    PRIVATE
        jnigraphics
        libopencv_java4   # OpenCV shared .so
        libncnn           # ncnn static .a
        ${log-lib}
        ${android-lib}
)

如果不加这两句

find_library(android-lib android)
target_link_libraries( # Specifies the target library.
.....
${android-lib}
)

会报这个错误

ncnn/src/net.cpp:650: error: undefined reference to 'AAssetManager_open'
ncnn/src/net.cpp:658: error: undefined reference to 'AAsset_close'
ncnn/src/net.cpp:671: error: undefined reference to 'AAssetManager_open'
ncnn/src/net.cpp:679: error: undefined reference to 'AAsset_close'
ncnn/src/net.cpp:691: error: undefined reference to 'AAssetManager_open'
ncnn/src/net.cpp:699: error: undefined reference to 'AAsset_close'
ncnn/src/datareader.cpp:93: error: undefined reference to 'AAsset_seek'
ncnn/src/datareader.cpp:94: error: undefined reference to 'AAsset_getBuffer'

然后开始在native-lib.cpp中开发使用小企鹅的ncnn 调用fast-yolo模型了;代码完全移植了其中的example文件夹的ncnn 源码:​​https://github.com/dog-qiuqiu/Yolo-Fastest​

#include <jni.h>
#include <string>
#include <iostream>
#include <opencv2/core/core.hpp>
#include <opencv2/opencv.hpp>
#include <ncnn/benchmark.h>
#include <ncnn/cpu.h>
#include <ncnn/datareader.h>
#include <ncnn/net.h>
#include <ncnn/gpu.h>

#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <cstdio>
#include <vector>
#include <algorithm>


using namespace cv;
using namespace std;

extern "C" JNIEXPORT jstring JNICALL
Java_com_yolofastest_ncnn_MainActivity_stringFromJNI(
        JNIEnv* env,
        jobject /* this */) {
    std::string hello = "Hello from C++";

    // VOC class labels; index 0 is the implicit background class.
    static const char* class_names[] = {"background",
                                        "aeroplane", "bicycle", "bird", "boat",
                                        "bottle", "bus", "car", "cat", "chair",
                                        "cow", "diningtable", "dog", "horse",
                                        "motorbike", "person", "pottedplant",
                                        "sheep", "sofa", "train", "tvmonitor"
    };
    static const int num_classes = sizeof(class_names) / sizeof(class_names[0]);

    ncnn::Net detector;
    detector.load_param("yolo-fastest.param");
    detector.load_model("yolo-fastest.bin");
    const int detector_size_width = 320;
    const int detector_size_height = 320;

    cv::Mat image = cv::imread("dog.jpg");
    if (image.empty()) {
        // imread returns an empty Mat on failure; bail out instead of
        // dereferencing a null data pointer in from_pixels_resize below.
        return env->NewStringUTF("failed to load dog.jpg");
    }
    cv::Mat bgr = image.clone();
    int img_w = bgr.cols;
    int img_h = bgr.rows;

    // Resize straight to the network input size, converting BGR -> RGB.
    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB,
                                                 bgr.cols, bgr.rows,
                                                 detector_size_width, detector_size_height);

    // Scale pixels to [0, 1]; no mean subtraction for this model.
    const float mean_vals[3] = {0.f, 0.f, 0.f};
    const float norm_vals[3] = {1/255.f, 1/255.f, 1/255.f};
    in.substract_mean_normalize(mean_vals, norm_vals);

    ncnn::Extractor ex = detector.create_extractor();
    ex.set_num_threads(8);
    ex.input("data", in);
    ncnn::Mat out;
    ex.extract("output", out);

    // Each output row is [label, score, x1, y1, x2, y2] with normalized coords.
    for (int i = 0; i < out.h; i++)
    {
        const float* values = out.row(i);

        int label = static_cast<int>(values[0]);
        float score = values[1];
        float x1 = values[2] * img_w;
        float y1 = values[3] * img_h;
        float x2 = values[4] * img_w;
        float y2 = values[5] * img_h;

        // Clamp the box to the image so drawing never goes out of bounds.
        x1 = std::min(std::max(x1, 0.f), (float)img_w);
        y1 = std::min(std::max(y1, 0.f), (float)img_h);
        x2 = std::min(std::max(x2, 0.f), (float)img_w);
        y2 = std::min(std::max(y2, 0.f), (float)img_h);

        cv::rectangle(image, cv::Point(x1, y1), cv::Point(x2, y2),
                      cv::Scalar(255, 255, 0), 1, 1, 0);

        // Bounds-check the label before indexing class_names — a corrupt
        // output row would otherwise read out of bounds.
        const char* name = (label >= 0 && label < num_classes) ? class_names[label]
                                                               : "unknown";
        // snprintf instead of sprintf so the fixed buffer cannot overflow.
        char text[256];
        snprintf(text, sizeof(text), "%s %.1f%%", name, score * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
        cv::putText(image, text, cv::Point(x1, y1 + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    cv::imwrite("demo.jpg", image);
    return env->NewStringUTF(hello.c_str()); // actually the boxes should be returned here
}

一切修改完成,然后在我的OPPO R9手机上运行Android Studio看看YOLO-FAST识别我们的看门狗(此看门狗非彼看门狗) 哈哈哈

7、在Android Studio中调用腾讯开源框架NCNN去实现Yolo-fastest(CPU+GPU)_android_03

 CPU版,使用NCNN的库为:ncnn-android-lib 是 android 的静态库(armeabi-v7a + arm64-v8a + x86 + x86_64)

 GPU版,使用NCNN的库为:ncnn-android-vulkan-lib 是 android 的静态库(armeabi-v7a + arm64-v8a + x86 + x86_64,包含vulkan支持)

需要使用 AIDA64.apk 在设备上检测设备是否支持 Vulkan,一般会在“设备”栏目显示是否支持 Vulkan 等信息~

7、在Android Studio中调用腾讯开源框架NCNN去实现Yolo-fastest(CPU+GPU)_android_04

7、在Android Studio中调用腾讯开源框架NCNN去实现Yolo-fastest(CPU+GPU)_android_05

如果,你的设备支持该组件,恭喜你 你可以在设备上移植NCNN的GPU库,移植方法雷同cpu,我在这个200元的平板上测试,速度达到每帧121ms~~~ (croph*cropw=256*256)

在使用 darknet yolo1/2/3/4/fast/* 训练自己的模型之后,需要使用 ncnn 转换一下模型格式:

​https://github.com/Tencent/ncnn​

下载完成之后,进行编译

ubuntu@ubuntu:~$ git clone https://github.com/Tencent/ncnn
ubuntu@ubuntu:~$ cd ncnn
ubuntu@ubuntu:~/ncnn$ mkdir build
ubuntu@ubuntu:~/ncnn$ cd build
ubuntu@ubuntu:~/ncnn/build$ cmake ..
ubuntu@ubuntu:~/ncnn/build$ make -j8
ubuntu@ubuntu:~/ncnn/build$ make install
ubuntu@ubuntu:~/ncnn/build$ cd ncnn/tools/darknet/
ubuntu@ubuntu:~/ncnn/build/tools/darknet$ darknet2ncnn **.cfg **.weight a.param a.bin 1

然后模型就转化成功,即可在我提供的源码中调用了;

我们还可以使用小企鹅对模型进行压缩,从模型的1.2MB压缩到343.3KB,小企鹅威武啊~~~

ubuntu@ubuntu:~/ncnn/build/tools$ ./ncnnoptimize original.param original.bin result.param result.bin 1 

生成量化的图片列表

import os

# Directory of candidate calibration images (Windows path from the author's
# machine — adjust to your own dataset location).
path = r"F:\DEMOTEST\JPEGImages"
my_files = os.listdir(path)
print(my_files)

# Collect full paths of all .jpg images. Use endswith() rather than the
# original substring test ("jpg" in files), which also matched names like
# "jpg_notes.txt"; lower() makes the match case-insensitive.
listFiles = []
for files in my_files:
    if files.lower().endswith(".jpg"):
        listFiles.append(os.path.join(path, files))

listFiles.sort()

# Write one image path per line; ncnn2table consumes this list file.
with open("jpglist.txt", "w") as fo:
    for fileName in listFiles:
        fo.write(fileName + "\n")
        print(fileName)

然后开始开始量化

ubuntu@ubuntu:~/ncnn/build/tools/quantize$ ncnn2table yolov4_opt.param yolov4_opt.bin jpglist.txt result.table mean=[0,0,0] norm=[0.00392,0.00392,0.00392] shape=[416,416,3] pixel=BGR thread=8 method=kl
build histogram 76.47% [ 900 / 1177 ]
build histogram 84.96% [ 1000 / 1177 ]
build histogram 93.46% [ 1100 / 1177 ]
0_34 : max = 2.567000 threshold = 2.176560 scale = 58.348949
1_42 : max = 228.259766 threshold = 59.349770 scale = 2.139857
2_50 : max = 364.625031 threshold = 47.625584 scale = 2.666634
4_63 : max = 117.391670 threshold = 39.694206 scale = 3.199459
5_71 : max = 149.158264 threshold = 48.177830 scale = 2.636067
7_82 : max = 171.264801 threshold = 51.136929 scale = 2.483528
10_97 : max = 151.492798 threshold = 59.435772 scale = 2.136760
12_110 : max = 67.995026 threshold = 26.344753 scale = 4.820694
13_118 : max = 69.356300 threshold = 26.431931 scale = 4.804795
15_129 : max = 88.959381 threshold = 34.119919 scale = 3.722166
18_144 : max = 74.642670 threshold = 64.310051 scale = 1.974808
20_157 : max = 24.993559 threshold = 13.235114 scale = 9.595686
21_165 : max = 34.969898 threshold = 22.052551 scale = 5.758971
23_176 : max = 35.292606 threshold = 26.236813 scale = 4.840527
26_191 : max = 56.171196 threshold = 33.584782 scale = 3.781475
27_201 : max = 25.793427 threshold = 16.290916 scale = 7.795755
28_209 : max = 22.421173 threshold = 16.744719 scale = 7.584481
29_217 : max = 20.924583 threshold = 16.853077 scale = 7.535716
32_250 : max = 22.421173 threshold = 16.744719 scale = 7.584481
35_264 : max = 24.094851 threshold = 16.488737 scale = 7.702227
36_272 : max = 10.068645 threshold = 7.627687 scale = 16.649870
ncnn int8 calibration table create success, best wish for your int8 inference has a low accuracy loss...\(^0^)/...233...
ubuntu@ubuntu:~/ncnn/build/tools/quantize$ ./ncnn2int8 yolov4_opt.param yolov4_opt.bin yolov4_opt_int8.param yolov4_opt_int8.bin result.table
quantize_convolution 0_34
quantize_convolution 1_42
quantize_convolution 2_50
quantize_convolution 4_63
quantize_convolution 5_71
quantize_convolution 7_82
quantize_convolution 10_97
quantize_convolution 12_110
quantize_convolution 13_118
quantize_convolution 15_129
quantize_convolution 18_144
quantize_convolution 20_157
quantize_convolution 21_165
quantize_convolution 23_176
quantize_convolution 26_191
quantize_convolution 27_201
quantize_convolution 28_209
quantize_convolution 29_217
quantize_convolution 32_250
quantize_convolution 35_264
quantize_convolution 36_272
fuse_requantize 0_34 1_42
fuse_requantize 1_42 2_50
fuse_requantize 26_191 27_201
fuse_requantize 28_209 29_217
fuse_requantize 35_264 36_272
fuse_requantize 27_201 27_201_bn_leaky_split
mac = 3401761792 = 3401.76 M

致谢小企鹅;

标签:scale,convolution,max,Yolo,Studio,NCNN,ubuntu,threshold,ncnn
From: https://blog.51cto.com/u_12504263/5719118

相关文章