
48. Converting an mmpose keypoint-recognition model to ncnn and MNN, with training and deployment



Basic idea: I needed a keypoint recognition model. After weighing simplepose, PaddlePose, and mmpose, I found that simplepose depends on MXNet (qiuqiu's work), and PaddlePose deployment felt awkward to use, though that may just be my unfamiliarity. I did test PaddleDetection's keypoint detector and its models do convert easily, but I still wanted to land on ncnn and MNN, and since I am also learning TensorRT basics an ONNX export is needed anyway (excuses not to use it, ha). mmpose tested well, and with MMDeploy behind it for deployment there should be no need to patch the source just to convert models, so I chose mmpose for this experiment and for training on my own scenario.

Test image

(image)

Step 1: Download mmpose and set up the environment; for everything else, follow the official guide, Get Started — MMPose 0.28.0 documentation:

ubuntu@ubuntu:~$ git clone https://github.com/open-mmlab/mmpose.git
ubuntu@ubuntu:~$ pip install mmcv-full==1.6.1 -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html

Step 2: Test the recognition

ubuntu@ubuntu:~/mmpose/demo$ python3 top_down_img_demo_with_mmdet.py mmdetection_cfg/cascade_rcnn_x101_64x4d_fpn_1class.py ../model/cascade_rcnn_x101_64x4d_fpn_20e_macaque-e45e36f5_20210409.pth ../configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/hrnet_w32_macaque_256x192.py ../model/hrnet_w32_macaque_256x192-f7e9e04f_20210407.pth --img-root ./ --img 0.jpg --out-img-root ./
load checkpoint from local path: ../model/cascade_rcnn_x101_64x4d_fpn_20e_macaque-e45e36f5_20210409.pth
load checkpoint from local path: ../model/hrnet_w32_macaque_256x192-f7e9e04f_20210407.pth
/home/ubuntu/.local/lib/python3.8/site-packages/torch/nn/functional.py:718: UserWarning: Named tensors and all their associated APIs are an experimental feature and subject to change. Please do not use them for anything important until they are released as stable. (Triggered internally at /pytorch/c10/core/TensorImpl.h:1156.)
return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)

Recognition result

(image)

Step 3: Export to ONNX and simplify. I did not convert the detection model, because I want to reuse my earlier FastestDet or nanodet as the detector for speed, with the mmpose landmark model stacked behind it; that also keeps things convenient if this later moves onto rknn.

The detector cascade_rcnn_x101_64x4d_fpn_20e_macaque-e45e36f5_20210409.pth therefore does not need converting; pair the pipeline with nanodet or FastestDet and convert only the keypoint model. The relevant call in the demo script:


# test a single image, the resulting box is (x1, y1, x2, y2)
mmdet_results = inference_detector(det_model, image_name)  # swap in any detection head here; print the first image's boxes so they can serve as the reference bbox for the C++ code later
print(mmdet_results)
# keep the person class bounding boxes.
person_results = process_mmdet_results(mmdet_results, args.det_cat_id)
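Since only the bbox is consumed downstream, any detector can stand in for mmdet. A minimal sketch of driving the pose model from an externally supplied box, assuming the mmpose 0.x Python API (the paths are the ones used above; the box values are illustrative):

from mmpose.apis import (init_pose_model, inference_top_down_pose_model,
                         vis_pose_result)

pose_model = init_pose_model(
    '../configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/hrnet_w32_macaque_256x192.py',
    '../model/hrnet_w32_macaque_256x192-f7e9e04f_20210407.pth')

# x, y, w, h, score from nanodet / FastestDet / a hand annotation
person_results = [{'bbox': [221.4, 193.5, 766.0, 842.3, 0.99]}]
pose_results, _ = inference_top_down_pose_model(
    pose_model, '0.jpg', person_results, format='xywh')
vis_pose_result(pose_model, '0.jpg', pose_results, out_file='vis.jpg')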

Model input

(image)

Model output

(image)

Step 4: Convert the model to ncnn, leaning on ncnn's own features. (I had not planned to also port to MNN — my energy really should stay on FPGA, my actual field — but Step 5 covers it anyway.)


ubuntu@ubuntu:~/mmpose/tools/deployment$ python3 pytorch2onnx.py --config ../../configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/hrnet_w32_macaque_256x192.py --checkpoint ../../model/hrnet_w32_macaque_256x192-f7e9e04f_20210407.pth
pytorch2onnx.py:146: UserWarning: DeprecationWarning: This tool will be deprecated in future. Welcome to use the unified model deployment toolbox MMDeploy: https://github.com/open-mmlab/mmdeploy
warnings.warn(msg)
load checkpoint from local path: ../../model/hrnet_w32_macaque_256x192-f7e9e04f_20210407.pth
Successfully exported ONNX model: tmp.onnx


ubuntu@ubuntu:~/mmpose/tools/deployment$ python3 -m onnxsim tmp.onnx tmp_sim.onnx
Simplifying...
Finish! Here is the difference:
┏━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┓
┃ ┃ Original Model ┃ Simplified Model ┃
┡━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━┩
│ Add │ 184 │ 184 │
│ Constant │ 51 │ 0 │
│ Conv │ 293 │ 293 │
│ Relu │ 261 │ 261 │
│ Resize │ 28 │ 28 │
│ Model Size │ 108.8MiB │ 108.9MiB │
└────────────┴────────────────┴──────────────────┘
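Before converting further, a quick sanity check of the simplified model (a sketch, assuming onnxruntime is installed; the expected shapes are for this 17-keypoint 256x192 model):

import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("tmp_sim.onnx", providers=["CPUExecutionProvider"])
inp = sess.get_inputs()[0]                      # name "input.1"
x = np.random.rand(1, 3, 256, 192).astype(np.float32)
(heatmap,) = sess.run(None, {inp.name: x})
print(inp.name, heatmap.shape)                  # expect (1, 17, 64, 48)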


ubuntu@ubuntu:~/MNN/build$ ./MNNConvert -f ONNX --modelFile ~/mmpose/tools/deployment/tmp_sim.onnx --MNNModel ~/mmpose/tools/deployment/tmp_sim.mnn --bizCode mnn
Start to Convert Other Model Format To MNN Model...
[09:43:17] /home/ubuntu/MNN/tools/converter/source/onnx/onnxConverter.cpp:40: ONNX Model ir version: 6
Start to Optimize the MNN Net...
inputTensors : [ input.1, ]
outputTensors: [ 2947, ]
Converted Success!


ubuntu@ubuntu:~/ncnn/build/install/bin$ ./onnx2ncnn ~/mmpose/tools/deployment/tmp_sim.onnx ~/mmpose/tools/deployment/tmp_sim.param ~/mmpose/tools/deployment/tmp_sim.bin

Places to consult in the mmpose source:

/home/ubuntu/mmpose/mmpose/datasets/pipelines/top_down_transform.py   # preprocessing starts here
/home/ubuntu/mmpose/mmpose/models/heads/topdown_heatmap_base_head.py  # post-processing starts here
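The post-processing that the C++ below reimplements boils down to a per-channel argmax, a quarter-pixel shift toward the higher neighbour, and a mapping back through center/scale. A numpy sketch of that decode (mmpose's convention of scale being in units of 200 px is assumed):

import numpy as np

def decode_heatmaps(heatmaps, center, scale):
    """heatmaps: (K, H, W); returns (K, 3) rows of x, y, score in image coords."""
    K, H, W = heatmaps.shape
    out = np.zeros((K, 3))
    for k in range(K):
        py, px = divmod(int(heatmaps[k].argmax()), W)
        score = heatmaps[k, py, px]
        dx = dy = 0.0
        if 1 < px < W - 1 and 1 < py < H - 1:  # quarter-pixel refinement
            dx = 0.25 * np.sign(heatmaps[k, py, px + 1] - heatmaps[k, py, px - 1])
            dy = 0.25 * np.sign(heatmaps[k, py + 1, px] - heatmaps[k, py - 1, px])
        # heatmap space -> original image space (scale is in units of 200 px)
        x = (px + dx) * scale[0] * 200 / W + center[0] - scale[0] * 200 * 0.5
        y = (py + dy) * scale[1] * 200 / H + center[1] - scale[1] * 200 * 0.5
        out[k] = (x, y, score)
    return out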

CMakeLists.txt

cmake_minimum_required(VERSION 3.16)
project(untitled10)
set(CMAKE_CXX_FLAGS "-std=c++11")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fopenmp ")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
set(CMAKE_CXX_STANDARD 11)
include_directories(${CMAKE_SOURCE_DIR})
include_directories(${CMAKE_SOURCE_DIR}/include)
include_directories(${CMAKE_SOURCE_DIR}/include/ncnn)
find_package(OpenCV REQUIRED)
#message(STATUS ${OpenCV_INCLUDE_DIRS})
# add the header directories
include_directories(${OpenCV_INCLUDE_DIRS})
# link the OpenCV libraries

add_library(libncnn STATIC IMPORTED)
#set_target_properties(libncnn PROPERTIES IMPORTED_LOCATION ${CMAKE_SOURCE_DIR}/lib/libncnn.a)

set_target_properties(libncnn PROPERTIES IMPORTED_LOCATION ${CMAKE_SOURCE_DIR}/lib/libncnn.a)
add_executable(untitled10 main.cpp)
target_link_libraries(untitled10 ${OpenCV_LIBS} libncnn )

Source code


#include <iostream>
#include <algorithm>
#include <vector>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/opencv.hpp>
#include "platform.h"
#include "net.h"
#include "omp.h"

#if NCNN_VULKAN
#include "gpu.h"
#endif // NCNN_VULKAN


struct Keypoints {
float x;
float y;
float score;

Keypoints() : x(0), y(0), score(0) {}

Keypoints(float x, float y, float score) : x(x), y(y), score(score) {}
};

struct Box {
float center_x;
float center_y;
float scale_x;
float scale_y;
float scale_prob;
float score;

Box() : center_x(0), center_y(0), scale_x(0), scale_y(0), scale_prob(0), score(0) {}

Box(float center_x, float center_y, float scale_x, float scale_y, float scale_prob, float score) :
center_x(center_x), center_y(center_y), scale_x(scale_x), scale_y(scale_y), scale_prob(scale_prob),
score(score) {}
};

void bbox_xywh2cs(float bbox[], float aspect_ratio, float padding, float pixel_std, float *center, float *scale) {
float x = bbox[0];
float y = bbox[1];
float w = bbox[2];
float h = bbox[3];
*center = x + w * 0.5;
*(center + 1) = y + h * 0.5;

if (w > aspect_ratio * h)
h = w * 1.0 / aspect_ratio;
else if (w < aspect_ratio * h)
w = h * aspect_ratio;


*scale = (w / pixel_std) * padding;
*(scale + 1) = (h / pixel_std) * padding;

}

void rotate_point(float *pt, float angle_rad, float *rotated_pt) {
float sn = sin(angle_rad);
float cs = cos(angle_rad);
float new_x = pt[0] * cs - pt[1] * sn;
float new_y = pt[0] * sn + pt[1] * cs;
rotated_pt[0] = new_x;
rotated_pt[1] = new_y;

}

void _get_3rd_point(cv::Point2f a, cv::Point2f b, float *direction) {

float direction_0 = a.x - b.x;
float direction_1 = a.y - b.y;
direction[0] = b.x - direction_1;
direction[1] = b.y + direction_0;


}

void get_affine_transform(float *center, float *scale, float rot, float *output_size, float *shift, bool inv,
cv::Mat &trans) {
float scale_tmp[] = {0, 0};
scale_tmp[0] = scale[0] * 200.0;
scale_tmp[1] = scale[1] * 200.0;
float src_w = scale_tmp[0];
float dst_w = output_size[0];
float dst_h = output_size[1];
float rot_rad = M_PI * rot / 180;
float pt[] = {0, 0};
pt[0] = 0;
pt[1] = src_w * (-0.5);
float src_dir[] = {0, 0};
rotate_point(pt, rot_rad, src_dir);
float dst_dir[] = {0, 0};
dst_dir[0] = 0;
dst_dir[1] = dst_w * (-0.5);
cv::Point2f src[3] = {cv::Point2f(0, 0), cv::Point2f(0, 0), cv::Point2f(0, 0)};
src[0] = cv::Point2f(center[0] + scale_tmp[0] * shift[0], center[1] + scale_tmp[1] * shift[1]);
src[1] = cv::Point2f(center[0] + src_dir[0] + scale_tmp[0] * shift[0],
center[1] + src_dir[1] + scale_tmp[1] * shift[1]);
float direction_src[] = {0, 0};
_get_3rd_point(src[0], src[1], direction_src);
src[2] = cv::Point2f(direction_src[0], direction_src[1]);
cv::Point2f dst[3] = {cv::Point2f(0, 0), cv::Point2f(0, 0), cv::Point2f(0, 0)};
dst[0] = cv::Point2f(dst_w * 0.5, dst_h * 0.5);
dst[1] = cv::Point2f(dst_w * 0.5 + dst_dir[0], dst_h * 0.5 + dst_dir[1]);
float direction_dst[] = {0, 0};
_get_3rd_point(dst[0], dst[1], direction_dst);
dst[2] = cv::Point2f(direction_dst[0], direction_dst[1]);

if (inv) {
trans = cv::getAffineTransform(dst, src);
} else {
trans = cv::getAffineTransform(src, dst);
}


}

void pretty_print(const ncnn::Mat &m, std::vector<float> &vec_heap) {
for (int q = 0; q < m.c; q++) {
const float *ptr = m.channel(q);
for (int z = 0; z < m.d; z++) {
for (int y = 0; y < m.h; y++) {
for (int x = 0; x < m.w; x++) {
// printf("%f ", ptr[x]);
vec_heap.emplace_back(ptr[x]);
}
ptr += m.w;
//printf("\n");
}
//printf("\n");
}
}
}

void flip_ncnn(const ncnn::Mat m, ncnn::Mat &in_flip) {

for (int q = 0; q < m.c; q++) {
float *ptr = (float *) (ncnn::Mat) m.channel(q);
for (int z = 0; z < m.d; z++) {
for (int y = 0; y < m.h; y++) {
for (int x = 0; x < m.w / 2; x++) {

float swap = ptr[x];
ptr[x] = ptr[m.w - x - 1];
ptr[m.w - x - 1] = swap;
// printf("%f ", ptr[x]);
}
ptr += m.w;
//printf("\n");
//break;
}
//printf("\n");
//break;
}
//printf("------------------------\n");
//break;
}
}

void flip_ncnn_mat(const ncnn::Mat m, ncnn::Mat &flip_in) {
for (int q = 0; q < m.c; q++) {
const float *ptr = m.channel(q);
ncnn::Mat flip_p = flip_in.channel(q);
float *flip_ptr = (float *) flip_p;
for (int z = 0; z < m.d; z++) {
for (int y = 0; y < m.h; y++) {
for (int x = m.w - 1; x >= 0; x--) {
flip_ptr[m.w - 1 - x] = ptr[x];
//printf("%f ", ptr[x]);
}
ptr += m.w;
flip_ptr += m.w;
//printf("\n");
//break;
}
//printf("\n");
//break;
}
//printf("------------------------\n");
//break;
}
}

void
transform_preds(std::vector<cv::Point2f> coords, std::vector<Keypoints> &target_coords, float *center, float *scale,
int w, int h, bool use_udp = false) {
float scale_x[] = {0, 0};
float temp_scale[] = {scale[0] * 200, scale[1] * 200};
if (use_udp) {
scale_x[0] = temp_scale[0] / (w - 1);
scale_x[1] = temp_scale[1] / (h - 1);
} else {
scale_x[0] = temp_scale[0] / w;
scale_x[1] = temp_scale[1] / h;
}
for (int i = 0; i < coords.size(); i++) {
target_coords[i].x = coords[i].x * scale_x[0] + center[0] - temp_scale[0] * 0.5;
target_coords[i].y = coords[i].y * scale_x[1] + center[1] - temp_scale[1] * 0.5;
}

}

void pretty_exchange_channel(ncnn::Mat &flip_result, int flip_pairs[][2], int num_pairs) {
    // the array parameter decays to a pointer, so sizeof tricks cannot recover
    // the pair count here; it has to be passed in. Swap the heatmap planes of
    // each left/right keypoint pair in place.
    int plane = flip_result.w * flip_result.h * flip_result.d;
    for (int i = 0; i < num_pairs; i++) {
        float *p0 = flip_result.channel(flip_pairs[i][0]);
        float *p1 = flip_result.channel(flip_pairs[i][1]);
        for (int j = 0; j < plane; j++) {
            float t = p0[j];
            p0[j] = p1[j];
            p1[j] = t;
        }
    }
}

void pretty_print(const ncnn::Mat &m) {
for (int q = 0; q < m.c; q++) {
const float *ptr = m.channel(q);
for (int z = 0; z < m.d; z++) {
for (int y = 0; y < m.h; y++) {
for (int x = 0; x < m.w; x++) {
printf("%f ", ptr[x]);
}
ptr += m.w;
printf("\n");
}
printf("\n");
}
printf("------------------------\n");
}
}

int main(int argc, char **argv) {
bool flip_test = true;
bool heap_map= false;
float keypoint_score=0.3f;
int flip_pairs[][2] = {{1, 2},{3, 4},{5, 6},{7, 8},
{9, 10},{11, 12},{13, 14},{15, 16}};
cv::Mat bgr = cv::imread("../0.jpg");
cv::Mat rgb;
cv::cvtColor(bgr, rgb, cv::COLOR_BGR2RGB);

float image_target_w = 256;
float image_target_h = 192;
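// note: mmpose "256x192" means height x width, so these _w/_h names are
// swapped relative to that convention, but they are used consistently below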
float padding = 1.25;
float pixel_std = 200;
float aspect_ratio = image_target_h / image_target_w;
float bbox[] = {2.213932e+02, 1.935179e+02, 9.873443e+02, 1.035825e+03, 9.995332e-01};// box from the upstream detector as x1 y1 x2 y2 score; converted to x y w h just below
bbox[2] = bbox[2] - bbox[0];
bbox[3] = bbox[3] - bbox[1];
float center[2] = {0, 0};
float scale[2] = {0, 0};
bbox_xywh2cs(bbox, aspect_ratio, padding, pixel_std, center, scale);
float rot = 0;
float shift[] = {0, 0};
bool inv = false;
float output_size[] = {image_target_h, image_target_w};
cv::Mat trans;
get_affine_transform(center, scale, rot, output_size, shift, inv, trans);
std::cout << trans << std::endl;
cv::Mat detect_image;//= cv::Mat::zeros(image_target_w ,image_target_h, CV_8UC3);
cv::warpAffine(rgb, detect_image, trans, cv::Size(image_target_h, image_target_w), cv::INTER_LINEAR);

std::cout << detect_image.cols << " " << detect_image.rows << std::endl;
//std::cout << detect_image<<std::endl;
ncnn::Net harnet;

harnet.load_param("../model/tmp_sim.param");
harnet.load_model("../model/tmp_sim.bin");

ncnn::Mat in = ncnn::Mat::from_pixels(detect_image.data, ncnn::Mat::PIXEL_RGB, detect_image.cols,
detect_image.rows);
// transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
const float mean_vals[3] = {0.485f * 255.f, 0.456f * 255.f, 0.406f * 255.f};
const float norm_255[3] = {(1 / 0.229f / 255.f), (1 / 0.224f / 255.f), (1 / 0.225f / 255.f)};
in.substract_mean_normalize(mean_vals, norm_255);
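// substract_mean_normalize computes (x - mean) * norm per channel, so the
// 1/255 of torchvision's ToTensor is folded into norm and the mean is
// pre-scaled by 255 to reproduce Normalize((0.485, ...), (0.229, ...))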

fprintf(stderr, "input shape: %d %d %d %d\n", in.d, in.h, in.w, in.c);

ncnn::Extractor ex = harnet.create_extractor();

ex.input("input.1", in);//input 是 .param文件中输入节点名称

ncnn::Mat result;

ex.extract("2947", result);
fprintf(stderr, "output shape: %d %d %d %d\n", result.d, result.c, result.h, result.w);
int shape_c = result.c;
int shape_w = result.w;
int shape_h = result.h;
std::vector<float> vec_heap;
pretty_print(result, vec_heap);
std::vector<float> vec_flip_heap;
std::vector<float> vec_result_heap = vec_heap;
if (flip_test) {
ncnn::Mat flip_in;
flip_in.create(in.w, in.h, in.d, in.c);
//pretty_print(in);
flip_ncnn_mat(in, flip_in);//flip(3)
//pretty_print(flip_in);
fprintf(stderr, "input shape: %d %d %d %d\n", flip_in.d, flip_in.c, flip_in.h, flip_in.w);
ex.input("input.1", flip_in);//input 是 .param文件中输入节点名称
ncnn::Mat flip_result;
ex.extract("2947", flip_result);
fprintf(stderr, "flip_output shape: %d %d %d %d\n", flip_result.d, flip_result.c, flip_result.h, flip_result.w);
int flip_shape_c = flip_result.c;
int flip_shape_w = flip_result.w;
int flip_shape_h = flip_result.h;

pretty_exchange_channel(flip_result, flip_pairs, sizeof(flip_pairs) / sizeof(flip_pairs[0]));
pretty_print(flip_result, vec_flip_heap);
#pragma omp parallel for num_threads(omp_get_max_threads())
for (int i = 0; i < vec_result_heap.size(); i++) {
vec_result_heap[i] = (vec_heap[i] + vec_flip_heap[i]) / 2;
}
}

std::vector<Keypoints> all_preds;
std::vector<int> idx;
for (int i = 0; i < shape_c; i++) {
auto begin = vec_result_heap.begin() + i * shape_w * shape_h;
auto end = vec_result_heap.begin() + (i + 1) * shape_w * shape_h;
float maxValue = *max_element(begin, end);
int maxPosition = max_element(begin, end) - begin;
all_preds.emplace_back(Keypoints(0, 0, maxValue));
idx.emplace_back(maxPosition);
}
std::vector<cv::Point2f> vec_point;
for (int i = 0; i < idx.size(); i++) {
int x = idx[i] % shape_w;
int y = idx[i] / shape_w;
vec_point.emplace_back(cv::Point2f(x, y));
}


for (int i = 0; i < shape_c; i++) {
int px = vec_point[i].x;
int py = vec_point[i].y;
if (px > 1 && px < shape_w - 1 && py > 1 && py < shape_h - 1) {
float diff_0 = vec_heap[py * shape_w + px + 1] - vec_heap[py * shape_w + px - 1];
float diff_1 = vec_heap[(py + 1) * shape_w + px] - vec_heap[(py - 1) * shape_w + px];
vec_point[i].x += diff_0 == 0 ? 0 : (diff_0 > 0) ? 0.25 : -0.25;
vec_point[i].y += diff_1 == 0 ? 0 : (diff_1 > 0) ? 0.25 : -0.25;
}
}
std::vector<Box> all_boxes;
if(heap_map){
all_boxes.emplace_back(Box(center[0], center[1], scale[0], scale[1], scale[0] * scale[1] * 400, bbox[4]));
}
transform_preds(vec_point, all_preds, center, scale, shape_w, shape_h);
int skeleton[][2] = {{15, 13},{13, 11},{16, 14},{14, 12},
{11, 12},{5, 11},{6, 12},{5, 6},
{5, 7},{6, 8},{7, 9},{8, 10},
{1, 2},{0, 1},{0, 2},{1, 3},
{2, 4},{3, 5},{4, 6}};

cv::rectangle(bgr, cv::Point(bbox[0], bbox[1]), cv::Point(bbox[0] + bbox[2], bbox[1] + bbox[3]),
cv::Scalar(255, 0, 0));
for (int i = 0; i < all_preds.size(); i++) {
if(all_preds[i].score>keypoint_score){
cv::circle(bgr, cv::Point(all_preds[i].x, all_preds[i].y), 3, cv::Scalar(0, 255, 120), -1);// draw the keypoint as a filled circle
}
}
for (int i = 0; i < sizeof(skeleton) / sizeof(skeleton[0]); i++) {
int x0 = all_preds[skeleton[i][0]].x;
int y0 = all_preds[skeleton[i][0]].y;
int x1 = all_preds[skeleton[i][1]].x;
int y1 = all_preds[skeleton[i][1]].y;

cv::line(bgr, cv::Point(x0, y0), cv::Point(x1, y1),
cv::Scalar(0, 255, 0), 1);

}
cv::imshow("image", bgr);
cv::waitKey(0);
return 0;
}

Recognition result

(image)

Step 5: Convert to an MNN model

ubuntu@ubuntu:~/MNN/build$ ./MNNConvert -f ONNX --modelFile  ~/CLionProjects/untitled1/model/tmp_sim.onnx --MNNModel  ~/CLionProjects/untitled1/model/tmp_sim.mnn --bizCode MNN
Start to Convert Other Model Format To MNN Model...
[13:31:14] /home/ubuntu/MNN/tools/converter/source/onnx/onnxConverter.cpp:40: ONNX Model ir version: 6
Start to Optimize the MNN Net...
inputTensors : [ input.1, ]
outputTensors: [ 2947, ]
Converted Success!

CMakeLists.txt

cmake_minimum_required(VERSION 3.16)
project(untitled10)
set(CMAKE_CXX_FLAGS "-std=c++11")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fopenmp ")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
set(CMAKE_CXX_STANDARD 11)
include_directories(${CMAKE_SOURCE_DIR})
include_directories(${CMAKE_SOURCE_DIR}/include)
include_directories(${CMAKE_SOURCE_DIR}/include/MNN)
find_package(OpenCV REQUIRED)
#message(STATUS ${OpenCV_INCLUDE_DIRS})
# add the header directories
include_directories(${OpenCV_INCLUDE_DIRS})
# link the OpenCV libraries

add_library(libmnn SHARED IMPORTED)
#set_target_properties(libncnn PROPERTIES IMPORTED_LOCATION ${CMAKE_SOURCE_DIR}/lib/libncnn.a)

set_target_properties(libmnn PROPERTIES IMPORTED_LOCATION ${CMAKE_SOURCE_DIR}/lib/libMNN.so)
add_executable(untitled10 main.cpp)
target_link_libraries(untitled10 ${OpenCV_LIBS} libmnn )

Source code


#include <iostream>
#include <algorithm>
#include <vector>
#include <cstring> // for memcpy in exchange()
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/opencv.hpp>
#include<MNN/Interpreter.hpp>
#include<MNN/ImageProcess.hpp>




struct Keypoints {
float x;
float y;
float score;

Keypoints() : x(0), y(0), score(0) {}

Keypoints(float x, float y, float score) : x(x), y(y), score(score) {}
};

struct Box {
float center_x;
float center_y;
float scale_x;
float scale_y;
float scale_prob;
float score;

Box() : center_x(0), center_y(0), scale_x(0), scale_y(0), scale_prob(0), score(0) {}

Box(float center_x, float center_y, float scale_x, float scale_y, float scale_prob, float score) :
center_x(center_x), center_y(center_y), scale_x(scale_x), scale_y(scale_y), scale_prob(scale_prob),
score(score) {}
};

void bbox_xywh2cs(float bbox[], float aspect_ratio, float padding, float pixel_std, float *center, float *scale) {
float x = bbox[0];
float y = bbox[1];
float w = bbox[2];
float h = bbox[3];
*center = x + w * 0.5;
*(center + 1) = y + h * 0.5;

if (w > aspect_ratio * h)
h = w * 1.0 / aspect_ratio;
else if (w < aspect_ratio * h)
w = h * aspect_ratio;


*scale = (w / pixel_std) * padding;
*(scale + 1) = (h / pixel_std) * padding;

}

void rotate_point(float *pt, float angle_rad, float *rotated_pt) {
float sn = sin(angle_rad);
float cs = cos(angle_rad);
float new_x = pt[0] * cs - pt[1] * sn;
float new_y = pt[0] * sn + pt[1] * cs;
rotated_pt[0] = new_x;
rotated_pt[1] = new_y;

}

void _get_3rd_point(cv::Point2f a, cv::Point2f b, float *direction) {

float direction_0 = a.x - b.x;
float direction_1 = a.y - b.y;
direction[0] = b.x - direction_1;
direction[1] = b.y + direction_0;


}

void get_affine_transform(float *center, float *scale, float rot, float *output_size, float *shift, bool inv,
cv::Mat &trans) {
float scale_tmp[] = {0, 0};
scale_tmp[0] = scale[0] * 200.0;
scale_tmp[1] = scale[1] * 200.0;
float src_w = scale_tmp[0];
float dst_w = output_size[0];
float dst_h = output_size[1];
float rot_rad = M_PI * rot / 180;
float pt[] = {0, 0};
pt[0] = 0;
pt[1] = src_w * (-0.5);
float src_dir[] = {0, 0};
rotate_point(pt, rot_rad, src_dir);
float dst_dir[] = {0, 0};
dst_dir[0] = 0;
dst_dir[1] = dst_w * (-0.5);
cv::Point2f src[3] = {cv::Point2f(0, 0), cv::Point2f(0, 0), cv::Point2f(0, 0)};
src[0] = cv::Point2f(center[0] + scale_tmp[0] * shift[0], center[1] + scale_tmp[1] * shift[1]);
src[1] = cv::Point2f(center[0] + src_dir[0] + scale_tmp[0] * shift[0],
center[1] + src_dir[1] + scale_tmp[1] * shift[1]);
float direction_src[] = {0, 0};
_get_3rd_point(src[0], src[1], direction_src);
src[2] = cv::Point2f(direction_src[0], direction_src[1]);
cv::Point2f dst[3] = {cv::Point2f(0, 0), cv::Point2f(0, 0), cv::Point2f(0, 0)};
dst[0] = cv::Point2f(dst_w * 0.5, dst_h * 0.5);
dst[1] = cv::Point2f(dst_w * 0.5 + dst_dir[0], dst_h * 0.5 + dst_dir[1]);
float direction_dst[] = {0, 0};
_get_3rd_point(dst[0], dst[1], direction_dst);
dst[2] = cv::Point2f(direction_dst[0], direction_dst[1]);

if (inv) {
trans = cv::getAffineTransform(dst, src);
} else {
trans = cv::getAffineTransform(src, dst);
}


}


void
transform_preds(std::vector<cv::Point2f> coords, std::vector<Keypoints> &target_coords, float *center, float *scale,
int w, int h, bool use_udp = false) {
float scale_x[] = {0, 0};
float temp_scale[] = {scale[0] * 200, scale[1] * 200};
if (use_udp) {
scale_x[0] = temp_scale[0] / (w - 1);
scale_x[1] = temp_scale[1] / (h - 1);
} else {
scale_x[0] = temp_scale[0] / w;
scale_x[1] = temp_scale[1] / h;
}
for (int i = 0; i < coords.size(); i++) {
target_coords[i].x = coords[i].x * scale_x[0] + center[0] - temp_scale[0] * 0.5;
target_coords[i].y = coords[i].y * scale_x[1] + center[1] - temp_scale[1] * 0.5;
}

}

void flip_mnn_tensor(const MNN::Tensor *input, MNN::Tensor *flip_input) {
    int shape_c = input->channel();
    int shape_w = input->width();
    int shape_h = input->height();
    // horizontal flip; each channel plane needs its own base offset
    for (int i = 0; i < shape_c; i++) {
        int base = i * shape_h * shape_w;
        for (int h = 0; h < shape_h; h++) {
            for (int w = 0; w < shape_w; w++) {
                flip_input->host<float>()[base + h * shape_w + shape_w - 1 - w] =
                        input->host<float>()[base + h * shape_w + w];
            }
        }
    }
}

void exchange(MNN::Tensor *flip_heatmapHost, MNN::Tensor *heatmapHost, int flip_pairs[][2], int num_pairs) {
    // the array parameter decays to a pointer, so the pair count is passed in;
    // assumes every channel except 0 appears in flip_pairs (true for the
    // 17-keypoint macaque/COCO layout)
    int shape_w = flip_heatmapHost->width();
    int shape_h = flip_heatmapHost->height();
    int plane = shape_w * shape_h;
    // channel 0 has no left/right counterpart, copy it straight through
    memcpy(heatmapHost->host<float>(), flip_heatmapHost->host<float>(), plane * sizeof(float));
    for (int i = 0; i < num_pairs; i++) {
        // write each channel of the flipped result into its swapped slot
        memcpy(heatmapHost->host<float>() + flip_pairs[i][0] * plane,
               flip_heatmapHost->host<float>() + flip_pairs[i][1] * plane,
               plane * sizeof(float));
        memcpy(heatmapHost->host<float>() + flip_pairs[i][1] * plane,
               flip_heatmapHost->host<float>() + flip_pairs[i][0] * plane,
               plane * sizeof(float));
    }
}

int main(int argc, char **argv) {
bool flip_test = true;
bool heap_map = false;
float keypoint_score=0.3f;
int flip_pairs[][2] = {{1, 2},{3, 4},{5, 6},
{7, 8},{9, 10},{11, 12},
{13, 14},{15, 16}};
cv::Mat bgr = cv::imread("../0.jpg");
cv::Mat rgb;
cv::cvtColor(bgr, rgb, cv::COLOR_BGR2RGB);

float image_target_w = 256;
float image_target_h = 192;
float padding = 1.25;
float pixel_std = 200;
float aspect_ratio = image_target_h / image_target_w;
float bbox[] = {2.213932e+02, 1.935179e+02, 9.873443e+02, 1.035825e+03, 9.995332e-01};// box from the upstream detector as x1 y1 x2 y2 score; converted to x y w h just below
bbox[2] = bbox[2] - bbox[0];
bbox[3] = bbox[3] - bbox[1];
float center[2] = {0, 0};
float scale[2] = {0, 0};
bbox_xywh2cs(bbox, aspect_ratio, padding, pixel_std, center, scale);
float rot = 0;
float shift[] = {0, 0};
bool inv = false;
float output_size[] = {image_target_h, image_target_w};
cv::Mat trans;
get_affine_transform(center, scale, rot, output_size, shift, inv, trans);
std::cout << trans << std::endl;
cv::Mat detect_image;
cv::warpAffine(rgb, detect_image, trans, cv::Size(image_target_h, image_target_w), cv::INTER_LINEAR);

std::cout << detect_image.cols << " " << detect_image.rows << std::endl;
//std::cout << detect_image<<std::endl;

// MNN inference
auto mnnNet = std::shared_ptr<MNN::Interpreter>(MNN::Interpreter::createFromFile("../model/tmp_sim.mnn"));
MNN::ScheduleConfig netConfig;
netConfig.type = MNN_FORWARD_CPU;
netConfig.numThread = 4;

auto session = mnnNet->createSession(netConfig);
auto input = mnnNet->getSessionInput(session, nullptr);

mnnNet->resizeTensor(input, {1, 3, (int) image_target_w, (int) image_target_h});
mnnNet->resizeSession(session);

MNN::CV::ImageProcess::Config config;

const float mean_vals[3] = {0.485f * 255.f, 0.456f * 255.f, 0.406f * 255.f};
const float norm_255[3] = {(1 / 0.229f / 255.f), (1 / 0.224f / 255.f), (1 / 0.225f / 255.f)};

std::shared_ptr<MNN::CV::ImageProcess> pretreat(
MNN::CV::ImageProcess::create(MNN::CV::BGR, MNN::CV::BGR, mean_vals, 3,
norm_255, 3));
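// note: source and destination formats are both BGR, so convert() performs no
// channel swap; the buffer is already RGB, and the RGB-ordered mean/std above
// are applied to it unchanged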
pretreat->convert(detect_image.data, (int) image_target_w, (int) image_target_h, detect_image.step[0], input);

mnnNet->runSession(session);

auto heatmap = mnnNet->getSessionOutput(session, "2947");

MNN::Tensor heatmapHost(heatmap, heatmap->getDimensionType());
heatmap->copyToHostTensor(&heatmapHost);

std::vector<float> vec_heap;
for (int i = 0; i < heatmapHost.elementSize(); i++) {
//std::cout << heatmapHost.host<float>()[i] << " ";
vec_heap.emplace_back(heatmapHost.host<float>()[i]);
}

int shape_c = heatmapHost.channel();
int shape_w = heatmapHost.width();
int shape_h = heatmapHost.height();

std::vector<float> vec_flip_heap;
std::vector<float> vec_result_heap = vec_heap;
if (flip_test) {
    // NOTE: getSessionInput returns the same tensor as `input`, so flipping it
    // in place while reading from it would corrupt the pixels; snapshot the
    // input into a host tensor first, then write the flipped copy back
    MNN::Tensor inputHost(input, input->getDimensionType());
    input->copyToHostTensor(&inputHost);
    flip_mnn_tensor(&inputHost, input);
    // for (int i = 0; i < input->elementSize(); i++) {
    //     std::cout << input->host<float>()[i] << " ";
    // }
mnnNet->runSession(session);

auto flip_heatmap = mnnNet->getSessionOutput(session, "2947");

MNN::Tensor flip_heatmapHost(flip_heatmap, flip_heatmap->getDimensionType());
MNN::Tensor exchanged_heatmapHost(flip_heatmap, flip_heatmap->getDimensionType());
flip_heatmap->copyToHostTensor(&flip_heatmapHost);

exchange(&flip_heatmapHost, &exchanged_heatmapHost, flip_pairs,
         sizeof(flip_pairs) / sizeof(flip_pairs[0]));
for (int i = 0; i < exchanged_heatmapHost.elementSize(); i++) {
    // read from the channel-exchanged flipped heatmap, not the first run's
    vec_flip_heap.emplace_back(exchanged_heatmapHost.host<float>()[i]);
}

for (int i = 0; i < vec_result_heap.size(); i++) {
vec_result_heap[i] = (vec_heap[i] + vec_flip_heap[i]) / 2;
}

}

std::vector<Keypoints> all_preds;
std::vector<int> idx;
for (int i = 0; i < shape_c; i++) {
auto begin = vec_result_heap.begin() + i * shape_w * shape_h;
auto end = vec_result_heap.begin() + (i + 1) * shape_w * shape_h;
float maxValue = *max_element(begin, end);
int maxPosition = max_element(begin, end) - begin;
all_preds.emplace_back(Keypoints(0, 0, maxValue));
idx.emplace_back(maxPosition);
}
std::vector<cv::Point2f> vec_point;

for (int i = 0; i < idx.size(); i++) {
int x = idx[i] % shape_w;
int y = idx[i] / shape_w;
vec_point.emplace_back(cv::Point2f(x, y));
}


for (int i = 0; i < shape_c; i++) {
int px = vec_point[i].x;
int py = vec_point[i].y;
if (px > 1 && px < shape_w - 1 && py > 1 && py < shape_h - 1) {
float diff_0 = vec_heap[py * shape_w + px + 1] - vec_heap[py * shape_w + px - 1];
float diff_1 = vec_heap[(py + 1) * shape_w + px] - vec_heap[(py - 1) * shape_w + px];
vec_point[i].x += diff_0 == 0 ? 0 : (diff_0 > 0) ? 0.25 : -0.25;
vec_point[i].y += diff_1 == 0 ? 0 : (diff_1 > 0) ? 0.25 : -0.25;
}
}
std::vector<Box> all_boxes;
if (heap_map) {
all_boxes.emplace_back(Box(center[0], center[1], scale[0], scale[1], scale[0] * scale[1] * 400, bbox[4]));
}
transform_preds(vec_point, all_preds, center, scale, shape_w, shape_h);
int skeleton[][2] = {{15, 13},
{13, 11},
{16, 14},
{14, 12},
{11, 12},
{5, 11},
{6, 12},
{5, 6},
{5, 7},
{6, 8},
{7, 9},
{8, 10},
{1, 2},
{0, 1},
{0, 2},
{1, 3},
{2, 4},
{3, 5},
{4, 6}};

cv::rectangle(bgr, cv::Point(bbox[0], bbox[1]), cv::Point(bbox[0] + bbox[2], bbox[1] + bbox[3]),
cv::Scalar(255, 0, 0));
for (int i = 0; i < all_preds.size(); i++) {
if(all_preds[i].score>keypoint_score){
cv::circle(bgr, cv::Point(all_preds[i].x, all_preds[i].y), 3, cv::Scalar(0, 255, 120), -1);// draw the keypoint as a filled circle
}
}
for (int i = 0; i < sizeof(skeleton) / sizeof(skeleton[0]); i++) {
int x0 = all_preds[skeleton[i][0]].x;
int y0 = all_preds[skeleton[i][0]].y;
int x1 = all_preds[skeleton[i][1]].x;
int y1 = all_preds[skeleton[i][1]].y;

cv::line(bgr, cv::Point(x0, y0), cv::Point(x1, y1),
cv::Scalar(0, 255, 0), 1);

}
cv::imwrite("image.jpg", bgr);
cv::waitKey(1);

mnnNet->releaseModel();
mnnNet->releaseSession(session);
return 0;
}

Test result

(image)

Step 6: Train a model on your own data. I annotated just two images with labelme and converted them to the officially supported format.

(image)

(image)

Labelme annotation results

(image)

(image)

The script below then converts the labelme output into the mmpose-compatible dataset format (written by imitating the macaque parsing code). Note that my keypoints are numbered from 0; if your dataset numbers them from 1, subtract 1 from idx and iddx. A hypothetical example of the labelme input the script expects follows.
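For orientation, a minimal sketch of the labelme JSON the script consumes: point shapes whose label is the keypoint index as a digit string, plus a non-digit-labeled two-point rectangle (or a polygon) for the body box; every value below is invented:

labelme_example = {
    "imagePath": "0.png",
    "imageHeight": 480,
    "imageWidth": 640,
    "shapes": [
        {"label": "fish", "points": [[24, 12], [549, 398]]},  # two-point rectangle
        {"label": "0", "points": [[60, 200]]},                # keypoint 0 ("mouse")
        {"label": "1", "points": [[110, 180]]},               # keypoint 1 ("eye")
        # ... labels "2" to "7" follow the same single-point pattern
    ],
}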

import numpy as np
import json
import glob
import codecs
import os
import cv2
class MyEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        elif isinstance(obj, np.floating):
            return float(obj)
        elif isinstance(obj, np.ndarray):
            return obj.tolist()
        else:
            return super(MyEncoder, self).default(obj)


class Tococo(object):
    def __init__(self, Path, save_path, keypoints, skeleton, label, verify_flag, verify_path):
        self.verify_flag = verify_flag
        categories = {}
        categories.update({"supercategory": label})
        categories.update({"id": 0})
        categories.update({"name": label})
        categories.update({"keypoints": keypoints})
        self.keypoints_length = len(keypoints)
        categories.update({"skeleton": skeleton})
        self.categories = [categories]
        self.imagePath = None
        self.imageData = None
        self.imageHeight = None
        self.imageWidth = None
        self.imageName = None
        self.imageName_Noext = None
        self.image = []
        self.annotations = []
        self.jsonfile = glob.glob(os.path.join(Path, "*.json"))
        self.ext = "png"
        self.imgfile = glob.glob(os.path.join(Path, "*." + self.ext))
        if len(self.imgfile) == 0:
            self.imgfile = glob.glob(os.path.join(Path, "*.jpeg"))
            self.ext = "jpeg"
        if len(self.imgfile) == 0:
            self.imgfile = glob.glob(os.path.join(Path, "*.jpg"))
            self.ext = "jpg"
        self.save_path = save_path  # where the generated json is written
        self.class_id = label
        self.coco = {}
        self.path = Path
        self.verify_path = verify_path

    def labelme_to_coco(self):
        for num, json_file in enumerate(self.jsonfile):
            json_file = os.path.join(self.path, json_file)
            full_path, json_file_name = os.path.split(json_file)
            json_file_name_Noext, _ = os.path.splitext(json_file_name)
            full_img = os.path.join(full_path, ".".join([json_file_name_Noext, self.ext]))
            data = codecs.open(json_file, 'r')
            data = json.load(data)
            self.imageHeight, self.imageWidth = data["imageHeight"], data["imageWidth"]
            _, self.imageName = os.path.split(data["imagePath"])
            self.imageName_Noext, _ = os.path.splitext(self.imageName)
            self.imagePath, self.imageData = data["imagePath"], None
            self.image.append({"file_name": self.imageName, "width": self.imageWidth,
                               "height": self.imageHeight, "id": int(json_file_name_Noext)})
            shapes = data["shapes"]
            annotation = {}  # one annotation holds all samples of one image
            num_keypoints = 0
            keypoints = [0] * 3 * self.keypoints_length  # 3 values (x, y, visibility) per annotated keypoint

            for shape in shapes:
                min_value_x = self.imageWidth
                min_value_y = self.imageHeight
                max_value_x = 0
                max_value_y = 0
                rectFlag = True
                if not shape['label'].isdigit() and len(shape["points"]) != 2:
                    segmentation = []
                    for item in shape["points"]:
                        if rectFlag:
                            min_value_x = min(min_value_x, item[0])
                            min_value_y = min(min_value_y, item[1])
                            max_value_x = max(max_value_x, item[0])
                            max_value_y = max(max_value_y, item[1])
                        segmentation.extend([item[0], item[1]])
                    annotation['segmentation'] = [segmentation]
                    annotation['bbox'] = [min_value_x, min_value_y, max_value_x, max_value_y]
                elif shape['label'].isdigit() and len(shape["points"]) == 1:
                    idx = int(shape['label'])
                    keypoints[idx * 3 + 0] = int(shape['points'][0][0])
                    keypoints[idx * 3 + 1] = int(shape['points'][0][1])
                    keypoints[idx * 3 + 2] = 2
                    num_keypoints = num_keypoints + 1
                if not shape['label'].isdigit() and len(shape["points"]) == 2:
                    rectFlag = False
                    min_value_x = min(min_value_x, shape["points"][0][0])
                    min_value_y = min(min_value_y, shape["points"][0][1])
                    max_value_x = max(max_value_x, shape["points"][1][0])
                    max_value_y = max(max_value_y, shape["points"][1][1])
                    annotation['bbox'] = [min_value_x, min_value_y, max_value_x, max_value_y]

            annotation['num_keypoints'] = num_keypoints
            annotation['area'] = (annotation['bbox'][2] - annotation['bbox'][0]) * \
                                 (annotation['bbox'][3] - annotation['bbox'][1])
            annotation['iscrowd'] = 0
            annotation['keypoints'] = keypoints
            annotation['image_id'] = int(json_file_name_Noext)  # id of the image it belongs to
            annotation['category_id'] = 1
            annotation['id'] = num  # object id
            self.annotations.append(annotation)
            self.image_id = int(json_file_name_Noext)
            if self.verify_flag:
                _path, _img_name = os.path.split(full_img)
                img = cv2.imread(full_img)
                for item in self.categories[0]["skeleton"]:
                    idx = item[0]
                    iddx = item[1]
                    start_point = (annotation['keypoints'][idx * 3 + 0], annotation['keypoints'][idx * 3 + 1])
                    end_point = (annotation['keypoints'][iddx * 3 + 0], annotation['keypoints'][iddx * 3 + 1])
                    cv2.circle(img, start_point, 3, (255, 0, 0), 3)
                    cv2.circle(img, end_point, 3, (255, 0, 0), 3)
                    cv2.line(img, start_point, end_point, (0, 255, 0), 2)
                img_f = os.path.join(self.verify_path, _img_name)
                cv2.imwrite(img_f, img)
        self.coco["images"] = self.image
        self.coco["categories"] = self.categories
        self.coco["annotations"] = self.annotations



    def get_images(self, filename, height, width):
        image = {}
        image["height"] = height
        image['width'] = width
        image["id"] = self.imagePath
        image["file_name"] = filename
        return image

    def get_categories(self, name, class_id):
        category = {}
        category["supercategory"] = self.class_id
        category['id'] = class_id
        category['name'] = name
        return category

    def save_json(self):
        self.labelme_to_coco()
        coco_data = self.coco
        # write the json file; indent=4 for readability
        json.dump(coco_data, open(self.save_path, 'w'), indent=4, cls=MyEncoder)




all_img_json_path = r'/home/ubuntu/Downloads/test'
save_path=r"/home/ubuntu/Downloads/dest/train.json"
verify_path=r"/home/ubuntu/Downloads/verify"
full_path, json_file_name = os.path.split(save_path)
if not os.path.exists(full_path):
    os.mkdir(full_path)

if not os.path.exists(verify_path):
    os.mkdir(verify_path)

keypoints = ["mouse", "eye", "dorsal_fins", "dorsal_joint_caudal", "caudal_fins",
             "caudal_joint_pelvic", "pelvic_fins", "pectoral_fin"]
skeleton = [[0, 1], [1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7], [7, 0], [2, 7]]  # deliberately more skeleton links (9) than keypoints (8)
label="fish"
verify_flag=True
cocoData = Tococo(all_img_json_path, save_path, keypoints,skeleton,label,verify_flag,verify_path)
cocoData.save_json()
print("finish")

The script produces the corresponding training/test/validation json files plus verification images (since I annotated only two images, they double as the training, test, and validation sets). The verification images are mainly for checking that the keypoint order is written correctly; here I deliberately made the counts differ, with 8 keypoints but 9 skeleton links.
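For reference, a hypothetical minimal train.json in the shape this script emits, with invented coordinates; note it stores bbox as x_min, y_min, x_max, y_max (and computes area from that), not COCO's x, y, w, h:

import json

coco = {
    "images": [{"file_name": "0.png", "width": 640, "height": 480, "id": 0}],
    "categories": [{
        "supercategory": "fish", "id": 0, "name": "fish",
        "keypoints": ["mouse", "eye", "dorsal_fins", "dorsal_joint_caudal",
                      "caudal_fins", "caudal_joint_pelvic", "pelvic_fins",
                      "pectoral_fin"],
        "skeleton": [[0, 1], [1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7],
                     [7, 0], [2, 7]],
    }],
    "annotations": [{
        "bbox": [24, 12, 549, 398],                # x_min, y_min, x_max, y_max
        "keypoints": [60, 200, 2, 110, 180, 2, 250, 120, 2, 380, 130, 2,
                      530, 200, 2, 380, 300, 2, 300, 330, 2, 140, 280, 2],
        "num_keypoints": 8, "area": (549 - 24) * (398 - 12),
        "iscrowd": 0, "image_id": 0, "category_id": 1, "id": 0,
    }],
}
print(json.dumps(coco)[:120])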

(image)

(image)

Step 7: Train the model in mmpose; the macaque config files need modifying.

Modify the first config file, mmpose/macaque.py at master · open-mmlab/mmpose · GitHub.

After modification, macaque.py looks like this (you can also create a separate file; I edited the original in place; note that mmpose expects joint_weights and sigmas to carry one entry per keypoint):

dataset_info = dict(
dataset_name='macaque',
paper_info=dict(
author='Labuguen, Rollyn and Matsumoto, Jumpei and '
'Negrete, Salvador and Nishimaru, Hiroshi and '
'Nishijo, Hisao and Takada, Masahiko and '
'Go, Yasuhiro and Inoue, Ken-ichi and Shibata, Tomohiro',
title='MacaquePose: A novel "in the wild" macaque monkey pose dataset '
'for markerless motion capture',
container='bioRxiv',
year='2020',
homepage='http://www.pri.kyoto-u.ac.jp/datasets/'
'macaquepose/index.html',
),
keypoint_info={
0:
dict(name='mouse', id=0, color=[51, 153, 255], type='upper', swap=''),
1:
dict(
name='eye',
id=1,
color=[51, 153, 255],
type='upper',
swap=''),
2:
dict(
name='dorsal_fins',
id=2,
color=[51, 153, 255],
type='upper',
swap=''),
3:
dict(
name='dorsal_joint_caudal',
id=3,
color=[51, 153, 255],
type='upper',
swap=''),
4:
dict(
name='caudal_fins',
id=4,
color=[51, 153, 255],
type='upper',
swap=''),
5:
dict(
name='caudal_joint_pelvic',
id=5,
color=[0, 255, 0],
type='upper',
swap=''),
6:
dict(
name='pelvic_fins',
id=6,
color=[255, 128, 0],
type='upper',
swap=''),
7:
dict(
name='pectoral_fin',
id=7,
color=[0, 255, 0],
type='upper',
swap='')
},
skeleton_info={
0:
dict(link=('mouse', 'eye'), id=0, color=[0, 255, 0]),
1:
dict(link=('eye', 'dorsal_fins'), id=1, color=[0, 255, 0]),
2:
dict(link=('dorsal_fins', 'dorsal_joint_caudal'), id=2, color=[255, 128, 0]),
3:
dict(link=('dorsal_joint_caudal', 'caudal_fins'), id=3, color=[255, 128, 0]),
4:
dict(link=('caudal_fins', 'caudal_joint_pelvic'), id=4, color=[51, 153, 255]),
5:
dict(link=('caudal_joint_pelvic', 'pelvic_fins'), id=5, color=[51, 153, 255]),
6:
dict(link=('pelvic_fins', 'pectoral_fin'), id=6, color=[51, 153, 255]),
7:
dict(link=('pectoral_fin', 'mouse'),id=7,color=[51, 153, 255]),
8:
dict(link=('dorsal_fins', 'pectoral_fin'),id=8,color=[51, 153, 255])
},
joint_weights=[
1., 1., 1., 1., 1., 1., 1., 1.2,1.2
],
sigmas=[
0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072
])

Second place to modify: https://github.com/open-mmlab/mmpose/blob/master/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/hrnet_w32_macaque_256x192.py

Training set directory

(image)

Change lines 27-35 of the original file to:

channel_cfg = dict(
num_output_channels=8,
dataset_joints=8,
dataset_channel=[
[0, 1, 2, 3, 4, 5, 6, 7],
],
inference_channel=[
0, 1, 2, 3, 4, 5, 6, 7
])

Change the last few lines to:

data_root = '/home/ubuntu/mmpose/data/macaque'
data = dict(
samples_per_gpu=64,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='AnimalMacaqueDataset',
ann_file=f'{data_root}/annotations/train.json',
img_prefix=f'{data_root}/images/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='AnimalMacaqueDataset',
ann_file=f'{data_root}/annotations/test.json',
img_prefix=f'{data_root}/images/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='AnimalMacaqueDataset',
ann_file=f'{data_root}/annotations/val.json',
img_prefix=f'{data_root}/images/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)
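A quick way to confirm the edited config still parses and the new keypoint count propagates (a sketch using mmcv's Config; the path is the one from the training command below):

from mmcv import Config

cfg = Config.fromfile('/home/ubuntu/mmpose/configs/animal/2d_kpt_sview_rgb_img/'
                      'topdown_heatmap/macaque/hrnet_w32_macaque_256x192.py')
print(cfg.channel_cfg['num_output_channels'])  # expect 8
print(cfg.data['train']['ann_file'])           # expect .../annotations/train.json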

Step 8: Train the model. One error came up during training.

Training command:

ubuntu@ubuntu:~/mmpose/tools$ python3 train.py /home/ubuntu/mmpose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/hrnet_w32_macaque_256x192.py

The following error appeared; I am not sure whether my config edits caused it, so I patched the library source directly.

  File "/home/ubuntu/.local/lib/python3.8/site-packages/xtcocotools/cocoeval.py", ...
    self.ious = {(imgId, catId): computeIoU(imgId, catId) \
  File "/home/ubuntu/.local/lib/python3.8/site-packages/xtcocotools/cocoeval.py", ...
    ... (self.score_key)
AttributeError: 'COCOeval' object has no attribute 'score_key'

The offending spot in xtcocotools/cocoeval.py:

else:
    d = np.array(dt['keypoints'])
    k = np.count_nonzero(d[2::3] > 0)

One line was added:

else:
    d = np.array(dt['keypoints'])
    k = np.count_nonzero(d[2::3] > 0)
    self.score_key = 'score'

Training log:

ubuntu@ubuntu:~/mmpose/tools$ python3 train.py /home/ubuntu/mmpose/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/hrnet_w32_macaque_256x192.py
/home/ubuntu/mmpose/mmpose/utils/setup_env.py:32: UserWarning: Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
warnings.warn(
/home/ubuntu/mmpose/mmpose/utils/setup_env.py:42: UserWarning: Setting MKL_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
warnings.warn(
2022-08-27 10:04:26,250 - mmpose - INFO - Environment info:
------------------------------------------------------------
sys.platform: linux
Python: 3.8.10 (default, Jun 22 2022, 20:18:18) [GCC 9.4.0]
CUDA available: True
GPU 0: NVIDIA GeForce RTX 3050 Laptop GPU
CUDA_HOME: /usr/local/cuda
NVCC: Build cuda_11.1.TC455_06.29069683_0
GCC: gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0
PyTorch: 1.9.0+cu111
PyTorch compiling details: PyTorch built with:
- GCC 7.3
- C++ Version: 201402
- Intel(R) Math Kernel Library Version 2020.0.0 Product Build 20191122 for Intel(R) 64 architecture applications
- Intel(R) MKL-DNN v2.1.2 (Git Hash 98be7e8afa711dc9b66c8ff3504129cb82013cdb)
- OpenMP 201511 (a.k.a. OpenMP 4.5)
- NNPACK is enabled
- CPU capability usage: AVX2
- CUDA Runtime 11.1
- NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86
- CuDNN 8.0.5
- Magma 2.5.2
- Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.1, CUDNN_VERSION=8.0.5, CXX_COMPILER=/opt/rh/devtoolset-7/root/usr/bin/c++, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.9.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON,

TorchVision: 0.10.0+cu111
OpenCV: 4.2.0
MMCV: 1.4.6
MMCV Compiler: GCC 7.3
MMCV CUDA Compiler: 11.1
MMPose: 0.28.1+f7b4b25
------------------------------------------------------------

....
DONE (t=0.00s).
Accumulating eval(t=0.00s).
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets= 20 ] = -1.000
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets= 20 ] = -1.000
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets= 20 ] = -1.000
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets= 20 ] = -1.000
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets= 20 ] = -1.000
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 20 ] = -1.000
Average Recall (AR) @[ IoU=0.50 | area= all | maxDets= 20 ] = -1.000
Average Recall (AR) @[ IoU=0.75 | area= all | maxDets= 20 ] = -1.000
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets= 20 ] = -1.000
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets= 20 ] = -1.000
2022-08-27 10:12:41,729 - mmpose - INFO - Epoch(val) [210][1] AP: -1.0000, AP .5: -1.0000, AP .75: -1.0000, AP (M): -1.0000, AP (L): -1.0000, AR: -1.0000, AR .5: -1.0000, AR .75: -1.0000, AR (M): -1.0000, AR (L): -1.0000

Convert the trained model following the steps above:

ubuntu@ubuntu:~/mmpose/tools$ python3 deployment/pytorch2onnx.py --config ../configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/hrnet_w32_macaque_256x192.py --checkpoint /home/ubuntu/mmpose/tools/work_dirs/hrnet_w32_macaque_256x192/latest.pth
ubuntu@ubuntu:~/mmpose/tools$ python3 deployment/pytorch2onnx.py ../configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/macaque/hrnet_w32_macaque_256x192.py /home/ubuntu/mmpose/tools/work_dirs/hrnet_w32_macaque_256x192/latest.pth

ubuntu@ubuntu:~/ncnn/build/install/bin$ ./onnx2ncnn ~/mmpose/tools/tmp.onnx ~/mmpose/tools/tmp.param ~/mmpose/tools/tmp.bin

Step 9: Testing, using ncnn and MNN as examples

In the test code, only the detection box, the model files, and the skeleton need changing:

harnet.load_param("../model/fish.param");
harnet.load_model("../model/fish.bin");
float bbox[] = {24, 12, 549, 398, 9.995332e-01};// x y w h score; normally from the detection framework, here taken from my own annotation
int skeleton[][2] = {{0, 1},{1, 2},{2, 3},{3, 4},{4, 5},{5, 6},{6, 7},{7,0},{2,7}};// update to match your own keypoint links

Note: if your annotation numbered the keypoints from 1, change this part of the code as follows (the skeleton above will then also have 1 as its smallest index):

int x0 = all_preds[skeleton[i][0]-1].x;
int y0 = all_preds[skeleton[i][0]-1].y;
int x1 = all_preds[skeleton[i][1]-1].x;
int y1 = all_preds[skeleton[i][1]-1].y;

Test result

(image)

The MNN test result; the changes are analogous.

(image)

Because the box above was filled in by hand, wrap a detection framework such as yolov5, FastestDet, or nanodet around it and the pipeline is complete.

Appendix: a json-to-xml (VOC) conversion script, so the same data can also be used to train nanodet.

# -*- coding: utf-8 -*-
import io
import json
import os
from xml.dom.minidom import Document

path = '/home/ubuntu/Alan_labelme/train'
destPath = "/home/ubuntu/Alan_labelme/trainData/train"


def file_name(file_dir):
    L = []
    for root, dirs, files in os.walk(file_dir):
        for file in files:
            if os.path.splitext(file)[1] == '.json':
                L.append(os.path.join(root, file))
    return L


m_folder = os.path.basename(path)
print('m_folder=', m_folder)
m_database = 'Unknown'
print('m_database=', m_database)
m_depth = 3
print('m_depth=', m_depth)
m_segmented = 0
print('m_segmented=', m_segmented)

m_pose = 'Unspecified'
print('m_pose=', m_pose)
m_truncated = 0
print('m_truncated=', m_truncated)
m_difficult = 0
print('m_difficult=', m_difficult)
m_segmented = 0
print('m_segmented=', m_segmented)

path_list = file_name(path)
for name in enumerate(path_list):
    m_path = name[1]
    dir = os.path.dirname(m_path)
    print('dir=', dir)

    file_json = io.open(m_path, 'r', encoding='utf-8')
    json_data = file_json.read()
    data = json.loads(json_data)
    m_filename = data['imagePath']
    print('m_filename=', m_filename)
    m_path = os.path.join(dir, m_filename)
    print('m_path=', m_path)
    m_width = data['imageWidth']
    print('m_width=', m_width)
    m_height = data['imageHeight']
    print('m_height=', m_height)
    object_name = os.path.splitext(m_filename)[0]
    new_object_name = object_name + '.xml'
    print(new_object_name)
    doc = Document()  # create the DOM document
    DOCUMENT = doc.createElement('annotation')  # create the root element

    folder = doc.createElement('folder')
    folder_text = doc.createTextNode(m_folder)
    folder.appendChild(folder_text)
    DOCUMENT.appendChild(folder)
    doc.appendChild(DOCUMENT)

    filename = doc.createElement('filename')
    filename_text = doc.createTextNode(m_filename)
    filename.appendChild(filename_text)
    DOCUMENT.appendChild(filename)
    doc.appendChild(DOCUMENT)

    path = doc.createElement('path')
    path_text = doc.createTextNode(m_filename)
    path.appendChild(path_text)
    DOCUMENT.appendChild(path)
    doc.appendChild(DOCUMENT)

    source = doc.createElement('source')
    database = doc.createElement('database')
    database_text = doc.createTextNode(m_database)  # write the element content
    database.appendChild(database_text)
    source.appendChild(database)
    DOCUMENT.appendChild(source)
    doc.appendChild(DOCUMENT)

    size = doc.createElement('size')
    width = doc.createElement('width')
    width_text = doc.createTextNode(str(m_width))  # write the element content
    width.appendChild(width_text)
    size.appendChild(width)

    height = doc.createElement('height')
    height_text = doc.createTextNode(str(m_height))
    height.appendChild(height_text)
    size.appendChild(height)

    depth = doc.createElement('depth')
    depth_text = doc.createTextNode(str(m_depth))
    depth.appendChild(depth_text)
    size.appendChild(depth)

    DOCUMENT.appendChild(size)

    segmented = doc.createElement('segmented')
    segmented_text = doc.createTextNode(str(m_segmented))
    segmented.appendChild(segmented_text)
    DOCUMENT.appendChild(segmented)
    doc.appendChild(DOCUMENT)
    for i in range(len(data['shapes'])):
        if len(data['shapes'][i]['points']) != 2:
            continue
        m_xmin_0 = (data['shapes'][i]['points'][0][0]
                    if (data['shapes'][i]['points'][0][0] < data['shapes'][i]['points'][1][0])
                    else data['shapes'][i]['points'][1][0])
        print('m_xmin_0=', m_xmin_0)
        m_ymin_0 = (data['shapes'][i]['points'][0][1]
                    if (data['shapes'][i]['points'][0][1] < data['shapes'][i]['points'][1][1])
                    else data['shapes'][i]['points'][1][1])
        print('m_ymin_0=', m_ymin_0)
        m_xmax_0 = (data['shapes'][i]['points'][1][0]
                    if (data['shapes'][i]['points'][0][0] < data['shapes'][i]['points'][1][0])
                    else data['shapes'][i]['points'][0][0])
        print('m_xmax_0=', m_xmax_0)
        m_ymax_0 = (data['shapes'][i]['points'][1][1]
                    if (data['shapes'][i]['points'][0][1] < data['shapes'][i]['points'][1][1])
                    else data['shapes'][i]['points'][0][1])
        print('m_ymax_0=', m_ymax_0)
        m_name_0 = data['shapes'][i]['label']
        print('m_name_0=', m_name_0)
        object = doc.createElement('object')
        name = doc.createElement('name')
        name_text = doc.createTextNode(m_name_0)
        name.appendChild(name_text)
        object.appendChild(name)

        pose = doc.createElement('pose')
        pose_text = doc.createTextNode(m_pose)
        pose.appendChild(pose_text)
        object.appendChild(pose)

        truncated = doc.createElement('truncated')
        truncated_text = doc.createTextNode(str(m_truncated))
        truncated.appendChild(truncated_text)
        object.appendChild(truncated)

        difficult = doc.createElement('difficult')
        difficult_text = doc.createTextNode(str(m_difficult))
        difficult.appendChild(difficult_text)
        object.appendChild(difficult)

        bndbox = doc.createElement('bndbox')
        xmin = doc.createElement('xmin')
        xmin_text = doc.createTextNode(str(int(m_xmin_0)))
        xmin.appendChild(xmin_text)
        bndbox.appendChild(xmin)

        ymin = doc.createElement('ymin')
        ymin_text = doc.createTextNode(str(int(m_ymin_0)))
        ymin.appendChild(ymin_text)
        bndbox.appendChild(ymin)

        xmax = doc.createElement('xmax')
        xmax_text = doc.createTextNode(str(int(m_xmax_0)))
        xmax.appendChild(xmax_text)
        bndbox.appendChild(xmax)

        ymax = doc.createElement('ymax')
        ymax_text = doc.createTextNode(str(int(m_ymax_0)))
        ymax.appendChild(ymax_text)
        bndbox.appendChild(ymax)
        object.appendChild(bndbox)

        DOCUMENT.appendChild(object)
    new_path_filename = os.path.join(destPath, new_object_name)
    print('new_path_filename=', new_path_filename)
    f = open(new_path_filename, 'w')
    doc.writexml(f, indent='\t', newl='\n', addindent='\t', encoding='utf-8')
    f.close()
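A small round-trip check that the generated VOC XML carries the fields nanodet expects (a sketch; the file name 0.xml is hypothetical):

import xml.etree.ElementTree as ET

root = ET.parse('/home/ubuntu/Alan_labelme/trainData/train/0.xml').getroot()
for obj in root.iter('object'):
    box = [int(obj.find('bndbox').findtext(k))
           for k in ('xmin', 'ymin', 'xmax', 'ymax')]
    print(obj.findtext('name'), box)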

References

Primate Research Institute, Kyoto University

Get Started — MMPose 0.28.0 documentation

From: https://blog.51cto.com/u_12504263/5719074
