一.引言
文字识别,也称为光学字符识别(Optical Character Recognition, OCR),是一种将不同形式的文档(如扫描的纸质文档、PDF文件或数码相机拍摄的图片)中的文字转换成可编辑和可搜索的数据的技术。随着技术的发展,文字识别技术已经成为信息管理、自动化办公和智能系统的关键组成部分。
二.简介
为了易于集成和使用,我们将文字识别OCR封装为DLL(动态链接库)。这种封装方式不仅保留了算法的性能优势,还提供了跨语言的兼容性(DLL 仅适用于 Windows 平台),目前支持的编程语言如下:
- C++
- Python
- 易语言
1.C++头文件
#ifndef __SN_OCR__H__
#define __SN_OCR__H__
#include "windows.h"
// Status record filled in by the API functions below to report success or failure.
typedef struct SN_STATU {
int code; // Error code: 0 means success, any other value is the error number
char message[4096]; // Error message: "OK" on success, otherwise a description of the error
}SN_STATU;
/* Start the OCR text-recognition service.
 *
 * Parameters:
 *   [in]  szOnnxFilePath: path of the ONNX model file; if NULL, the directory
 *                         containing the DLL is used by default
 *   [out] pStatu: receives error information; pStatu->code is 0 on success,
 *                 otherwise it holds the error number
 *
 * Returns: 0 on success, the error number on failure; see pStatu for details.
 *
 */
int WINAPI apiSNInitOCRServer(char* szOnnxFilePath, SN_STATU* pStatu);
/* Create an OCR text-recognition handle.
 *
 * Parameters:
 *   [in]  szKey: license key (purchase a key: https://shop.4yuns.com/links/7C9F16B7)
 *   [in]  szOnnxFilePath: path of the ONNX model file; if NULL, the directory
 *                         containing the DLL is used by default
 *   [out] pStatu: receives error information; pStatu->code is 0 on success,
 *                 otherwise it holds the error number
 *
 * Returns: the handle on success, NULL on failure.
 *
 */
HANDLE WINAPI apiSNCreateOCRHandle(char* szKey, char* szOnnxFilePath, SN_STATU* pStatu);
/* Get the expiry time of the OCR license key.
 *
 * Parameters:
 *   [in]  handle: handle obtained from apiSNCreateOCRHandle
 *   [out] pResult: receives error information; pResult->code is 0 on success,
 *                  otherwise it holds the error number
 *
 * Returns: the key's expiry time, or NULL on failure (see pResult->message).
 * NOTE(review): this header does not say who owns the returned string or how
 * long it stays valid -- confirm with the DLL author.
 *
 */
char* WINAPI apiSNGetKeyExpiresTime(HANDLE handle, SN_STATU* pResult);
/* Run OCR on an image and return the result as a JSON string.
 *
 * Parameters:
 *   [in]  handle: handle obtained from apiSNCreateOCRHandle
 *   [in]  szImageFilePath: path of the image file
 *   [out] pStatu: receives error information; pStatu->code is 0 on success,
 *                 otherwise it holds the error number
 *
 * Returns: the OCR result as a JSON string, or NULL on failure
 * (see pStatu->message).
 * NOTE(review): this header does not say who owns the returned string or how
 * long it stays valid -- confirm with the DLL author.
 *
 */
char* WINAPI apiSNGetOCRFromImage(HANDLE handle, char* szImageFilePath, SN_STATU* pStatu);
/* Release an OCR text-recognition handle (frees its memory).
 *
 * Parameters:
 *   [in]  handle: handle obtained from apiSNCreateOCRHandle
 *
 * Returns: 0 on success, any other value is the error number.
 *
 */
int WINAPI apiSNDestroyOCRHandle(HANDLE handle);
#endif
2.Python调用dll接口
from ctypes import cdll, c_char_p, Structure, byref
import ctypes
class SN_STATU(Structure):
    """ctypes mirror of the C ``SN_STATU`` status struct.

    The C header declares ``int code; char message[4096];``. The message is an
    inline character buffer, so it must be ``c_char * 4096``; ``c_char_p * 4096``
    would be an array of 4096 pointers with the wrong size and layout.
    Reading ``message`` yields ``bytes`` up to the first NUL.
    """

    _fields_ = [
        ("code", ctypes.c_int),             # 0 on success, otherwise the error number
        ("message", ctypes.c_char * 4096),  # b"OK" on success, otherwise the error text
    ]
# Load the DLL. The C header declares every export as WINAPI (__stdcall), so
# the stdcall loader is used; `cdll` assumes cdecl, which mismatches the
# calling convention on 32-bit Windows.
lib = ctypes.windll.LoadLibrary('D://SNOCR.dll')

# Declare argument and return types so ctypes marshals pointers and handles
# correctly (without this, handles would be truncated to C int).
lib.apiSNInitOCRServer.argtypes = [c_char_p, ctypes.POINTER(SN_STATU)]
lib.apiSNInitOCRServer.restype = ctypes.c_int
lib.apiSNCreateOCRHandle.argtypes = [c_char_p, c_char_p, ctypes.POINTER(SN_STATU)]
lib.apiSNCreateOCRHandle.restype = ctypes.c_void_p
lib.apiSNGetKeyExpiresTime.argtypes = [ctypes.c_void_p, ctypes.POINTER(SN_STATU)]
lib.apiSNGetKeyExpiresTime.restype = c_char_p
lib.apiSNGetOCRFromImage.argtypes = [ctypes.c_void_p, c_char_p, ctypes.POINTER(SN_STATU)]
lib.apiSNGetOCRFromImage.restype = c_char_p
lib.apiSNDestroyOCRHandle.argtypes = [ctypes.c_void_p]
lib.apiSNDestroyOCRHandle.restype = ctypes.c_int


def _decode(raw):
    """Decode bytes returned by the DLL, trying UTF-8 first and GBK second."""
    try:
        return raw.decode('utf-8')
    except UnicodeDecodeError:
        # Never let a decoding problem hide the actual result or error text.
        return raw.decode('GBK', errors='replace')


def _fail(status):
    """Print the error carried by *status* and exit with a non-zero code."""
    print(f"Error:{_decode(status.message)}")
    raise SystemExit(1)


# Inputs for the demo run.
statu = SN_STATU()
key = b"SNKJe9xffLhdFY7r3TcffXq44ThDVcE3BQFQFfVA9VG4"
onnx_path = b"D://SNOCR.onnx"
image_path = b"D://7.jpg"

# 1. Start the OCR service. The header documents 0 as success and any other
#    value as an error number, so test for non-zero rather than negative.
ret = lib.apiSNInitOCRServer(onnx_path, byref(statu))
if ret != 0:
    _fail(statu)

# 2. Create the OCR handle (NULL/None on failure).
handle = lib.apiSNCreateOCRHandle(key, onnx_path, byref(statu))
if not handle:
    _fail(statu)

try:
    # 3. Query the license key's expiry time.
    expires_time = lib.apiSNGetKeyExpiresTime(handle, byref(statu))
    if not expires_time:
        _fail(statu)
    print(f"Expires Time: {_decode(expires_time)}")

    # 4. Run OCR on the image; the result is a JSON string.
    ocr_result = lib.apiSNGetOCRFromImage(handle, image_path, byref(statu))
    if not ocr_result:
        _fail(statu)
    print(f"OCR Result: {_decode(ocr_result)}")
finally:
    # 5. Always release the handle, including on the error exits above.
    lib.apiSNDestroyOCRHandle(handle)

# Wait for input so the console window does not close immediately.
input("Press Enter to exit...")
三.效果演示
1.图片1
识别效果:
{
"type": 0,
"task_id": 1,
"err_code": 0,
"ocr_result": {
"single_result": [{
"left": 24.700000,
"top": 17.333332,
"right": 326.299957,
"bottom": 32.499992,
"str_utf8": "在易语言中,如何为按钮添加点击事件处理代码?",
"rate": "0.995533"
}, {
"left": 23.806435,
"top": 63.233559,
"right": 324.158508,
"bottom": 79.905136,
"str_utf8": "我想在GUI中添加一个文本框,我应该如何操作?",
"rate": "0.967164"
}, {
"left": 25.554037,
"top": 110.492912,
"right": 354.041595,
"bottom": 124.103584,
"str_utf8": "易语言中有哪些控件是常用的,它们各自有什么特点?",
"rate": "0.996690"
}],
"width": "416",
"height": "152"
}
}
2.图片2
识别效果:
{
"type": 0,
"task_id": 1,
"err_code": 0,
"ocr_result": {
"single_result": [{
"left": 323.454163,
"top": 14.279167,
"right": 389.262512,
"bottom": 37.870834,
"str_utf8": "国热点",
"rate": "0.745956"
}, {
"left": 17.981834,
"top": 57.859264,
"right": 258.913910,
"bottom": 75.778633,
"str_utf8": "Python即将成为年度语言,TIOB",
"rate": "0.961854"
}, {
"left": 18.625000,
"top": 86.916664,
"right": 227.845840,
"bottom": 100.575000,
"str_utf8": "12月TIOBE编程语言榜单已发布",
"rate": "0.956302"
}, {
"left": 18.004168,
"top": 130.995834,
"right": 262.612488,
"bottom": 147.758331,
"str_utf8": "传Win11硬件门槛大降,老旧PC",
"rate": "0.990127"
}, {
"left": 17.383333,
"top": 158.933334,
"right": 256.404175,
"bottom": 173.212494,
"str_utf8": "TPM20是未来Windows不可或缺的",
"rate": "0.993834"
}, {
"left": 17.976627,
"top": 203.605774,
"right": 259.543152,
"bottom": 221.703751,
"str_utf8": "OpenAlSora上线即炸服、1条视",
"rate": "0.979807"
}, {
"left": 17.362951,
"top": 231.025940,
"right": 256.424805,
"bottom": 246.494904,
"str_utf8": "作为一款旨在理解和模拟现实的AI基",
"rate": "0.929840"
}, {
"left": 19.866667,
"top": 276.891663,
"right": 247.712463,
"bottom": 293.654175,
"str_utf8": "彻底放弃React!频繁更新和管",
"rate": "0.984215"
}, {
"left": 17.383333,
"top": 304.829163,
"right": 261.991669,
"bottom": 319.108337,
"str_utf8": "React 真的已经不再适合现代开发了吗?",
"rate": "0.973967"
}, {
"left": 18.591219,
"top": 349.496246,
"right": 256.436981,
"bottom": 367.271393,
"str_utf8": "告别VMware!被博通收购后涨价",
"rate": "0.997881"
}, {
"left": 17.949070,
"top": 376.545624,
"right": 266.386017,
"bottom": 392.872589,
"str_utf8": "面对VMware增长十倍的收费,近日英",
"rate": "0.990023"
}, {
"left": 322.172638,
"top": 56.995289,
"right": 555.074097,
"bottom": 75.172531,
"str_utf8": "TOP专家加盟OpenCloudOS年会",
"rate": "0.971594"
}, {
"left": 322.833344,
"top": 86.295837,
"right": 412.854156,
"bottom": 99.954170,
"str_utf8": "睹重磅阵容!",
"rate": "0.936653"
}, {
"left": 322.803558,
"top": 130.880295,
"right": 564.365479,
"bottom": 147.844025,
"str_utf8": "亿级订单系统的数据库查询性能优",
"rate": "0.995852"
}, {
"left": 321.567688,
"top": 157.681427,
"right": 560.010559,
"bottom": 173.852203,
"str_utf8": "为什么没考虑Donis?因为ES是团队应",
"rate": "0.967674"
}, {
"left": 321.591675,
"top": 204.254166,
"right": 469.970825,
"bottom": 221.016663,
"str_utf8": "ChatGPT崩了上热搜",
"rate": "0.984410"
}, {
"left": 322.833252,
"top": 232.191650,
"right": 485.491608,
"bottom": 246.470810,
"str_utf8": "Gemini20发布|极客头条",
"rate": "0.958749"
}, {
"left": 320.350000,
"top": 276.270844,
"right": 564.337524,
"bottom": 294.275000,
"str_utf8": "2024OpenCloudOS年会即将启航",
"rate": "0.951624"
}, {
"left": 320.970825,
"top": 304.829163,
"right": 435.204163,
"bottom": 319.108337,
"str_utf8": "前瞻亮点独家揭秘",
"rate": "0.956549"
}, {
"left": 322.212494,
"top": 350.150000,
"right": 520.879150,
"bottom": 366.912506,
"str_utf8": "降价+豪礼!双十二福利来了",
"rate": "0.984479"
}, {
"left": 320.970825,
"top": 376.845825,
"right": 518.395813,
"bottom": 392.366669,
"str_utf8": "最值得抢购的C++系列精品课程",
"rate": "0.965407"
}],
"width": "596",
"height": "417"
}
}
四.常见问题
1.是否支持多线程
支持
五.更新日志
- 2024.12.15 OCR 文字识别支持C++/Python/易语言