以前我简单介绍过tesseract-wasm,基于这个wasm 包,我们可以在nodejs 中直接调用tesseract 的方法实现ocr 处理,以下是一个简单的demo
基于fastify 开发了一个简单的api,同时包含了一个可以用来测试的简单web 页面
项目结构
- package.json
{
"name": "tesseract",
"version": "1.0.0",
"main": "index.js",
"license": "MIT",
"dependencies": {
"@fastify/static": "^6.12.0",
"fastify": "^4.24.3",
"fastify-file-upload": "^4.0.0",
"sharp": "^0.32.6",
"tesseract-wasm": "^0.10.0"
},
"scripts": {
"dev": "node demo.mjs"
}
}
- demo.mjs
import { readFileSync } from "node:fs";
import { fileURLToPath } from "node:url";
import path from "node:path"
import { fastify } from "fastify";
import { createOCREngine } from "tesseract-wasm";
import { loadWasmBinary } from "tesseract-wasm/node";
import sharp from "sharp";
import fileUpload from 'fastify-file-upload'
import {fastifyStatic} from '@fastify/static'
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
/**
 * Decode an image with sharp and return its raw pixel buffer plus dimensions.
 *
 * An alpha channel is ensured before the raw pixels are extracted. The
 * returned object is what the `/ocr` route hands to `engine.loadImage`.
 *
 * @param {*} path - Input passed straight to `sharp()`; the `/ocr` route
 *   passes the uploaded file's `data` rather than a filesystem path.
 * @returns {Promise<{data: *, width: *, height: *}>} Raw pixel data together
 *   with the width and height reported by `metadata()`.
 */
async function loadImage(path) {
  const pipeline = sharp(path).ensureAlpha();
  const meta = await pipeline.metadata();
  const pixels = await pipeline.raw().toBuffer();
  return {
    data: pixels,
    width: meta.width,
    height: meta.height,
  };
}
/**
 * Resolve a URL relative to the current module and return it as a
 * filesystem path.
 *
 * @param {string} path - URL or relative reference, interpreted against
 *   `import.meta.url`.
 * @returns {string} The corresponding filesystem path.
 */
function resolve(path) {
  const target = new URL(path, import.meta.url);
  return fileURLToPath(target);
}
// Load the tesseract WebAssembly binary via the package's Node helper.
const wasmBinary = await loadWasmBinary();
// Create the OCR engine on top of the WASM binary.
const engine = await createOCREngine({ wasmBinary });
// Load the Chinese (chi_sim) model. The file is resolved next to this module,
// the same way the static root is, so startup does not depend on the
// directory the process was launched from.
const model = readFileSync(path.join(__dirname, "chi_sim.traineddata"));
engine.loadModel(model);
// Fastify server instance with its logger enabled.
const app = fastify({ logger: true });

// File-upload plugin; the /ocr route reads the uploaded file from req.body.
app.register(fileUpload);

// Static-file plugin: serves the simple test page from ./public.
const staticOptions = {
  root: path.join(__dirname, 'public'),
  prefix: '/', // optional: default '/'
};
app.register(fastifyStatic, staticOptions);
// OCR endpoint: expects an upload whose `file` field carries the image bytes
// and responds with the recognized text.
app.post('/ocr', async function (req, reply) {
  const file = req.body?.file;
  // Reject requests without a usable upload instead of failing with a 500
  // when `file.data` is dereferenced below.
  if (!file?.data?.length) {
    reply.code(400);
    return {
      code: 400,
      message: 'Expected a non-empty image upload in the "file" field.',
    };
  }

  // Use a Date here: Date.now() is a number, and calling toLocaleString() on
  // it prints a digit-grouped integer rather than a timestamp.
  console.log(`starting index`, new Date().toLocaleString());
  const image = await loadImage(file.data);

  // No await sits between loading the image and reading the text, so another
  // request's image cannot be swapped into the shared engine in between.
  engine.loadImage(image);
  const text = engine.getText((progress) => {
    console.log(`\rRecognizing text (${progress}% done)...`);
  });
  console.log(`ending`, new Date().toLocaleString());

  // Returning the payload from an async handler lets Fastify send it.
  return {
    code: 200,
    text,
  };
});
// Start listening on port 3000 on all interfaces. A startup failure is logged
// and terminates the process with exit code 1.
let address;
try {
  address = await app.listen({ port: 3000, host: "0.0.0.0" });
} catch (err) {
  app.log.error(err);
  process.exit(1);
}
app.log.info(`server listening on ${address}`);
- 静态页面
index.html
<!DOCTYPE html>
<!-- Minimal test page: pick an image, then show it next to the OCR result. -->
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>OCR Demo</title>
  <style>
    /* Center the upload control and the result panes in the viewport. */
    body {
      display: flex;
      flex-direction: column;
      align-items: center;
      justify-content: center;
      height: 100vh;
      margin: 0;
      padding: 0;
      background-color: #f0f0f0;
    }
    #file-upload {
      margin-top: 20px;
    }
    /* Image pane and text pane sit side by side. */
    #display-area {
      display: flex;
      justify-content: space-around;
      width: 100%;
    }
    /* Scale the image to the pane width and keep its aspect ratio. */
    #image-display img {
      width: 100%;
      height: auto;
    }
    /* Fixed-size panes that scroll when their content overflows. */
    #image-display,
    #text-display {
      width: 500px;
      height: 500px;
      overflow: auto;
    }
  </style>
</head>
<body>
  <input type="file" id="file-upload" accept="image/*">
  <div id="display-area">
    <div id="image-display"></div>
    <div id="text-display"></div>
  </div>
  <!-- my.js (not shown here) calls the API and renders the result. -->
  <script type="module" src="my.js"></script>
</body>
</html>
- my.js
处理接口调用并显示识别结果
- docker 集成
FROM node:18.18.2-bullseye-slim
WORKDIR /app
# Install dependencies right after copying the manifests, so this layer is
# rebuilt only when package.json or yarn.lock change, not on every code edit.
COPY package.json /app/package.json
COPY yarn.lock /app/yarn.lock
# --frozen-lockfile fails the build instead of silently rewriting yarn.lock.
RUN yarn install --frozen-lockfile
# Application code, static test page and the OCR model.
COPY demo.mjs /app/demo.mjs
COPY public/ /app/public
COPY chi_sim.traineddata /app/chi_sim.traineddata
EXPOSE 3000
ENTRYPOINT [ "node","demo.mjs" ]
- 启动&&效果
启动
yarn dev 或者docker-compose up -d
效果
说明
简单demo 我已经push 到docker hub了,可以直接使用dalongrong/tesseract-wasm:ocr-web
启动方式
docker run -d -p 3000:3000 dalongrong/tesseract-wasm:ocr-web
以上只是一个简单的示例,可以参考调整,目前来说ocr 识别并不是很快
参考资料
https://fastify.dev/
https://github.com/huangang/fastify-file-upload
https://github.com/tesseract-ocr/tesseract
https://github.com/robertknight/tesseract-wasm
https://github.com/robertknight/tesseract-wasm/tree/main/examples
https://github.com/libvips/libvips
https://github.com/lovell/sharp
https://github.com/rongfengliang/tesseract-wasm-learning
https://flaviocopes.com/fix-dirname-not-defined-es-module-scope/