使用python-ddddocr库识别图片文字内容
环境: conda create -n py312 python=3.12 && conda activate py312
依赖安装: pip install ddddocr flask pillow
Python代码:
from flask import Flask, request
import ddddocr
import base64
import json
from PIL import Image
import io
# Flask application instance; the @app.route decorators register their handlers on it.
app = Flask(__name__)
@app.route('/')
def index():
    """Answer requests to the root URL with a fixed greeting string."""
    greeting = "Hello, Flask!"
    return greeting
# Recognizer shared by all requests; created lazily by _get_ocr_engine().
_OCR_ENGINE = None


def _json_response(payload, status):
    """Return *payload* serialized as a JSON body with the given HTTP status."""
    return json.dumps(payload), status, {'Content-Type': 'application/json'}


def _get_ocr_engine():
    """Return the shared ddddocr recognizer, constructing it on first use.

    Constructing ``DdddOcr`` loads the recognition model, so it is done once
    instead of on every request.
    NOTE(review): assumes one recognizer may be reused across requests
    (including concurrent ones) -- confirm against the ddddocr documentation.
    """
    global _OCR_ENGINE
    if _OCR_ENGINE is None:
        _OCR_ENGINE = ddddocr.DdddOcr()
    return _OCR_ENGINE


@app.route('/ocr', methods=['POST'])
def ocr():
    """Run OCR on the image described by the JSON request body.

    Request examples::

        {"type": "file", "image": "./public/images/test.png"}

        {"type": "base64", "image": "..."}

    Returns:
        A ``(body, status, headers)`` tuple. On success the body is
        ``{"result": <recognized text>}`` with status 200. A missing or
        malformed body, a missing or non-string ``image``, an unsupported
        ``type``, or an image that cannot be loaded yields
        ``{"error": ...}`` with status 400.
    """
    # silent=True returns None instead of raising for a missing or invalid JSON body.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return _json_response({"error": "Request body must be a JSON object"}, 400)

    img_str = payload.get('image', '')
    if not img_str:
        return _json_response({"error": "No image data provided"}, 400)
    # A non-string value must never reach open(): an int would be treated as
    # a file descriptor.
    if not isinstance(img_str, str):
        return _json_response({"error": "'image' must be a string"}, 400)

    img_type = payload.get('type', '')
    try:
        if img_type == 'base64':
            # Decode the base64 string to an image
            img_data = decode_base64_to_img(img_str)
        elif img_type == 'file':
            # NOTE(security): the path comes straight from the client, so any
            # file readable by this process can be opened. Restrict it to a
            # known directory before exposing this service.
            with open(img_str, 'rb') as f:
                img_data = f.read()
        else:
            return _json_response(
                {"error": "Unsupported image type; expected 'base64' or 'file'"},
                400,
            )
    except (ValueError, OSError):
        # Invalid base64 surfaces as ValueError (binascii.Error); unreadable
        # files and undecodable image bytes surface as OSError subclasses.
        return _json_response({"error": "Unable to load image"}, 400)

    # Perform OCR on the image
    result = _get_ocr_engine().classification(img_data)
    return _json_response({"result": result}, 200)
# Base64 conversion to image
def decode_base64_to_img(base64_str):
    """Decode a base64 string, optionally wrapped in a data URL, into an image.

    Args:
        base64_str: Base64-encoded image bytes, either bare or prefixed with
            a data-URL header such as ``data:image/png;base64,``.

    Returns:
        The object returned by ``Image.open`` for the decoded bytes.

    Raises:
        binascii.Error: If the base64 payload is incorrectly padded (this is
            a ``ValueError`` subclass).
        Exception: Whatever ``Image.open`` raises when the decoded bytes are
            not a readable image.
    """
    # Strip any data-URL header, not just the PNG one: b64decode silently
    # drops non-alphabet characters, so a leftover "data:image/jpeg;base64,"
    # header would be decoded into garbage bytes ahead of the image.
    header, separator, body = base64_str.partition(",")
    if separator and header.startswith("data:"):
        base64_str = body
    # Decode the base64 string
    img_bytes = base64.b64decode(base64_str)
    # Wrap the bytes in a file-like object so Image.open can read them
    return Image.open(io.BytesIO(img_bytes))
if __name__ == '__main__':
    import os

    # The server listens on all interfaces, and Flask's debug mode enables an
    # interactive debugger that can execute arbitrary code. Debug mode is
    # therefore opt-in: set FLASK_DEBUG=1 (or true/yes/on) for local work.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes', 'on')
    app.run(debug=debug_enabled, host='0.0.0.0', port=5002)
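使用阿里云OCR识别
依赖安装: npm install @alicloud/ocr-api20210707 @alicloud/openapi-client @alicloud/credentials @alicloud/tea-util @alicloud/darabonba-stream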
import ocr_api20210707 from '@alicloud/ocr-api20210707';
import OpenApi from '@alicloud/openapi-client';
import Credential, { Config } from '@alicloud/credentials';
import Util from '@alicloud/tea-util';
import Stream from '@alicloud/darabonba-stream'
/**
 * Helper around the Alibaba Cloud OCR client (`ocr_api20210707`) that sends
 * RecognizeAllText requests for an image given by URL or by local file path.
 */
class OCRClient {
  /**
   * Build an OCR API client that authenticates with an access key pair.
   *
   * The key pair is read from ALIBABA_CLOUD_ACCESS_KEY_ID and
   * ALIBABA_CLOUD_ACCESS_KEY_SECRET when those variables are set, so real
   * secrets do not have to live in source. The placeholder strings remain
   * as the fallback.
   *
   * @returns {object} a client configured for the cn-hangzhou OCR endpoint
   */
  static createClient() {
    const credentialsConfig = new Credential.Config({
      type: 'access_key',
      accessKeyId: process.env.ALIBABA_CLOUD_ACCESS_KEY_ID || 'YourAccessKeyId',
      accessKeySecret: process.env.ALIBABA_CLOUD_ACCESS_KEY_SECRET || 'YourAccessKeySecret',
    });
    const credential = new Credential.default(credentialsConfig);
    const config = new OpenApi.Config({
      credential: credential,
    });
    config.endpoint = `ocr-api.cn-hangzhou.aliyuncs.com`;
    return new ocr_api20210707.default(config);
  }

  /**
   * Recognize the text in an image.
   *
   * @param {string} img_type 'url' to pass an image URL, 'file' to upload a local file
   * @param {string} value the image URL, or the file path; for 'file' a falsy
   *   value falls back to './public/captcha.png'
   * @returns {Promise<*>} the `body.data` field of the API response, or
   *   `undefined` when `img_type` is unsupported or the request fails
   *   (the failure is logged)
   */
  static async main(img_type, value) {
    let reqData;
    if (img_type === 'url') {
      reqData = {
        url: value,
        type: 'General',
      };
    } else if (img_type === 'file') {
      const imgPath = value ? value : './public/captcha.png';
      reqData = {
        body: Stream.default.readFromFilePath(imgPath),
        type: 'General',
      };
    } else {
      // Skip the API call: a request built from no data cannot succeed.
      console.log(`Unsupported img_type: ${img_type} (expected 'url' or 'file')`);
      return undefined;
    }

    const client = OCRClient.createClient();
    const recognizeAllTextRequest = new ocr_api20210707.RecognizeAllTextRequest(reqData);
    const runtime = new Util.RuntimeOptions({ });
    try {
      const resp = await client.recognizeAllTextWithOptions(recognizeAllTextRequest, runtime);
      // Round-trip through JSON to get a plain object out of the SDK response.
      const response = JSON.parse(Util.default.toJSONString(resp));
      return response.body.data;
    } catch (error) {
      // Best-effort: log the failure and resolve to undefined rather than reject.
      console.log(error.message);
      Util.default.assertAsString(error.message);
      return undefined;
    }
  }
}
// exports.OCRClient = OCRClient; // CommonJS alternative to the export below
export default OCRClient; // ES module default export
// Usage examples (main() is async, so await the call or chain .then()):
// OCRClient.main('url','https://cdn.nodjoy.com/temp/author.jpg');
// OCRClient.main('file','./public/captcha.png');
使用tesseract.js识别图片内容
依赖安装: npm install express tesseract.js sharp svg-captcha
import express from "express";
import sharp from "sharp";
import path from "path";
import fs from "fs";
import svgCaptcha from 'svg-captcha';
// Express application on which the /captcha route is registered.
const app = express();
// Generate an image captcha and return it as a base64 PNG data URL:
// { image: 'data:image/png;base64,...', success: true }.
// On failure the response is HTTP 500 with { success: false, error: ... }.
app.get('/captcha', async (req, res) => {
  try {
    const captcha = svgCaptcha.create({
      size: 4,
      noise: 2,
      color: true,
      background: '#ccf'
    });
    // captcha.data holds the SVG markup (<svg>...</svg>);
    // captcha.text holds the expected answer (e.g. 'b3j5').
    const pngBuffer = await sharp(Buffer.from(captcha.data)).png().toBuffer();
    const base64 = pngBuffer.toString('base64');
    // To also keep a copy on disk, write pngBuffer to a file, e.g.
    // fs.writeFileSync(path.join('./public', 'captcha_' + Date.now() + '.png'), pngBuffer);
    res.json({
      image: `data:image/png;base64,${base64}`,
      success: true
    });
  } catch (err) {
    // An async handler that rejects is not guaranteed to produce a response
    // (Express 4 leaves the request hanging), so always reply explicitly.
    console.error('Failed to generate captcha:', err);
    res.status(500).json({
      success: false,
      error: 'Failed to generate captcha'
    });
  }
});
// Start the HTTP server on port 3000 and log a message once it is listening.
app.listen(3000, function onListening() {
  console.log(`Server running...`);
});
// 识别
import Tesseract from "tesseract.js";
// Base64 image data to recognize (placeholder value).
// NOTE(review): presumably the data URL returned by /captcha -- confirm.
const imageBase64 = "..."
try {
  // Recognize the text in the image
  const result = await Tesseract.recognize(
    imageBase64, // image data
    'eng', // language; can be replaced with e.g. 'chi_sim'
    {
      // logger: m => console.log(m) // optional: print recognition progress
    }
  );
  console.log('tesseractOCR识别结果:', result.data.text);
} catch (err) {
  // Log the error object as well so the cause of the failure is visible.
  console.error('tesseractOCR识别出错', err);
}