使用python-ddddocr库识别图片文字内容

环境： conda create -n py312 python=3.12 && conda activate py312
依赖安装：pip install ddddocr flask

Python代码:

from flask import Flask, request
import ddddocr
import base64
import json
from PIL import Image
import io

# Flask application object; the @app.route decorators below register handlers on it.
app = Flask(__name__)

@app.route('/')
def index():
    """Return a fixed greeting string for the root URL."""
    greeting = "Hello, Flask!"
    return greeting
@app.route('/ocr', methods=['POST'])
def ocr():
    """Run OCR on an image described by the JSON request body.

    Request body examples::

        {"type": "file", "image": "./public/images/test.png"}

        {"type": "base64", "image": "data:image/png;base64,iVBORw0KGgo..."}

    Returns:
        A ``(body, status, headers)`` tuple with a JSON body:
        ``{"result": <text>}`` and status 200 on success, or
        ``{"error": <message>}`` and status 400 when the body is not a JSON
        object, ``image`` is missing or not a string, ``type`` is not
        ``"file"``/``"base64"``, or the image cannot be loaded.
    """
    json_headers = {'Content-Type': 'application/json'}

    # silent=True returns None instead of raising on a missing/invalid JSON body.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return json.dumps({"error": "Request body must be a JSON object"}), 400, json_headers

    img_str = data.get('image', '')
    # Reject non-strings as well: open() would treat an int as a file descriptor.
    if not img_str or not isinstance(img_str, str):
        return json.dumps({"error": "No image data provided"}), 400, json_headers

    img_type = data.get('type', '')
    try:
        if img_type == 'base64':
            # Decode the base64 string to an image
            img_data = decode_base64_to_img(img_str)
        elif img_type == 'file':
            # NOTE(security): the path comes straight from the client, so any
            # file readable by this process can be submitted. Restrict it to a
            # known directory before exposing this endpoint to untrusted users.
            with open(img_str, 'rb') as f:
                img_data = f.read()
        else:
            # Previously fell through with img_data unbound (HTTP 500).
            return json.dumps({"error": "Unsupported image type"}), 400, json_headers
    except (ValueError, OSError):
        # ValueError covers malformed base64 (binascii.Error is a subclass);
        # OSError covers missing/unreadable files.
        # NOTE(review): image-format errors raised by PIL are expected to be
        # OSError subclasses as well - confirm against the installed Pillow.
        return json.dumps({"error": "Unable to load image"}), 400, json_headers

    # Perform OCR on the image.
    # NOTE(review): a new engine is built on every request; if construction is
    # expensive, consider creating it once at module level.
    engine = ddddocr.DdddOcr()
    result = engine.classification(img_data)

    # Return the OCR result
    return json.dumps({"result": result}), 200, json_headers

# Base64 conversion to image
def decode_base64_to_img(base64_str):
    """Decode a base64 string, optionally wrapped in a data URI, into an image.

    Args:
        base64_str: Raw base64 text, or a data URI such as
            ``data:image/jpeg;base64,<payload>``.

    Returns:
        The object returned by ``Image.open`` for the decoded bytes.

    Raises:
        binascii.Error: If the payload is not correctly padded base64.
        Any exception raised by ``Image.open`` for the decoded bytes
        propagates unchanged.
    """
    # Strip any "data:<mime>;base64," header, not only the PNG one. Left in
    # place, b64decode would silently drop ':', ';' and ',' and decode the
    # header letters as payload, corrupting the image bytes.
    if base64_str.startswith("data:"):
        _, _, base64_str = base64_str.partition(",")
    # Decode the base64 string
    img_bytes = base64.b64decode(base64_str)
    # Convert bytes to an image
    return Image.open(io.BytesIO(img_bytes))

if __name__ == '__main__':
    import os

    # The server listens on all interfaces (0.0.0.0). Debug mode enables an
    # interactive debugger that can execute code, so it must not be on by
    # default here; opt in explicitly with FLASK_DEBUG=1 during development.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes')
    app.run(debug=debug_enabled, host='0.0.0.0', port=5002)

使用阿里云OCR识别

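https://ai.aliyun.com/ocr

依赖安装：npm install @alicloud/ocr-api20210707 @alicloud/openapi-client @alicloud/credentials @alicloud/tea-util @alicloud/darabonba-stream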

import ocr_api20210707 from '@alicloud/ocr-api20210707';
import OpenApi from '@alicloud/openapi-client';
import Credential, { Config } from '@alicloud/credentials';
import Util from '@alicloud/tea-util';
import Stream from '@alicloud/darabonba-stream'

/**
 * Helper around the Alibaba Cloud OCR SDK's RecognizeAllText request.
 */
class OCRClient {

  /**
   * Build an OCR API client for the cn-hangzhou endpoint using access-key
   * credentials.
   *
   * The key pair is read from the ALIBABA_CLOUD_ACCESS_KEY_ID and
   * ALIBABA_CLOUD_ACCESS_KEY_SECRET environment variables when they are set,
   * so real secrets do not have to be written into this file; otherwise the
   * original placeholder values are used.
   *
   * @returns {object} A new SDK client instance.
   */
  static createClient() {
    const credentialsConfig = new Credential.Config({
      type: 'access_key',
      accessKeyId: process.env.ALIBABA_CLOUD_ACCESS_KEY_ID || 'YourAccessKeyId',
      accessKeySecret: process.env.ALIBABA_CLOUD_ACCESS_KEY_SECRET || 'YourAccessKeySecret',
    });

    const credential = new Credential.default(credentialsConfig);
    const config = new OpenApi.Config({
      credential: credential,
    });

    config.endpoint = `ocr-api.cn-hangzhou.aliyuncs.com`;
    return new ocr_api20210707.default(config);
  }

  /**
   * Recognize the text in an image given by URL or by local file path.
   *
   * @param {string} img_type - 'url' or 'file'.
   * @param {string} value - The image URL, or the file path. For 'file', a
   *   falsy value falls back to './public/captcha.png'.
   * @returns {Promise<*>} Resolves to `response.body.data` on success, or to
   *   undefined when img_type is unsupported or the request fails (the error
   *   message is logged in both cases).
   */
  static async main(img_type, value) {
    // Reject unknown types before creating a client: previously reqData stayed
    // undefined and an empty request was still sent to the service.
    if (img_type !== 'url' && img_type !== 'file') {
      console.log(`Unsupported img_type: ${img_type}`);
      return undefined;
    }

    const client = OCRClient.createClient();

    let reqData;
    if (img_type === 'url') {
      reqData = {
        url: value,
        type: 'General',
      };
    } else {
      const imgPath = value ? value : './public/captcha.png';
      reqData = {
        body: Stream.default.readFromFilePath(imgPath),
        type: 'General',
      };
    }
    const recognizeAllTextRequest = new ocr_api20210707.RecognizeAllTextRequest(reqData);

    const runtime = new Util.RuntimeOptions({ });
    try {
      const resp = await client.recognizeAllTextWithOptions(recognizeAllTextRequest, runtime);
      // Round-trip through JSON to get a plain object out of the SDK response.
      const response = JSON.parse(Util.default.toJSONString(resp));
      return response.body.data;
    } catch (error) {
      // Failures are logged rather than rethrown; callers receive undefined.
      console.log(error.message);
      Util.default.assertAsString(error.message);
    }
  }
}

// CommonJS alternative: exports.OCRClient = OCRClient;
export default OCRClient; // ES module default export
// Usage examples (main is async, so await the result or use .then()):
// const data = await OCRClient.main('url', 'https://cdn.nodjoy.com/temp/author.jpg');
// const data = await OCRClient.main('file', './public/captcha.png');

使用tesseract.js识别图片内容

npm install express tesseract.js sharp svg-captcha

import express from "express";
import sharp from "sharp";
import path from "path";
import fs from "fs";
import svgCaptcha from 'svg-captcha';

// Express application instance; the route and listen calls below use it.
const app = express();

// Generate an image captcha and return it as a base64-encoded PNG data URI.
app.get('/captcha', async (req, res) => {
  try {
    const captcha = svgCaptcha.create({
      size: 4,
      noise: 2,
      color: true,
      background: '#ccf'
    });
    // captcha.data is the SVG markup (<svg>...</svg>);
    // captcha.text is the text shown in the image (e.g. 'b3j5').

    // Rasterize the SVG to PNG so it can be handed to image-based OCR.
    const pngBuffer = await sharp(Buffer.from(captcha.data)).png().toBuffer();
    const base64 = pngBuffer.toString('base64');
    // Optional: also save the image file to a folder
    // const fileName = 'captcha_' + Date.now() + (Math.floor(Math.random() * 1000) + 1000) + '.png';
    // const filePath = path.join('./public', fileName);
    // fs.writeFileSync(filePath, pngBuffer);
    res.json({
      image: `data:image/png;base64,${base64}`,
      success: true
    });
  } catch (err) {
    // Without this, a failed conversion rejects the handler's promise and,
    // depending on the Express version, the client may never get a response.
    console.error('Failed to generate captcha:', err);
    res.status(500).json({ success: false });
  }
});

// Start the HTTP server and log once it is accepting connections.
const PORT = 3000;
const announceStartup = () => {
  console.log(`Server running...`);
};
app.listen(PORT, announceStartup);

// 识别
import Tesseract from "tesseract.js";
// Image to recognize, given as a base64 data URI (truncated placeholder).
const imageBase64 = "data:image/png;base64,iVBORw..."
try {
  // Recognize the text in the image
  const result = await Tesseract.recognize(
    imageBase64, // image data
    'eng',       // language; can be replaced with e.g. 'chi_sim'
    {
      // logger: m => console.log(m) // optional: print recognition progress
    }
  );
  console.log('tesseractOCR识别结果:', result.data.text);
} catch (err) {
  // Include the error itself; the original message alone hid the cause.
  console.error('tesseractOCR识别出错', err);
}