脚本实现:

import glob
import cv2
import numpy as np
import PySimpleGUI as sg
from cnocr import CnOcr
from threading import Thread

# True while a background OCR thread is active: the main loop sets it when a
# run is started and ocr() clears it when it finishes, so a second click on
# the start button is ignored while a run is in progress.
running = False


def make_window():
    """Build and finalize the main window of the batch OCR tool.

    The layout consists of three rows:

    * a disabled (read-only) input keyed ``-path-`` next to a folder-browse
      button,
    * the start button keyed ``-start-``,
    * a progress bar keyed ``-progress-`` with a maximum value of 100.

    Returns:
        The finalized ``sg.Window`` instance.
    """
    folder_row = [
        sg.Input(key="-path-", size=(40, 8), disabled=True),
        sg.FolderBrowse("选择文件夹"),
    ]
    start_row = [sg.Button("开始识别", key="-start-")]
    progress_row = [sg.ProgressBar(max_value=100, key="-progress-")]

    return sg.Window(
        "批量图片文本识别工具",
        [folder_row, start_row, progress_row],
        finalize=True,
    )


def ocr(path):
    """Run OCR on every image in *path* and append the text to output.txt.

    Collects the ``*.jpg``, ``*.jpeg`` and ``*.png`` files directly inside
    *path*, recognises each one with CnOcr and appends the image path followed
    by the joined recognised text to ``<path>/output.txt``. After each image
    the ``-progress-`` bar of the module-level ``window`` is advanced; the
    module-level ``running`` flag is cleared when the function exits, whether
    it finished normally or raised.

    Args:
        path: Directory containing the images to recognise.
    """
    global running
    try:
        patterns = ["*.jpg", "*.jpeg", "*.png"]
        files = sum([glob.glob(path + "/" + pattern) for pattern in patterns], [])
        n = len(files)
        ocr_model = CnOcr()
        out_file = path + "/" + "output.txt"

        # Count from 1 so the bar reaches 100 after the last image and the
        # division below never sees a zero denominator (the loop body does
        # not run at all when no images were found).
        for index, image_path in enumerate(files, start=1):
            print(image_path)

            # The file is read into a byte buffer and decoded from memory
            # rather than opened by path through OpenCV.
            # NOTE(review): presumably this is to cope with non-ASCII paths
            # on Windows — confirm.
            image = cv2.imdecode(np.fromfile(image_path, dtype=np.uint8), -1)
            out = ocr_model.ocr(image)

            ocrtest = ''.join(str(item['text']) for item in out)
            print(ocrtest)

            # Explicit UTF-8 so recognised text is not rejected by a narrower
            # locale default encoding. NOTE(review): an output.txt created by
            # an earlier run in another encoding will end up mixed.
            try:
                with open(out_file, "a+", encoding="utf-8") as f:
                    f.write(str(image_path))
                    f.write("\n")
                    f.write(ocrtest)
                    f.write("\n\n\n")
            except (OSError, UnicodeError):
                print("运行失败")

            if n == 1:
                print('只有一张图片')

            # Progress is based on the number of files processed, not on the
            # number of text lines found in the current image.
            # NOTE(review): this touches the GUI from a worker thread; confirm
            # that is acceptable for the PySimpleGUI port in use.
            window["-progress-"].update_bar(int(index / n * 100))
    finally:
        # Always release the flag, otherwise a failed run would leave the
        # start button permanently ignored by the main loop.
        running = False


if __name__ == '__main__':
    # Script entry point: create the window and run the event loop until the
    # user closes it. OCR work is handed off to a daemon thread so the loop
    # keeps servicing the GUI while images are processed.
    window = make_window()
    while True:
        # Read with a 50 ms timeout so the loop keeps cycling even when the
        # user is not interacting with the window.
        event, values = window.read(timeout=50)
        if event == sg.WINDOW_CLOSED:
            break
        if event == "-start-" and not running:
            path = window["-path-"].get()
            if not path:
                # Without a selected folder ocr() would build patterns such
                # as "/*.jpg" and scan the filesystem root, so refuse to
                # start and tell the user what is missing.
                sg.popup("请先选择文件夹")
                continue
            running = True
            Thread(target=ocr, args=(path,), daemon=True).start()
    # Release the GUI resources once the event loop has ended.
    window.close()

  ● 相关依赖库安装不了时请自行百度解决;

  ● 可以实现单一图片的识别,也可以批量进行图片识别;

  ● 尽量将需要识别的图片单独放在一个目录下;识别结果会以追加方式写入该目录下的 output.txt 文件,请自行根据实际情况进行提取;

  ● 识别结果有一定的误差,视具体的图片质量而定,但大多数情况下,没有干扰的图片识别准确率还是挺高的;

图片Alt

图片Alt

版权声明:如无特殊说明,文章均为本站原创,转载请注明出处

本文链接:https://ahuaiph.top/article/ahuaiph_003/

许可协议:署名-非商业性使用 4.0 国际许可协议