脚本实现:
"""Batch image OCR tool with a small PySimpleGUI front end.

The user picks a folder; every ``*.jpg`` / ``*.jpeg`` / ``*.png`` file directly
inside it is run through CnOcr and the recognised text is appended to
``output.txt`` in that same folder.
"""
import glob
from threading import Thread

import cv2
import numpy as np
import PySimpleGUI as sg
from cnocr import CnOcr

# Filename patterns collected from the selected folder, in processing order.
IMAGE_PATTERNS = ("*.jpg", "*.jpeg", "*.png")

# Event key the worker thread uses to hand a progress percentage (0-100)
# to the GUI thread; GUI elements are only touched from the event loop.
PROGRESS_EVENT = "-progress-update-"

# True while a recognition thread is active; prevents starting a second one.
running = False


def make_window():
    """Build and return the finalized main window.

    Layout: a read-only path input with a folder-browse button, a start
    button (key ``-start-``) and a progress bar (key ``-progress-``).
    """
    layouts = [
        [
            sg.Input(key="-path-", size=(40, 8), disabled=True),
            sg.FolderBrowse("选择文件夹"),
        ],
        [sg.Button("开始识别", key="-start-")],
        [sg.ProgressBar(max_value=100, key="-progress-")],
    ]
    return sg.Window("批量图片文本识别工具", layouts, finalize=True)


def ocr(path):
    """Recognise the text of every image in ``path`` and append it to output.txt.

    Intended to run on a worker thread. Relies on the module-level ``window``
    created in the ``__main__`` section: progress is reported to it as
    ``PROGRESS_EVENT`` events instead of updating the progress bar directly.
    The module-level ``running`` flag is always cleared when the function
    exits, even if recognition raises, so a new run can be started.

    Args:
        path: Folder containing the images; also receives ``output.txt``.
    """
    global running
    try:
        files = [
            name
            for pattern in IMAGE_PATTERNS
            for name in glob.glob(path + "/" + pattern)
        ]
        total = len(files)
        ocr_model = CnOcr()
        out_file = path + "/" + "output.txt"

        # Reset the bar so a second run does not start from a full bar.
        window.write_event_value(PROGRESS_EVENT, 0)

        for done, image_path in enumerate(files, start=1):
            print(image_path)
            # Read the bytes with NumPy and decode in memory; presumably chosen
            # over cv2.imread so that non-ASCII paths load -- TODO confirm.
            image = cv2.imdecode(np.fromfile(image_path, dtype=np.uint8), -1)
            if image is None:
                # Unreadable or corrupt image: report it and keep going
                # instead of killing the whole batch.
                print("运行失败")
            else:
                out = ocr_model.ocr(image)
                text = ''.join(str(line['text']) for line in out)
                print(text)
                try:
                    # Explicit UTF-8 so Chinese text is writable regardless
                    # of the platform's default encoding.
                    with open(out_file, "a", encoding="utf-8") as f:
                        f.write(f"{image_path}\n{text}\n\n\n")
                except (OSError, UnicodeError):
                    print("运行失败")

            if total == 1:
                print('只有一张图片')
            # Progress is the share of files handled so far; it reaches 100
            # on the last file, including the single-image case.
            window.write_event_value(PROGRESS_EVENT, done * 100 // total)
    finally:
        running = False


if __name__ == '__main__':
    window = make_window()
    while True:
        event, values = window.read(timeout=50)
        if event == sg.WINDOW_CLOSED:
            break
        if event == PROGRESS_EVENT:
            # Sent by the worker thread; the bar is updated here, on the
            # GUI thread.
            window["-progress-"].update_bar(values[PROGRESS_EVENT])
        elif event == "-start-" and not running:
            path = window["-path-"].get()
            if not path:
                # No folder chosen yet: an empty path would make the glob
                # scan the filesystem root, so ignore the click.
                continue
            running = True
            Thread(target=ocr, args=(path,), daemon=True).start()
    window.close()
● 相关依赖库安装不了时请自行百度解决;
● 可以实现单一图片的识别,也可以批量进行图片识别;
● 尽量将需要识别的图片单独放在一个目录下;识别结果会追加写入该目录下的output.txt文件中,请根据实际情况自行提取;
● 识别结果有一定的误差,视具体的图片质量而定,但大多数情况下,没有干扰的图片识别准确率还是挺高的;
版权声明:如无特殊说明,文章均为本站原创,转载请注明出处
本文链接:https://ahuaiph.top/article/ahuaiph_003/
许可协议:署名-非商业性使用 4.0 国际许可协议