1. 百度 PaddleOCR 介绍
2. 环境安装
pip install paddlepaddle -i https://pypi.tuna.tsinghua.edu.cn/simple
pip install paddleocr --upgrade PyMuPDF==1.23.8 -i https://mirror.baidu.com/pypi/simple
# 从 https://github.com/PaddlePaddle/PaddleOCR 下载 requirements.txt,并把其中 PyMuPDF 那一行注释掉(上一步已安装固定版本 PyMuPDF==1.23.8)
pip install -r requirements.txt -i https://mirror.baidu.com/pypi/simple
3. 用法: python my_ocr.py <图片或文件夹>(识别结果写入图片所在目录下的 out.txt)
#!/bin/env python
import os
import sys
import time
import logging # 关闭WARNING
from tqdm.auto import trange # 进度条
from paddleocr import PaddleOCR # 百度识别
# Suppress every log record at WARNING severity and below.  A single call is
# enough: the threshold already covers DEBUG and INFO, and each call to
# logging.disable() replaces the previous threshold anyway.
logging.disable(logging.WARNING)

# Name of the text file the recognised lines are appended to.  It is a
# relative path, so it resolves against the current working directory.
file = "out.txt"
def ocr_imgs(img):
    """Run OCR on one image and append the recognised text to ``file``.

    The image is first processed with text detection enabled.  When that
    yields nothing, it is processed again with ``det=False`` (recognition
    only).  A header line containing the image name centred in a 50-column
    rule of dashes is written first, followed by one recognised string per
    line.

    Relies on two module-level names: ``ocr`` (the PaddleOCR engine) and
    ``file`` (the output path).

    Args:
        img: Path of the image to recognise; also used as the header text.
    """
    result = ocr.ocr(img, cls=False)
    # Some PaddleOCR releases return [None] instead of [[]] for an image with
    # no detected text, so test truthiness rather than calling len() on it.
    if not result or not result[0]:
        result = ocr.ocr(img, cls=False, det=False)

    # Explicit UTF-8 so the (Chinese) output does not depend on the locale.
    with open(file, 'a', encoding='utf-8') as f:
        f.write(f'\n{img.center(50, "-")}\n')
        for page in result or []:
            # A page may still be None after the fallback; treat it as empty.
            for line in page or []:
                if isinstance(line, list):
                    # List entries carry the text tuple last; presumably
                    # [box, (text, score)] -- verify against PaddleOCR docs.
                    f.write(f'{line[-1][0]}\n')
                elif isinstance(line, tuple):
                    # Tuple entries start with the text; presumably
                    # (text, score) from the recognition-only pass.
                    f.write(f'{line[0]}\n')
def check_args():
    """Validate ``sys.argv[1]`` and prepare the working directory.

    The argument may be an image file or a directory.  The process changes
    into the directory that holds the image(s) and deletes a stale output
    file (module-level ``file``) left over from a previous run.

    Returns:
        bool: True when the argument names a single file, False when it
        names a directory.

    Raises:
        SystemExit: When the argument is missing or does not name an
            existing file or directory.
    """
    if len(sys.argv) < 2:
        print("Usage: %s <path> or <path/file>" % sys.argv[0])
        sys.exit(1)

    arg = sys.argv[1]
    if os.path.isfile(arg):
        single_file = True
        # dirname() is '' for a bare file name, and os.chdir('') raises
        # FileNotFoundError, so fall back to the current directory.
        workdir = os.path.dirname(arg) or os.curdir
    elif os.path.isdir(arg):
        single_file = False
        workdir = arg
    else:
        # Neither a file nor a directory: report it instead of crashing
        # later with an unbound local or a failing chdir.
        print("%s: no such file or directory" % arg)
        sys.exit(1)

    os.chdir(workdir)
    # Start from a clean output file; ocr_imgs() only ever appends to it.
    if os.path.exists(file):
        os.remove(file)
    return single_file
########################################################################
if __name__ == "__main__":
    # Script entry point: collect the images, OCR each one into the output
    # file, report the elapsed time, then open the result in gedit.
    print(f"[{time.strftime('%X')}] 识别开始...")
    start = time.time()

    # check_args() also changes into the image directory, so bare file names
    # are valid paths from here on.
    if check_args():
        candidates = [os.path.basename(sys.argv[1])]
    else:
        candidates = [name for name in os.listdir() if os.path.isfile(name)]

    # Keep only supported image types.  The comparison is case-insensitive so
    # that e.g. "IMG_0001.JPG" is not silently skipped, and filtering before
    # the loop makes the progress bar count images only.
    image_exts = ('.jpg', '.jpeg', '.png')  # not handled: bmp/webp/tiff/svg/gif
    imagelist = [
        name for name in candidates
        if os.path.splitext(name)[1].lower() in image_exts
    ]
    imagelist.sort(key=str.lower)

    # Must stay a module-level name: ocr_imgs() reads ``ocr`` as a global.
    # use_angle_cls controls the text-angle classifier (original note:
    # vertical text); it is left off here.
    ocr = PaddleOCR(use_angle_cls=False, lang="ch")

    # trange is the only tqdm helper this file imports, hence the index loop.
    for i in trange(len(imagelist), leave=False):
        ocr_imgs(imagelist[i])

    run_time = round(time.time() - start)
    print(f"[{time.strftime('%X')}] 结束耗时{run_time}秒")

    # Open the result in gedit; the trailing "&" backgrounds it in the shell
    # so this script can exit immediately.
    cmd = "gedit " + file + "&"
    os.system(cmd)
文章来源地址https://www.toymoban.com/news/detail-689109.html
文章来源:https://www.toymoban.com/news/detail-689109.html
到了这里,关于ubuntu OCR 脚本的文章就介绍完了。如果您还想了解更多内容,请在右上角搜索TOY模板网以前的文章或继续浏览下面的相关文章,希望大家以后多多支持TOY模板网!