-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathExtraJobs.py
65 lines (54 loc) · 2.41 KB
/
ExtraJobs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
from __future__ import annotations
import typing
import pytesseract
import paddleocr
from IR import IIR
from Util import *
from AbstractFlagIndex import *
from IR import *
from Strategies.AbstractStrategy import *
class IIROcrPass(IIRPass):
    """IIR pass that OCRs one frame per interval and writes the text to a file.

    For every interval of the IIR, the frame stored under the strategy's OCR
    flag index is run through PaddleOCR and/or Tesseract (as enabled in the
    config) and one ``name,text`` row is written to ``dest + suffix``.

    Config keys read by the constructor:
        suffix:        appended to ``dest`` to form the output file name.
        separator:     inserted between the Paddle and Tesseract texts when
                       Tesseract runs and the text gathered so far is
                       non-empty.
        doPaddle:      enable the PaddleOCR engine.
        paddleLang:    language passed to ``paddleocr.PaddleOCR``.
        doTesseract:   enable the Tesseract engine.
        tesseractLang: language passed to Tesseract through ``-l``.
    """

    def __init__(self, config: dict, dest: str, strategy: AbstractOcrStrategy):
        """Store the configuration and derive the output file name.

        Args:
            config: Mapping holding the keys listed in the class docstring;
                a missing key raises ``KeyError``.
            dest: Output path prefix; ``config["suffix"]`` is appended to it.
            strategy: Supplies the OCR frame flag index and the frame
                cropping helpers used by ``apply``.
        """
        self.config: dict = config
        self.dest: str = dest
        self.strategy: AbstractOcrStrategy = strategy
        self.suffix: str = config["suffix"]
        self.separator: str = config["separator"]
        self.doPaddle: bool = config["doPaddle"]
        self.paddleLang: str = config["paddleLang"]
        # NOTE: the attribute name is misspelled ("Teseract"); it is kept
        # as-is because code outside this class may read it.
        self.doTeseract: bool = config["doTesseract"]
        self.tesseractLang: str = config["tesseractLang"]
        self.filename = self.dest + self.suffix

    def apply(self, iir: IIR):
        """Run the enabled OCR engines over every interval and write the rows.

        Each row has the form ``<interval name>,<text>``. The text is written
        verbatim (no CSV quoting). Every tenth interval name is printed as a
        progress indicator.

        Args:
            iir: Object whose ``intervals`` are iterated; each interval must
                provide ``getName(i)`` and ``getFlag(flagIndex)``.
        """
        # The context manager guarantees the file is flushed and closed even
        # if an OCR engine raises midway. UTF-8 is set explicitly because OCR
        # output is frequently non-ASCII and the platform default encoding
        # may be unable to represent it.
        with open(self.filename, "w", encoding="utf-8") as file:
            print(f"Writing to {self.filename}")
            # Only build the PaddleOCR engine when it is actually going to be
            # used; it is never touched when doPaddle is false.
            paddle = None
            if self.doPaddle:
                paddle = paddleocr.PaddleOCR(use_angle_cls=True, lang=self.paddleLang, show_log=False)
            ocrFrameFlagIndex: AbstractFlagIndex = self.strategy.getOcrFrameFlagIndex()
            for i, interval in enumerate(iir.intervals):
                buff: str = ""
                name: str = interval.getName(i)
                frame = interval.getFlag(ocrFrameFlagIndex)
                img = avFrame2CvMat(frame)
                if self.doPaddle:
                    buff += self._paddleText(paddle, img)
                if self.doTeseract:
                    # The separator is only emitted when something precedes
                    # the Tesseract text.
                    if buff != "":
                        buff += self.separator
                    buff += self._tesseractText(img)
                file.write(f"{name},{buff}\n")
                if i % 10 == 0:
                    print(name)

    def _paddleText(self, paddle, img) -> str:
        """Return the PaddleOCR text for the strategy's OCR crop of ``img``."""
        paddleFrame = self.strategy.cutOcrFrame(img)
        paddleResult = paddle.ocr(paddleFrame, cls=False, bin=False)
        # Entries of the result may be None; those are skipped. The words of
        # each remaining entry are concatenated without a separator.
        lines = [
            "".join(wordInfo[1][0] for wordInfo in line)
            for line in paddleResult
            if line is not None
        ]
        return "\n".join(lines).strip()

    def _tesseractText(self, img) -> str:
        """Return the Tesseract text for the strategy's clean crop of ``img``."""
        tesseractFrame = self.strategy.cutCleanOcrFrame(img)
        tesseractFrame = ensureMat(tesseractFrame)
        tesseractText: str = pytesseract.image_to_string(tesseractFrame, config=f"-l {self.tesseractLang} --psm 6")
        # Drop the final character of the raw output (presumably Tesseract's
        # trailing terminator -- TODO confirm), then flatten to one line.
        return tesseractText[:-1].replace("\n", "")