1
+ try :
2
+ import cv2 .cv as cv
3
+ OPENCV_AVAILABLE = True
4
+ except ImportError :
5
+ OPENCV_AVAILABLE = False
6
+
7
+ from subprocess import Popen , PIPE
8
+ import os
9
+
10
# Name of the Tesseract executable that is looked up on PATH and launched.
PROG_NAME = 'tesseract'
# Scratch image file written by iplimage_to_string before running OCR.
TEMP_IMAGE = 'tmp.bmp'
# Output base name handed to tesseract; the recognised text is read back
# from TEMP_FILE + ".txt".
TEMP_FILE = 'tmp'

# Page segmentation modes, exposed as names so callers do not have to
# remember the numeric value expected by tesseract's "-psm" option.
PSM_OSD_ONLY = 0            # orientation and script detection (OSD) only
PSM_SEG_AND_OSD = 1         # automatic page segmentation with OSD
PSM_SEG_ONLY = 2            # automatic page segmentation, no OSD or OCR
PSM_AUTO = 3                # fully automatic page segmentation, no OSD
PSM_SINGLE_COLUMN = 4       # single column of text of variable sizes
PSM_VERTICAL_ALIGN = 5      # single uniform block of vertically aligned text
PSM_UNIFORM_BLOCK = 6       # single uniform block of text
PSM_SINGLE_LINE = 7         # single text line
PSM_SINGLE_WORD = 8         # single word
PSM_SINGLE_WORD_CIRCLE = 9  # single word in a circle
PSM_SINGLE_CHAR = 10        # single character
26
+
27
class TesseractException(Exception):
    """Raised when the tesseract process exits with a non-zero status."""
29
+
30
class TesseractNotFound(Exception):
    """Raised when the tesseract executable cannot be found on PATH."""
32
+
33
def check_path():
    """Verify that the Tesseract executable can be found on PATH.

    Every directory listed in the PATH environment variable is searched
    for a non-directory entry named PROG_NAME.

    Returns:
        True as soon as a matching file is found.

    Raises:
        TesseractNotFound: if no PATH directory contains the executable.
    """
    # On Windows the binary carries an ".exe" suffix, so accept both
    # spellings there.
    candidates = [PROG_NAME]
    if os.name == 'nt':
        candidates.append(PROG_NAME + '.exe')

    # os.pathsep is ':' on POSIX and ';' on Windows; a hard-coded ':' would
    # cut Windows entries apart at the drive letter ("C:\...").
    for directory in os.environ.get('PATH', '').split(os.pathsep):
        for name in candidates:
            filepath = os.path.join(directory, name)
            if os.path.exists(filepath) and not os.path.isdir(filepath):
                return True
    raise TesseractNotFound("%s was not found in PATH" % PROG_NAME)
39
+
40
def process_request(input_file, output_file, lang=None, psm=None):
    """Run tesseract on input_file, writing its result to output_file.

    Args:
        input_file: path of the image handed to tesseract.
        output_file: output base name handed to tesseract.
        lang: optional language code, passed with "-l".
        psm: optional page segmentation mode (see the PSM_* constants),
            passed with "-psm".

    Raises:
        TesseractException: if tesseract exits with a non-zero status. The
            message is "File not found" for status 2, the captured stderr
            prefixed with "Language code invalid: " for status -11, and
            the captured stderr alone otherwise.
    """
    args = [PROG_NAME, input_file, output_file]
    if lang is not None:
        args.extend(["-l", lang])
    if psm is not None:
        # NOTE(review): newer Tesseract releases appear to spell this option
        # "--psm"; confirm against the installed version before changing it.
        args.extend(["-psm", str(psm)])

    proc = Popen(args, stdout=PIPE, stderr=PIPE)
    # communicate() drains both pipes and waits for the process to exit,
    # so returncode is populated afterwards.
    _, errors = proc.communicate()

    code = proc.returncode
    if code == 0:
        return

    # On Python 3 the pipe yields bytes, which cannot be concatenated with
    # the str prefixes below; on Python 2 it is already a str and is left
    # untouched.
    if not isinstance(errors, str):
        errors = errors.decode('utf-8', 'replace')

    if code == 2:
        raise TesseractException("File not found")
    if code == -11:
        # A negative return code means the child was terminated by that
        # signal number; this wrapper reports it as an invalid language.
        raise TesseractException("Language code invalid: " + errors)
    raise TesseractException(errors)
59
+
60
def iplimage_to_string(im, lang=None, psm=None):
    """Run OCR on an in-memory OpenCV image.

    The image is saved to TEMP_IMAGE with cv.SaveImage, recognised through
    image_to_string, and the temporary image is removed afterwards.

    Args:
        im: image object accepted by cv.SaveImage.
        lang: optional language code forwarded to image_to_string.
        psm: optional page segmentation mode forwarded to image_to_string.

    Returns:
        The recognised text, or -1 (after printing a notice) when the
        OpenCV bindings could not be imported.
    """
    if not OPENCV_AVAILABLE:
        print("OpenCV not Available")
        return -1

    cv.SaveImage(TEMP_IMAGE, im)
    try:
        return image_to_string(TEMP_IMAGE, lang, psm)
    finally:
        # Remove the scratch image even when recognition raises.
        os.remove(TEMP_IMAGE)
69
+
70
def image_to_string(file, lang=None, psm=None):
    """Run tesseract on an image file and return the recognised text.

    Args:
        file: path of the image to recognise.
        lang: optional language code forwarded to process_request.
        psm: optional page segmentation mode forwarded to process_request.

    Returns:
        The contents of the text file produced by tesseract.

    Raises:
        TesseractNotFound: if the executable is not on PATH.
        TesseractException: if tesseract exits with a non-zero status.
    """
    check_path()
    process_request(file, TEMP_FILE, lang, psm)

    # The result is read back from TEMP_FILE with a ".txt" suffix.
    result_path = TEMP_FILE + ".txt"
    try:
        # Close the handle before deleting: removing a file that is still
        # open fails on some platforms.
        with open(result_path, "r") as result:
            txt = result.read()
    finally:
        # Clean up even if reading fails; the existence check keeps a
        # failed open() from being masked by a second error here.
        if os.path.exists(result_path):
            os.remove(result_path)
    return txt
77
+
78
+
79
if __name__ == '__main__':
    # Example: recognise "image.jpg" with the "fra" language data and
    # automatic page segmentation, then print the text.
    print(image_to_string("image.jpg", "fra", PSM_AUTO))
0 commit comments