Skip to content

Commit 2b0d214

Browse files
committed
Initial commit
1 parent dc1468a commit 2b0d214

File tree

2 files changed

+108
-1
lines changed

2 files changed

+108
-1
lines changed

README.md

+28-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,31 @@
11
Pytesser
22
========
33

4-
Python wrapper for the tesseract OCR engine. The module is based on OpenCV
4+
Python wrapper for the tesseract OCR engine. The module is based on OpenCV.
5+
Article : http://robindavid.comli.com/pytesser-python-wrapper-for-the-tesseract-ocr-engine/
6+
7+
Information
8+
------------
9+
10+
There are already multiple modules called pytesser, but this one differs slightly on the following points:
11+
12+
* It implements all the features of the tesseract engine, including the choice of the language and the page segmentation mode.
13+
* The whole module is contained in one file (the other modules I have tried are quite messy).
14+
* It supports OpenCV, so you can directly provide an IplImage to the module.
15+
16+
How to use it?
17+
---------------
18+
19+
There are two ways to use it: either you give it a filename, or you pass an IplImage directly. For a filename you can do:
20+
21+
import pytesser
22+
txt = pytesser.image_to_string("myimage.jpg") #By default language is eng, and page seg mode auto
23+
24+
#To give specific parameters:
25+
txt = pytesser.image_to_string("myimage.jpg","fra",pytesser.PSM_SINGLE_WORD) #Analyse image as a single french word
26+
27+
28+
Or you can directly give it an IplImage like this:
29+
30+
image = cv.LoadImage("myimage.jpg")
31+
txt = pytesser.iplimage_to_string(image)

pytesser.py

+80
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
try:
2+
import cv2.cv as cv
3+
OPENCV_AVAILABLE = True
4+
except ImportError:
5+
OPENCV_AVAILABLE = False
6+
7+
from subprocess import Popen, PIPE
8+
import os
9+
10+
# Executable name that check_path() looks for on the PATH and that
# process_request() launches.
PROG_NAME = "tesseract"
# Scratch image file written by iplimage_to_string() before running OCR.
TEMP_IMAGE = "tmp.bmp"
# Output name handed to tesseract; image_to_string() reads the recognised
# text back from TEMP_FILE + ".txt".
TEMP_FILE = "tmp"
13+
14+
# Page segmentation modes exposed as named constants, so callers do not have
# to remember the numeric values. The chosen value is passed to tesseract as
# the argument of "-psm" (see process_request).
(
    PSM_OSD_ONLY,            # 0
    PSM_SEG_AND_OSD,         # 1
    PSM_SEG_ONLY,            # 2
    PSM_AUTO,                # 3
    PSM_SINGLE_COLUMN,       # 4
    PSM_VERTICAL_ALIGN,      # 5
    PSM_UNIFORM_BLOCK,       # 6
    PSM_SINGLE_LINE,         # 7
    PSM_SINGLE_WORD,         # 8
    PSM_SINGLE_WORD_CIRCLE,  # 9
    PSM_SINGLE_CHAR,         # 10
) = range(11)
26+
27+
class TesseractException(Exception):
    """Raised when tesseract does not return 0."""
29+
30+
class TesseractNotFound(Exception):
    """Raised when tesseract is not found in the PATH."""
32+
33+
def check_path():
    """Check that the tesseract executable can be found on the PATH.

    Returns:
        True as soon as one PATH directory contains an entry named
        PROG_NAME that exists and is not a directory.

    Raises:
        TesseractNotFound: if no PATH directory contains such an entry.
    """
    # os.pathsep is the platform's PATH separator (':' on POSIX, ';' on
    # Windows). A hard-coded ':' would cut Windows entries apart at the
    # drive letter and never find the executable.
    for directory in os.environ.get('PATH', '').split(os.pathsep):
        candidate = os.path.join(directory, PROG_NAME)
        if os.path.exists(candidate) and not os.path.isdir(candidate):
            return True
    raise TesseractNotFound
39+
40+
def process_request(input_file, output_file, lang=None, psm=None):
    """Run tesseract on input_file and wait for it to finish.

    Args:
        input_file: path of the image handed to tesseract.
        output_file: output name handed to tesseract (image_to_string reads
            the result back from output_file + ".txt").
        lang: optional language code, passed after "-l".
        psm: optional page segmentation mode (one of the PSM_* values),
            passed after "-psm".

    Raises:
        TesseractException: if tesseract exits with a non-zero status. The
            message is "File not found" for exit status 2, and tesseract's
            stderr output otherwise (prefixed with "Language code invalid: "
            for exit status -11).
    """
    args = [PROG_NAME, input_file, output_file]
    if lang is not None:
        args.extend(["-l", lang])
    if psm is not None:
        # NOTE(review): newer tesseract releases appear to spell this option
        # "--psm" -- confirm against the installed version before changing.
        args.extend(["-psm", str(psm)])

    # universal_newlines=True makes the captured stderr a text string on
    # Python 3 as well, so it can be concatenated into the error message
    # instead of raising TypeError (str + bytes) and hiding the real error.
    proc = Popen(args, stdout=PIPE, stderr=PIPE, universal_newlines=True)
    _, errors = proc.communicate()  # Blocks until tesseract exits

    code = proc.returncode
    if code == 0:
        return
    # Call-form raises are valid on both Python 2 and Python 3.
    if code == 2:
        raise TesseractException("File not found")
    if code == -11:
        raise TesseractException("Language code invalid: " + errors)
    raise TesseractException(errors)
59+
60+
def iplimage_to_string(im, lang=None, psm=None):
    """Run OCR on an in-memory OpenCV image and return the recognised text.

    The image is saved to TEMP_IMAGE, processed through image_to_string and
    the temporary image is removed afterwards.

    Args:
        im: image object accepted by cv.SaveImage.
        lang: optional language code forwarded to image_to_string.
        psm: optional page segmentation mode forwarded to image_to_string.

    Returns:
        The text returned by image_to_string, or -1 (after printing a
        message) when the OpenCV bindings could not be imported.
    """
    if not OPENCV_AVAILABLE:
        # print(...) with a single argument behaves the same on Python 2 and 3.
        print("OpenCV not Available")
        return -1

    cv.SaveImage(TEMP_IMAGE, im)
    try:
        return image_to_string(TEMP_IMAGE, lang, psm)
    finally:
        # Remove the scratch image even when OCR raises, so a failed run
        # does not leave TEMP_IMAGE behind.
        os.remove(TEMP_IMAGE)
69+
70+
def image_to_string(file, lang=None, psm=None):
    """Run tesseract on an image file and return the recognised text.

    Args:
        file: path of the image to analyse.
        lang: optional language code forwarded to process_request.
        psm: optional page segmentation mode forwarded to process_request.

    Returns:
        The content of the text file produced by tesseract.

    Raises:
        TesseractNotFound: if tesseract is not on the PATH (from check_path).
        TesseractException: if tesseract exits with a non-zero status (from
            process_request).
    """
    check_path()  # Fail early if tesseract is not available in the PATH
    process_request(file, TEMP_FILE, lang, psm)

    result_path = TEMP_FILE + ".txt"
    try:
        # The with-block closes the handle before the file is deleted; some
        # platforms (e.g. Windows) refuse to remove a file that is still open.
        with open(result_path, "r") as result:
            txt = result.read()
    finally:
        # Clean up the temporary output even if reading it failed.
        if os.path.exists(result_path):
            os.remove(result_path)
    return txt
77+
78+
79+
if __name__ == '__main__':
    # Example: OCR "image.jpg" as French text with automatic page
    # segmentation. The print(...) form works on both Python 2 and Python 3.
    print(image_to_string("image.jpg", "fra", PSM_AUTO))

0 commit comments

Comments
 (0)