-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsamples.py
189 lines (161 loc) · 4.95 KB
/
samples.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
# samples.py
# ----------
# Licensing Information: Please do not distribute or publish solutions to this
# project. You are free to use and extend these projects for educational
# purposes. The Pacman AI projects were developed at UC Berkeley, primarily by
# John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# For more info, see http://inst.eecs.berkeley.edu/~cs188/sp09/pacman.html
import util
## Constants
# Module-level defaults for the image dimensions. No code visible in this
# file rebinds them through a `global` statement, so at module scope they
# stay 0; real dimensions are passed explicitly as width/height arguments.
DATUM_WIDTH = 0 # in pixels
DATUM_HEIGHT = 0 # in pixels
## Module Classes
class Datum:
    """
    A datum is a pixel-level encoding of digits or face/non-face edge maps.

    Digits are from the MNIST dataset and face images are from the
    easy-faces and background categories of the Caltech 101 dataset.

    Each digit is 28x28 pixels, and each face/non-face image is 60x74
    pixels, each pixel can take the following values:
      0: no edge (blank)
      1: gray pixel (+) [used for digits only]
      2: edge [for face] or black pixel [for digit] (#)

    Pixel data is stored in the 2-dimensional array pixels, which
    maps to pixels on a plane according to standard euclidean axes
    with the first dimension denoting the horizontal and the second
    the vertical coordinate:

      28 # # # # #      #  #
      27 # # # # #      #  #
       .
       .
       .
       3 # # + # #      #  #
       2 # # # # #      #  #
       1 # # # # #      #  #
       0 # # # # #      #  #
         0 1 2 3 ...   27 28

    For example, the + in the above diagram is stored in pixels[2][3], or
    more generally pixels[column][row].

    The contents of the representation can be accessed directly
    via the getPixel and getPixels methods.
    """

    def __init__(self, data, width, height):
        """
        Create a new datum from file input (standard MNIST encoding).

        data   -- list of rows, each row a list of the characters
                  ' ', '+' or '#'; pass None for an all-blank image of
                  `height` rows by `width` columns.
        width  -- image width in pixels, stored as self.width.
        height -- image height in pixels, stored as self.height.
        """
        # The dimensions are kept per instance; the module-level
        # DATUM_WIDTH / DATUM_HEIGHT constants are intentionally not touched.
        self.height = height
        self.width = width
        if data is None:
            data = [[' ' for _ in range(width)] for _ in range(height)]
        # convertToInteger maps the characters to 0/1/2; util.arrayInvert is
        # defined outside this file -- presumably it swaps the two list
        # dimensions so that indexing is pixels[column][row] (confirm in util).
        self.pixels = util.arrayInvert(convertToInteger(data))

    def getPixel(self, column, row):
        """
        Returns the value of the pixel at column, row as 0, 1, or 2.
        """
        return self.pixels[column][row]

    def getPixels(self):
        """
        Returns all pixels as a list of lists (the internal list itself,
        not a copy).
        """
        return self.pixels

    def getAsciiString(self):
        """
        Renders the data item as an ascii image, one text line per row,
        using ' ', '+' and '#' for the pixel values 0, 1 and 2.
        """
        # Invert back so that each inner list is one line of output.
        lines = util.arrayInvert(self.pixels)
        return "\n".join(
            "".join(asciiGrayscaleConversionFunction(value) for value in line)
            for line in lines
        )

    def __str__(self):
        """Return the ascii rendering produced by getAsciiString."""
        return self.getAsciiString()
# Data processing, cleanup and display functions
def loadDataFile(filename, n, width, height):
    """
    Reads n data images from a file and returns a list of Datum objects.
    (Returns fewer than n items if the end of file is encountered.)

    filename -- path handed to readlines(); every image occupies `height`
                consecutive lines of that file.
    n        -- maximum number of images to read.
    width    -- expected image width in pixels, passed on to Datum.
    height   -- number of text lines per image, passed on to Datum.

    When the input runs out, a "Truncating ..." notice is printed and the
    images read so far are returned.
    """
    lines = readlines(filename)
    items = []
    for i in range(n):
        start = i * height
        data = [list(line) for line in lines[start:start + height]]
        # End of input: either fewer than `height` lines are left (the old
        # pop()-based loop raised IndexError here instead of truncating), or
        # the first row is too short to be image data (e.g. a trailing
        # blank line).
        if len(data) < height or not data or len(data[0]) < width - 1:
            print("Truncating at %d examples (maximum)" % i)
            break
        items.append(Datum(data, width, height))
    return items
import zipfile
import os
def readlines(filename):
    """
    Opens a file or reads it from the zip archive data.zip.

    If `filename` exists on disk its lines are returned with the trailing
    newline removed from each. Otherwise the member called `filename` is read
    from 'data.zip' (looked up relative to the current working directory) and
    split on newline characters; in that case content that ends with a newline
    yields a final empty string.

    Returns a list of strings. Errors from opening the file or archive, and
    the KeyError zipfile raises for a missing member, propagate to the caller.
    """
    if os.path.exists(filename):
        with open(filename) as handle:
            # Strip only a real newline: blindly dropping the last character
            # would corrupt a final line that has no trailing newline.
            return [line[:-1] if line.endswith('\n') else line
                    for line in handle]
    with zipfile.ZipFile('data.zip') as archive:
        contents = archive.read(filename)
    if not isinstance(contents, str):
        # Python 3 returns bytes here. latin-1 maps every byte to exactly
        # one character, so row widths are preserved and decoding cannot fail.
        contents = contents.decode('latin-1')
    return contents.split('\n')
def loadLabelsFile(filename, n):
    """
    Reads up to n labels from a file and returns them as a list of integers.

    Lines come from readlines(filename); reading stops at the first empty
    line, and every line before it is converted with int().
    """
    lines = readlines(filename)
    candidates = lines[:min(n, len(lines))]
    # An empty line terminates the label list; drop it and all that follows.
    if '' in candidates:
        candidates = candidates[:candidates.index('')]
    return [int(text) for text in candidates]
def asciiGrayscaleConversionFunction(value):
    """
    Helper function for display purposes.

    Maps a pixel value to its display character: 0 -> ' ', 1 -> '+',
    2 -> '#'. Any other value yields None.
    """
    for code, glyph in ((0, ' '), (1, '+'), (2, '#')):
        if value == code:
            return glyph
    return None
def IntegerConversionFunction(character):
    """
    Helper function for file reading.

    Maps a character from a data file to its pixel value: ' ' -> 0,
    '+' -> 1, '#' -> 2. Any other input yields None.
    """
    for symbol, code in ((' ', 0), ('+', 1), ('#', 2)):
        if character == symbol:
            return code
    return None
def convertToInteger(data):
    """
    Helper function for file reading.

    Recursively converts `data` to pixel values: a list is converted element
    by element and returned as a new list of the same nesting; anything that
    is not a list is passed to IntegerConversionFunction.
    """
    if not isinstance(data, list):
        return IntegerConversionFunction(data)
    # Build a real list: on Python 3 map() is lazy, which would hand callers
    # nested iterators that cannot be indexed or measured with len().
    return [convertToInteger(item) for item in data]
# Testing
def _test():
    """
    Smoke test: runs the module doctests, then loads one digit image with
    its label and prints the image together with some of its attributes.
    """
    import doctest
    doctest.testmod()  # Test the interactive sessions in function comments

    n = 1
    # Face data alternative:
    #   items = loadDataFile("facedata/facedatatrain", n, 60, 70)
    #   labels = loadLabelsFile("facedata/facedatatrainlabels", n)
    items = loadDataFile("digitdata/trainingimages", n, 28, 28)
    labels = loadLabelsFile("digitdata/traininglabels", n)

    # Only the first loaded image is inspected.
    first = items[0]
    print(first)
    print(first)
    print(first.height)
    print(first.width)
    print(dir(first))
    print(first.getPixels())
if __name__ == "__main__":
    # Executed as a script rather than imported: run the smoke test.
    _test()