-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathWRITER6.py
338 lines (323 loc) · 10.9 KB
/
WRITER6.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
# The newest version of the writer. It combines all letter attributes into one attribute, turns the actual Python changes being made into a string attribute, and adds an attribute for the number of characters changed.
import json
import codecs
from sys import argv
import unicodedata
# The command line must carry exactly one argument after the script name:
# the path of the ARFF file to (re)create.
script, arff_file = argv

# Warn before destroying the existing contents; raw_input() returns once the
# user presses Enter (Ctrl-C at this point aborts before anything is touched).
overwrite_warning = "\nWARNING: \nTHIS WILL OVERWRITE THE %s FILE! \nPress any key to continue..." % arff_file
answer = raw_input(overwrite_warning)

# Opening in 'w' mode truncates the file right away. The handle stays open
# because the rest of the script writes the header and the data rows to it.
text = ''
txt2 = open(arff_file, 'w')
print("\n%s file cleared..." % arff_file)
# Write the ARFF header so the file is ready for the data rows: the relation
# name, the fixed attributes, one numeric attribute per tracked ASCII
# character, LETTERS_CHANGED, the class attribute and the @data marker.
attributes = [
    "\n@relation python_mistakes",
    "\n",
    "\n@attribute commit_message string",
    "\n@attribute additions numeric",
    "\n@attribute deletions numeric",
    "\n@attribute changes numeric",
    "\n@attribute actual_changes string",
]

# The three tracked whitespace characters get hand-written attribute names.
control_attributes = {
    9: "\n@attribute attribute_TAB numeric",
    10: "\n@attribute attribute_NEWLINE numeric",
    13: "\n@attribute attribute_CARRIAGE_RETURN numeric",
}

for code in range(127):
    if code in control_attributes:
        attributes.append(control_attributes[code])
        continue
    # 33-64, 91-96 and 123-126 are the ASCII punctuation marks and digits;
    # everything else (letters, space, other control codes) gets no column.
    is_symbol = 32 < code < 65 or 90 < code < 97 or 122 < code < 127
    if not is_symbol:
        continue
    # Name the attribute after the character's Unicode name, e.g.
    # attribute_EXCLAMATION_MARK for "!".
    symbol_name = unicodedata.name(unicode(chr(code))).replace(" ", "_")
    attributes.append("\n@attribute attribute_%s numeric" % symbol_name)

attributes.extend([
    "\n@attribute LETTERS_CHANGED numeric",
    "\n@attribute class {yes, no}",
    "\n",
    "\n@data",
])

# Every entry already starts with its own newline, so write them back to back.
for text in attributes:
    txt2.write(text)
# Default JSON files to read from. Each name must contain 'yes' or 'no',
# because the class label of a file's entries is derived from its name.
filenames = ['monicano.json', 'monicayes.json', 'eno.json', 'eyes.json', 'NEWyes.json', 'NEWno.json']
print("\nThe current list of files to write from is:")
for name in filenames:
    print(name)

answer = raw_input("\nDo you want to change this list? y/n ")
if 'y' in str(answer):
    # Start from an empty list and collect names until the user types DONE.
    filenames = []
    print("Please type 'DONE' when you are done adding files.")
    while True:
        answer = raw_input("\nType the name of the file you want to write from (or type DONE): ")
        # Check the terminator first: 'DONE' contains neither 'yes' nor 'no',
        # so testing the filename rule first would report it as invalid.
        if answer == 'DONE':
            break
        if 'yes' not in answer and 'no' not in answer:
            print("\nINVALID FILENAME: The filename needs to have the word 'yes' or 'no' in it.")
        else:
            filenames.append(answer)
    # Show the list the user ended up with.
    print("\nThe current list of files to write from is:")
    for name in filenames:
        print(name)
print("\nPreparing to write to these files...")
# Edit the keys in all the files to make them compatible
answer = raw_input("\nWARNING: \nThe dictionary keys Mistake, Tags, Length, and Why Not will be removed from all of these files to make them compatible with this program. \nKeys will also be modified to add colons if missing. \nDuplicate entries will be deleted. \nPress any key to continue...")

# (old key, canonical key) pairs, applied in this order. Most only add the
# missing trailing colon; a few also repair a misspelt or tab-padded key.
KEY_RENAMES = [
    ("Additions", "Additions:"),
    ("Deletions", "Deletions:"),
    ("Changes", "Changes:"),
    ("Length", "Length:"),
    ("Commit Corrections", "Commit Corrections:"),
    ("Commit \t\t\t\t\t\t\t\t\tCorrections:", "Commit Corrections:"),
    ("Commit Mistakes", "Commit Mistakes:"),
    ("Commit Mistake", "Commit Mistakes:"),
    ("Tags", "Tags:"),
    ("Mistakes", "Mistakes:"),
    ("Mistake", "Mistakes:"),
    ("Why Not", "Why Not:"),
]
# Keys the writer does not use; they are removed after the renames so that
# both spellings of a key are caught.
KEYS_TO_DROP = ["Keyword:", "Mistakes:", "Mistake:", "Why Not:", "Length:", "Tags:"]
# Exactly these keys must remain in every entry once it has been cleaned.
REQUIRED_KEYS = ["Additions:", "Deletions:", "Changes:", "Commit Mistakes:", "Commit Corrections:", "Message:"]

# Commit links seen so far, shared across files so that an entry repeated in
# a later file is dropped as well.
links = []
for eachfile in filenames:
    with open(eachfile) as data_file:
        data = json.load(data_file)

    # Rename keys. The test is on key presence (not on the value), so an
    # entry whose value is null is still renamed.
    for old_key, new_key in KEY_RENAMES:
        for entry in data:
            if old_key in entry:
                entry[new_key] = entry.pop(old_key)
                print("Changed one %s to %s" % (old_key, new_key))

    # Remove the keys that are not needed.
    for unwanted_key in KEYS_TO_DROP:
        for entry in data:
            if unwanted_key in entry:
                del entry[unwanted_key]

    # Sanity check: every entry now holds exactly the required keys.
    for entry in data:
        for required_key in REQUIRED_KEYS:
            assert required_key in entry, "%s: entry is missing %r" % (eachfile, required_key)
        assert len(entry) == len(REQUIRED_KEYS), "%s: unexpected keys in entry: %r" % (eachfile, sorted(entry))

    # Drop duplicate commits. A new list is built instead of removing items
    # from `data` while looping over it, which would skip the entry that
    # follows each removed one.
    unique_entries = []
    for entry in data:
        commitlink = entry.get("Commit Corrections:")
        if commitlink in links:
            print(commitlink)
        else:
            links.append(commitlink)
            unique_entries.append(entry)
    data = unique_entries

    # Write the cleaned entries back over the original file.
    with open(eachfile, 'w') as outfile:
        json.dump(data, outfile, indent=2)
print("\nFiles have been modified.")
# State for adding the data to the ARFF file: the commit links already seen
# (so duplicates can be skipped) and the running yes / no entry counters.
yes = no = 0
links = []
# Lowercase letters, the digits 1-9 and 0, and the space character.
characters = list("abcdefghijklmnopqrstuvwxyz1234567890 ")
badcharacters = ["'"]

# Optionally cap the number of "no" entries. The string "Nothing" is the
# sentinel that means no cap was requested.
answer = raw_input("\n\nSometimes there are way to many no entries. Do you want to set a max limit for no entries? y/n")
if 'y' not in str(answer):
    maxno = "Nothing"
    print("No max for no entries.")
else:
    maxno = int(raw_input("What is the max number of no entries? >>> "))
# Character codes of the ASCII letters (A-Z, a-z); their differences are
# folded into the single LETTERS_CHANGED column.
_LETTER_CODES = list(range(65, 91)) + list(range(97, 123))
# Character codes that get their own numeric column, in column order: TAB,
# NEWLINE, CARRIAGE RETURN, then the ASCII punctuation marks and digits.
_COLUMN_CODES = [9, 10, 13] + list(range(33, 65)) + list(range(91, 97)) + list(range(123, 127))
_ASCII_LETTERS = frozenset("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
# Characters (compared in lowercase) that are kept in a commit message.
_MESSAGE_CHARACTERS = frozenset("abcdefghijklmnopqrstuvwxyz1234567890 ")
# Names used in the actual_changes string for the whitespace characters.
_WHITESPACE_NAMES = {"\n": "NEWLINE", "\t": "TAB", "\r": "SLASH_R"}


def _count_ascii(lines):
    """Return a 128-slot list with the number of occurrences of each ASCII
    character in `lines`; non-ASCII characters are ignored."""
    counts = [0] * 128
    for line in lines:
        for letter in line:
            code = ord(letter)
            if code < 128:
                counts[code] += 1
    return counts


def _clean_message(message):
    """Reduce a commit message to ASCII letters, digits and spaces, with the
    words "class" and "Class" removed. A missing message becomes ''."""
    message = (message or "").encode('ascii', errors='ignore')
    message = "".join(letter for letter in message if letter.lower() in _MESSAGE_CHARACTERS)
    return message.replace("class", "").replace("Class", "")


def _describe_changes(lines):
    """Build the actual_changes string from the changed lines.

    A leading '+' or '-' is stripped from each line. Runs of ASCII letters
    stay together as words; every other ASCII character is replaced by its
    Unicode name (spaces turned into underscores) and set off by spaces.
    Characters that are not ASCII, or that have no Unicode name (most
    control characters), are dropped. Finally "class" is removed.
    """
    # NOTE(review): nothing is inserted between two lines, so a word ending
    # one line runs into a word starting the next - same as before; confirm
    # whether that is wanted.
    pieces = []
    previous_was_symbol = False
    for line in lines:
        # line[:1] (rather than line[0]) keeps an empty line from raising.
        if line[:1] in ('+', '-'):
            line = line[1:]
        for letter in line:
            if letter in _ASCII_LETTERS:
                token, is_symbol = letter, False
            elif letter in _WHITESPACE_NAMES:
                token, is_symbol = _WHITESPACE_NAMES[letter], True
            elif ord(letter) < 127:
                # Control characters have no Unicode name; ask for None
                # instead of letting unicodedata.name() raise ValueError.
                symbol_name = unicodedata.name(unicode(letter), None)
                if symbol_name is None:
                    continue
                token, is_symbol = symbol_name.replace(" ", "_"), True
            else:
                continue
            # A space goes before every symbol name and before the first
            # letter that follows a symbol name.
            if is_symbol or previous_was_symbol:
                pieces.append(" ")
            pieces.append(token)
            previous_was_symbol = is_symbol
    return "".join(pieces).replace("class", "")


for filename in filenames:
    print("\nAdding from %r file.." % filename)
    # Plain read mode: the file is only read here, and 'a+' would create a
    # missing file and leaves the initial read position platform-dependent.
    with open(filename) as txt:
        data = json.load(txt)
    # The class label of every entry comes from the file name.
    chicken = "yes" if 'yes' in filename else "no"
    for thing in data:
        commitlink = thing.get("Commit Corrections:")
        if commitlink in links:
            print("This is a duplicate entry, so I am skipping it.")
            continue
        links.append(commitlink)

        # Split the changed lines into added ('+') and deleted ('-') text.
        changes = thing.get("Changes:")
        additions = []
        deletions = []
        for line in changes:
            marker = line[:1]
            if marker == '+':
                additions.append(line[1:])
            elif marker == '-':
                deletions.append(line[1:])
            else:
                print("I didn't recognize %s as a + or a -" % marker)

        # Per-character difference between deleted and added text for the
        # codes 0-126, and its total (written as the "changes" column).
        addchar = _count_ascii(additions)
        delchar = _count_ascii(deletions)
        chardiff = [abs(delchar[code] - addchar[code]) for code in range(127)]
        c = sum(chardiff)
        # NOTE(review): the previous code looked positions up with
        # chardiff.index(value), which returns the FIRST position holding
        # that value, so the letter test ran on the wrong index. This sums
        # the differences of all letters - confirm that is the intended
        # meaning of LETTERS_CHANGED.
        letterschanged = sum(chardiff[code] for code in _LETTER_CODES)

        # Get the message and get rid of characters it won't recognize
        m = _clean_message(thing.get("Message:"))
        a = thing.get("Additions:")
        d = thing.get("Deletions:")
        # Get the actual changes and make a string out of them
        string = _describe_changes(changes)

        # Assemble the row in the same column order as the header.
        columns = ["\n'%s', %s, %s, %s, '%s'" % (m, a, d, c, string)]
        columns.extend(str(chardiff[code]) for code in _COLUMN_CODES)
        columns.append(str(letterschanged))
        columns.append(chicken)
        text = ", ".join(columns)

        # The cap applies to "no" rows only. (`x and y or z` used to let the
        # `or` branch skip "yes" rows as well.)
        if chicken == 'no' and maxno != "Nothing" and no >= maxno:
            print("Reached max no, not adding.")
            continue
        txt2.write(text)
        # Count only the rows that were actually written.
        if chicken == 'yes':
            yes += 1
        else:
            no += 1
txt2.close()
print("\nAll done! I have added %d yes entries and %d no entries. \nThank you for using %s" % (yes, no, script))