-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmy_model.py
371 lines (326 loc) · 18.5 KB
/
my_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
"""
my_model.py: template for models to be used in the TWIMLfest 2020 Codenames competition
Dan Hilgart <[email protected]> and Yuri Shlyakhter <[email protected]>
see https://czechgames.com/files/rules/codenames-rules-en.pdf for game rules
"""
"""
------------------------------------------------------------------------------------------------------------------------
Required Imports
------------------------------------------------------------------------------------------------------------------------
Do not remove these
"""
import TWIML_codenames
import numpy as np
"""
------------------------------------------------------------------------------------------------------------------------
Your Imports
------------------------------------------------------------------------------------------------------------------------
Add/modify as necessary
"""
### YOUR CODE HERE
import spacy # after installing, be sure to run 'python -m spacy download en_core_web_lg'
import itertools
import pickle
import json
import os
import time
### END YOUR CODE
"""
------------------------------------------------------------------------------------------------------------------------
Your Global Variables
------------------------------------------------------------------------------------------------------------------------
Place anything here that you want to be loaded when this module is imported by TWIML_codenames_API_client.py
For example, if you are loading word vectors, load them here as global variables so they do not have to be loaded each
time the generate_clue and generate_guesses functions are called
"""
### YOUR CODE HERE
# spaCy pipeline used for word-vector similarity (dist) and lemma checks (is_lemma).
nlp = spacy.load("en_core_web_lg") # if OSError: [E050] Can't find model 'en_core_web_lg', run this from command line:
                                   # 'python -m spacy download en_core_web_lg'

# clue_word_distances is a dict containing a 2D numpy array of word distances ('distances') that have already been
# pre-calculated in order to speed up compute time. The dict also contains 2 additional dicts, both of form
# {word:index}, for the words on the 2 axes: the first axis ('boardwords') is the list of words that the gameboards can
# be made from, while the second axis ('clue_words') is the list of candidate clue words. Boardwords is a list of 400
# words, a copy of wordlist.txt. clue_words is a list of 6698 nouns based on nounlist.txt sourced from
# http://www.desiquintans.com/nounlist
# Note: wordlist will be a different set of words for week 2 than week 1. You will need to re-download
# 'clue_word_distances.pkl' from the repo after week 1 is complete before entering your bot in week 2
# NOTE(security): unpickling can execute arbitrary code, so only load a .pkl file obtained from a trusted source.
# The file is opened in a `with` block so the handle is closed as soon as loading finishes.
with open(os.path.join('distances','clue_word_distances.pkl'),'rb') as _distances_file:
    clue_word_distances = pickle.load(_distances_file)
### END YOUR CODE
"""
------------------------------------------------------------------------------------------------------------------------
Your Functions
------------------------------------------------------------------------------------------------------------------------
Add/modify functions as necessary
"""
### YOUR CODE HERE
def dist(word1, word2):
    """
    Calculates the vector-distance between two words.

    Each word is parsed separately and the two resulting spaCy Docs are compared. Parsing the
    joined string "word1 word2" and indexing tokens 0 and 1 is only correct when word1 is exactly
    one token: if the tokenizer splits word1 into several tokens, token 1 is still part of word1,
    and an empty input leaves fewer than two tokens to index.

    @param word1 (str): first word
    @param word2 (str): second word
    @returns 1 minus the spaCy similarity of the two parsed words (smaller means more similar)
    """
    # For a Doc holding a single token the Doc vector is that token's vector, so single-token
    # inputs should score the same as a token-to-token comparison.
    return 1 - nlp(word1).similarity(nlp(word2))
def is_lemma(w1, w2):
    """
    Tells whether two words reduce to the same lemma.

    Both inputs are converted with str() and parsed by the spaCy pipeline; only the lemma of the
    first token of each parse is compared.
    """
    first_lemma = nlp(str(w1))[0].lemma_
    second_lemma = nlp(str(w2))[0].lemma_
    return first_lemma == second_lemma
def list_is_lemma(boardwords, clue):
    """
    Reports whether the clue shares a lemma with any word on the board.

    boardwords is walked as rows of words; the scan stops at the first word whose lemma matches
    the clue's lemma (as judged by is_lemma).
    """
    return any(is_lemma(board_word, clue) for board_row in boardwords for board_word in board_row)
def count_guessed_good_words(team_num, boardmarkers):
    """
    Counts how many markers on the board equal team_num, i.e. how many of your team's words have
    already been revealed.
    Unused function, written as an attempt to make the guesser smarter.

    boardmarkers is walked as rows of markers; markers that do not compare equal to team_num
    (including NaN for untapped words) are not counted.
    """
    return sum(1 for marker_row in boardmarkers for marker in marker_row if marker == team_num)
def init_json_file(path):
    """
    Creates a JSON file to track each game's clues and guesses (from your team).
    Unused function, written as an attempt to make the guesser smarter.

    The file is initialised to {"bad_words": []}. An existing file is left untouched. Missing
    parent directories are created first so that the file can be written.

    @param path (str): location of the JSON tracking file
    """
    parent_dir = os.path.dirname(path)
    if parent_dir:  # os.makedirs('') raises, so skip it for a bare filename
        os.makedirs(parent_dir, exist_ok=True)
    try:
        # 'x' creates the file only if it does not exist yet, which avoids the gap between a
        # separate existence check and the open call.
        with open(path, 'x') as json_file:
            json.dump({"bad_words": []}, json_file)
    except FileExistsError:
        # Already initialised: keep whatever has been recorded so far.
        pass
def add_bad_word(path, potential_bad_word):
    """
    Adds a potentially incorrect word to the JSON file that is tracking the game.
    Idea: If a word is below the threshold but the number of guesses is already reached,
    then this word is not desired <==> bad word.
    Unused function, written as an attempt to make the guesser smarter.

    The file is only rewritten when the word is new, and any other top-level keys stored in the
    file are written back unchanged.

    @param path (str): location of the JSON tracking file; it must already exist and contain a
        "bad_words" list
    @param potential_bad_word (str): the word to record
    """
    with open(path) as json_file:
        tracked = json.load(json_file)
    bad_words = tracked["bad_words"]
    if potential_bad_word in bad_words:
        return  # already recorded, nothing to write
    bad_words.append(potential_bad_word)
    with open(path, 'w') as json_file:
        json.dump(tracked, json_file)
def remaining_minus_bad(myBestTuplesList, path):
    """
    Returns the unguessed words that are not recorded as bad words in the JSON file.
    Unused function, written as an attempt to make the guesser smarter.

    @param myBestTuplesList (list[tuple]): tuples whose first element is a word (the remaining
        elements are ignored here)
    @param path (str): location of the JSON tracking file containing a "bad_words" list
    @returns (list[str]): the words from myBestTuplesList, in their original order, with every
        recorded bad word filtered out
    """
    with open(path) as json_file:
        bad_words = set(json.load(json_file)["bad_words"])  # set gives O(1) membership tests
    return [tup[0] for tup in myBestTuplesList if tup[0] not in bad_words]
### END YOUR CODE
"""
------------------------------------------------------------------------------------------------------------------------
Required Functions
------------------------------------------------------------------------------------------------------------------------
These are the two required functions that you must have in your model file.
"""
def generate_clue(game_id, team_num, gameboard: TWIML_codenames.Gameboard):
    """
    This is the function that will be called when your bot is the Spymaster
    Your bot will need to provide a clue_word and a clue_count which will be used by your teammate's bot to guess words
    Make sure to provide a legal clue (see TWIML_codenames.py for how legality is assessed) or the turn will be skipped

    The following inputs will be provided:
    @param game_id (int): the unique identifier for this game. Can be used to locally track info about this game as it
        plays out
    @param team_num (int): 1 if you are on the first team, 2 if you are on the second team. This matches with the
        gameboard key
    @param gameboard (TWIML_codenames.Gameboard): an object containing the current state of the gameboard. Note that
        this is a copy of the gameboard so any changes made to it will not impact the true gameboard. See
        TWIML_codenames.py for the full details of the TWIML_codenames.Gameboard class. Some useful commands:
        gameboard.boardwords -- 5x5 np.array[str]: the 5x5 grid of words. Remains unchanged after initialization
        gameboard.boardkey -- 5x5 np.array[int]: the key that tells which words belong to which team. Remains
            unchanged after initialization. (1 = team 1, 2 = team 2, 0 = neutral, -1 = assassin)
        gameboard.boardmarkers -- 5x5 np.array[float]: the array that tracks which words have been tapped and what
            was revealed. Starts as an array of np.NaNs. As words are tapped (guessed), the values from the boardkey
            are added for each tapped word.
        gameboard.unguessed_words(team_num[int](optional)) -- list[str]: returns a list of unguessed words for the
            supplied team_num (1 = team 1, 2 = team 2, 0 = neutral, -1 = assassin). If no team_num is supplied, will
            return all remaining unguessed words.
        gameboard.remaining(team_num[int]) -- list[str]: Counts how many cards are left for the supplied team_num

    Please return the outputs as follows:
    @returns clue_word (str): the one-word clue that must not match any part of the remaining words on the board
    @returns clue_count (int): the count of how many board-words are related to the clue word. There are two special
        cases for the clue count:
        A Spymaster can give a clue count of 0 which communicates to the Operative that they should guess words that
            are NOT related to the clue word.
        A spymaster may also give a clue for infinity, allowing the Operative to make as many guesses as they like.
            To give a clue for infinity, provide an int of 10.

    How the clue is chosen:
    For every candidate clue, a team word is "safe" when its pre-calculated distance to the candidate is below the
    threshold AND strictly smaller than the distance of every unguessed non-team word. The candidate with the most safe
    words wins; ties are broken in favour of the candidate whose farthest safe word is closest. If no candidate has at
    least one safe word, a random candidate is returned with a clue_count of 1.
    """
    ### YOUR CODE HERE
    threshold = 0.7

    # see definition of clue_word_distances in the 'Your Global Variables' section above.
    distances = clue_word_distances['distances']
    board_index = clue_word_distances['boardwords']
    clue_index = clue_word_distances['clue_words']

    all_unguessed_words = gameboard.unguessed_words()
    unguessed_good_words = gameboard.unguessed_words(team_num)
    unguessed_bad_words = [word for word in all_unguessed_words if word not in unguessed_good_words]

    # filter out words that contain, or are contained in, words on the board:
    full_candidates = [
        candidate for candidate in clue_index
        if not any((candidate in unguessed_word) or (unguessed_word in candidate)
                   for unguessed_word in all_unguessed_words)
    ]

    # The distance-matrix row of each board word does not depend on the candidate, so look it up once.
    good_rows = [distances[board_index[good_word]] for good_word in unguessed_good_words]
    bad_rows = [distances[board_index[bad_word]] for bad_word in unguessed_bad_words]

    clue_word = None
    clue_count = 0
    best_farthest = float('Inf')  # distance of the farthest safe word of the clue currently selected
    for clue_candidate in full_candidates:
        # 'id' and 'wont' are excluded by hand; hyphenated candidates are never selected either.
        if clue_candidate in ('id', 'wont') or '-' in clue_candidate:
            continue
        column = clue_index[clue_candidate]

        # A team word is only safe if it is closer to the clue than every non-team word (a tie counts as unsafe)
        # and closer than the threshold.
        closest_bad = min((row[column] for row in bad_rows), default=float('Inf'))
        cutoff = min(threshold, closest_bad)
        safe_distances = [row[column] for row in good_rows if row[column] < cutoff]
        if not safe_distances:
            # A clue that covers no team word is never selected, so the random fallback below can trigger.
            continue

        num_safe_words = len(safe_distances)
        farthest_safe = max(safe_distances)
        if num_safe_words < clue_count:
            continue
        if num_safe_words == clue_count and farthest_safe >= best_farthest:
            continue
        # The lemma check parses every board word, so it is done last and only for promising candidates.
        if list_is_lemma(gameboard.boardwords, clue_candidate):
            continue

        clue_word = clue_candidate
        clue_count = num_safe_words
        # Record the tie-break distance on every change of clue so later ties are compared against the
        # clue that is actually selected.
        best_farthest = farthest_safe

    if not clue_word:
        # if it didn't find a good clue word, return a random word
        clue_word = str(np.random.choice(full_candidates,1)[0])
        clue_count = 1
    ### END YOUR CODE
    return clue_word, clue_count
def generate_guesses(game_id, team_num, clue_word, clue_count, unguessed_words, boardwords, boardmarkers):
    """
    This is the function that will be called when your bot is the Operative
    Your teammate's bot will provide you with a clue_word and a clue_count. Use them to generate a list of words to
    guess.

    The following inputs will be provided:
    @param game_id (int): the unique identifier for this game. Can be used to locally track info about this game as it
        plays out
    @param team_num (int): 1 if you are on the first team, 2 if you are on the second team. This matches with the
        boardmarkers array
    @param clue_word (str): the one-word clue from your spymaster
    @param clue_count (int): the count of how many board-words are related to the clue word. There are two special
        cases for the clue count:
        A Spymaster can give a clue count of 0 which communicates to the Operative that they should guess words that
            are NOT related to the clue word.
        A spymaster may also give a clue for infinity, allowing the Operative to make as many guesses as they like.
            An int of 10 is used to represent a clue for infinity.
    @param unguessed_words (list[str]): a 1-d list of all the remaining words that have not yet been tapped. This list
        is not modified.
    @param boardwords (5x5 np.array[str]): the 5x5 grid of words. Remains unchanged after initialization
    @param boardmarkers (5x5 np.array[float]): the array that tracks which words have been tapped and what was revealed.
        Starts as an array of np.NaNs. As words are tapped (guessed), the team number (1 = team 1, 2 = team 2,
        0 = neutral, -1 = assassin) of each tapped word are added.

    Please return the outputs as follows:
    @returns guesses (list[str]): a list of the words that you would like to tap in the order you want them tapped.
        Words on the list will continue to be tapped until a word is tapped that is not one of your team's words

    How the guesses are chosen:
    The unguessed words are ranked by their distance to the clue, closest first. Up to clue_count of them are returned,
    stopping at the first word whose distance is not below the guessing threshold. If that leaves nothing to guess, one
    random unguessed word is returned instead (or an empty list when there are no unguessed words).
    """
    ### YOUR CODE HERE
    # Algorithm based on the following paper:
    # Cooperation and Codenames:Understanding Natural Language Processing via Codenames
    # by A. Kim, M. Ruzmaykin, A. Truong, and A. Summerville 2019
    #
    # NOTE: an earlier experiment recorded probable "bad words" per game in a JSON file (see init_json_file,
    # add_bad_word, remaining_minus_bad and count_guessed_good_words). It is disabled and not used here.
    threshold_for_guessing = 0.83 # Anthony: was 0.7

    distances = clue_word_distances['distances']
    board_index = clue_word_distances['boardwords']
    # Column of the clue in the pre-calculated table, or None when the clue is not one of the known clue words.
    clue_column = clue_word_distances['clue_words'].get(clue_word)

    def distance_to_clue(word):
        # Use the pre-calculated distance when both words are in the table; otherwise compute it from word vectors.
        if clue_column is not None and word in board_index:
            return distances[board_index[word]][clue_column]
        return dist(clue_word, word)

    # Rank a copy of the words so the caller's list is left intact. The sort is stable, so words at the same
    # distance keep their order from unguessed_words.
    ranked_words = sorted(((distance_to_clue(word), word) for word in unguessed_words), key=lambda pair: pair[0])

    guesses = []
    for distance, word in ranked_words:
        if len(guesses) >= clue_count or not distance < threshold_for_guessing:
            break
        guesses.append(word)

    if len(guesses) == 0 and len(unguessed_words) > 0:
        # nothing was close enough to the clue (or clue_count was 0): fall back to one random word
        guesses.append(str(np.random.choice(unguessed_words,1)[0]))
    ### END YOUR CODE
    return guesses