-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtemplate_my_model.py
234 lines (215 loc) · 13.1 KB
/
template_my_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
"""
my_model.py: template for models to be used in the TWIMLfest 2020 Codenames competition
Dan Hilgart <[email protected]> and Yuri Shlyakhter <[email protected]>
see https://czechgames.com/files/rules/codenames-rules-en.pdf for game rules
"""
"""
------------------------------------------------------------------------------------------------------------------------
Required Imports
------------------------------------------------------------------------------------------------------------------------
Do not remove these
"""
import TWIML_codenames
import numpy as np
"""
------------------------------------------------------------------------------------------------------------------------
Your Imports
------------------------------------------------------------------------------------------------------------------------
Add/modify as necessary
"""
### YOUR CODE HERE
import spacy # after installing, be sure to run 'python -m spacy download en_core_web_lg'
import itertools
import pickle
import os
### END YOUR CODE
"""
------------------------------------------------------------------------------------------------------------------------
Your Global Variables
------------------------------------------------------------------------------------------------------------------------
Place anything here that you want to be loaded when this module is imported by TWIML_codenames_API_client.py
For example, if you are loading word vectors, load them here as global variables so they do not have to be loaded each
time the generate_clue and generate_guesses functions are called
"""
### YOUR CODE HERE
# Loaded once at import time so the model is shared by every call into this module.
# If this raises "OSError: [E050] Can't find model 'en_core_web_lg'", run this from the command line:
#     python -m spacy download en_core_web_lg
nlp = spacy.load("en_core_web_lg")

# clue_word_distances is a dict containing a 2D numpy array of word distances that have already been pre-calculated in
# order to speed up compute time. The dict also contains 2 additional dicts, both of form {word:index}, for the words on
# the 2 axes: the first axis ('boardwords') is the list of words that the gameboards can be made from, while the second
# axis ('clue_words') is the list of candidate clue words. Boardwords is a list of 400 words, a copy of wordlist.txt.
# clue_words is a list of 6698 nouns based on nounlist.txt sourced from http://www.desiquintans.com/nounlist
# Note: wordlist will be a different set of words for week 2 than week 1. You will need to re-download
# 'clue_word_distances.pkl' from the repo after week 1 is complete before entering your bot in week 2
#
# NOTE(review): unpickling executes arbitrary code embedded in the file, so only load a 'clue_word_distances.pkl'
# obtained from a source you trust.
# The path is relative to the current working directory. The 'with' block guarantees the file handle is closed as
# soon as the data has been read instead of leaking it for the lifetime of the process.
with open(os.path.join('distances', 'clue_word_distances.pkl'), 'rb') as _distances_file:
    clue_word_distances = pickle.load(_distances_file)
### END YOUR CODE
"""
------------------------------------------------------------------------------------------------------------------------
Your Functions
------------------------------------------------------------------------------------------------------------------------
Add/modify functions as necessary
"""
### YOUR CODE HERE
def dist(word1, word2):
    """
    Calculates the vector-distance between two words

    Each word is tokenized on its own and compared at the Doc level. Joining both words into one string and
    comparing the first two tokens breaks as soon as either word tokenizes into more than one token (e.g.
    "ice cream" vs "cone" would compare "ice" with "cream").
    For words that are a single token the result is the same as comparing the two tokens directly, because the
    vector of a one-token Doc is that token's vector.

    Only the tokenizer is run (nlp.make_doc): the similarity is computed from the static word vectors stored in the
    model's vocab, so the tagger/parser/NER components are not needed.
    NOTE(review): this relies on the loaded model shipping static vectors, which holds for "en_core_web_lg" as
    loaded at the top of this file -- revisit if the model is swapped for one without them.

    @param word1 (str): first word or phrase
    @param word2 (str): second word or phrase
    @returns (float): 1 - similarity, so smaller values mean the words are more closely related
    """
    doc1 = nlp.make_doc(word1)
    doc2 = nlp.make_doc(word2)
    return 1 - doc1.similarity(doc2)
### END YOUR CODE
"""
------------------------------------------------------------------------------------------------------------------------
Required Functions
------------------------------------------------------------------------------------------------------------------------
These are the two required functions that you must have in your model file.
"""
def generate_clue(game_id, team_num, gameboard: TWIML_codenames.Gameboard):
    """
    This is the function that will be called when your bot is the Spymaster
    Your bot will need to provide a clue_word and a clue_count which will be used by your teammate's bot to guess words
    Make sure to provide a legal clue (see TWIML_codenames.py for how legality is assessed) or the turn will be skipped

    The following inputs will be provided:
    @param game_id (int): the unique identifier for this game. Can be used to locally track info about this game as it
        plays out
    @param team_num (int): 1 if you are on the first team, 2 if you are on the second team. This matches with the
        gameboard key
    @param gameboard (TWIML_codenames.Gameboard): an object containing the current state of the gameboard. Note that
        this is a copy of the gameboard so any changes made to it will not impact the true gameboard. See
        TWIML_codenames.py for the full details of the TWIML_codenames.Gameboard class. Some useful commands:
            gameboard.boardwords -- 5x5 np.array[str]: the 5x5 grid of words. Remains unchanged after initialization
            gameboard.boardkey -- 5x5 np.array[int]: the key that tells which words belong to which team. Remains
                unchanged after initialization. (1 = team 1, 2 = team 2, 0 = neutral, -1 = assassin)
            gameboard.boardmarkers -- 5x5 np.array[float]: the array that tracks which words have been tapped and what
                was revealed. Starts as an array of np.NaNs. As words are tapped (guessed), the values from the boardkey
                are added for each tapped word.
            gameboard.unguessed_words(team_num[int](optional)) -- list[str]: returns a list of unguessed words for the
                supplied team_num (1 = team 1, 2 = team 2, 0 = neutral, -1 = assassin). If no team_num is supplied, will
                return all remaining unguessed words.
            gameboard.remaining(team_num[int]) -- counts how many cards are left for the supplied team_num
                (see TWIML_codenames.py for the exact return type)

    Please return the outputs as follows:
    @returns clue_word (str): the one-word clue that must not match any part of the remaining words on the board
    @returns clue_count (int): the count of how many board-words are related to the clue word. There are two special
        cases for the clue count:
            A Spymaster can give a clue count of 0 which communicates to the Operative that they should guess words that
                are NOT related to the clue word.
            A spymaster may also give a clue for infinity, allowing the Operative to make as many guesses as they like.
                To give a clue for infinity, provide an int of 10.

    @raises KeyError: if a word on the board is missing from clue_word_distances['boardwords'] (e.g. the pickle is
        out of date for the current word list)
    """
    ### YOUR CODE HERE
    # Algorithm based on the following paper:
    # Cooperation and Codenames: Understanding Natural Language Processing via Codenames
    # by A. Kim, M. Ruzmaykin, A. Truong, and A. Summerville 2019
    threshold = 0.7  # a clue must be closer than this to every word it is meant to cover
    margin = 0.2     # ...and closer than the nearest bad word by at least this much

    # see definition of clue_word_distances in the 'Your Global Variables' section above.
    distances = clue_word_distances['distances']
    board_index = clue_word_distances['boardwords']  # {board word: row index}
    clue_index = clue_word_distances['clue_words']   # {candidate clue: column index}

    unguessed_good_words = gameboard.unguessed_words(team_num)
    # Fetched once and reused: it is needed both for the bad-word list and for the legality filter below.
    all_unguessed_words = gameboard.unguessed_words()
    unguessed_bad_words = [word for word in all_unguessed_words if word not in unguessed_good_words]

    # filter out words that contain, or are contained in, words on the board:
    full_candidates = [
        candidate for candidate in clue_index
        if not any(candidate in board_word or board_word in candidate for board_word in all_unguessed_words)
    ]

    # sample down the list of candidates by a factor of 3 for two reasons: 1) to improve runtime and 2) to avoid getting
    # stuck giving the same clue word over and over again
    # np.random.choice yields numpy strings; convert so the returned clue is always a plain str, matching the fallback.
    candidates = [str(word) for word in
                  np.random.choice(full_candidates, len(full_candidates) // 3, replace=False)]

    # {good word: {candidate: distance}} looked up from the pre-computed table
    good_word_distances = {
        good_word: {
            clue_candidate: distances[board_index[good_word], clue_index[clue_candidate]]
            for clue_candidate in candidates
        }
        for good_word in unguessed_good_words
    }

    # Distance from each candidate to its nearest bad word. This does not depend on which good words are being
    # combined, so it is computed once per candidate here rather than once per (combination, candidate) pair.
    nearest_bad_distance = {}
    for clue_candidate in candidates:
        column = clue_index[clue_candidate]
        w_d = float('Inf')  # stays infinite when there are no bad words left
        for bad_word in unguessed_bad_words:
            bad_distance = distances[board_index[bad_word], column]
            if bad_distance < w_d:
                w_d = bad_distance
        nearest_bad_distance[clue_candidate] = w_d

    clue_count = 0
    clue_word = None
    low_score = float('Inf')
    # Try every non-empty subset of our remaining words against every candidate. Ties are broken in favour of the
    # option visited last (<=), i.e. larger subsets and later candidates win.
    for clue_count_to_try in range(1, len(unguessed_good_words) + 1):
        for good_word_combo in itertools.combinations(unguessed_good_words, clue_count_to_try):
            for clue_candidate in candidates:
                d_r_sum = 0
                d_r_max = 0
                for good_word in good_word_combo:
                    this_distance = good_word_distances[good_word][clue_candidate]
                    d_r_sum += this_distance
                    if this_distance > d_r_max:
                        d_r_max = this_distance
                # mean distance divided by the subset size once more, so that covering more words lowers the score
                d_r_score = d_r_sum / clue_count_to_try / clue_count_to_try
                w_d = nearest_bad_distance[clue_candidate]
                if d_r_score <= low_score and d_r_max < w_d - margin and d_r_max < threshold:
                    low_score = d_r_score
                    clue_word = clue_candidate
                    clue_count = clue_count_to_try

    if clue_word is None:
        # if it didn't find a good clue word, return a random word
        clue_word = str(np.random.choice(full_candidates, 1)[0])
        clue_count = 1
    ### END YOUR CODE
    return clue_word, clue_count
def generate_guesses(game_id, team_num, clue_word, clue_count, unguessed_words, boardwords, boardmarkers):
    """
    This is the function that will be called when your bot is the Operative
    Your teammate's bot will provide you with a clue_word and a clue_count. Use them to generate a list of words to
    guess.

    The following inputs will be provided:
    @param game_id (int): the unique identifier for this game. Can be used to locally track info about this game as it
        plays out
    @param team_num (int): 1 if you are on the first team, 2 if you are on the second team. This matches with the
        boardmarkers array
    @param clue_word (str): the one-word clue from your spymaster
    @param clue_count (int): the count of how many board-words are related to the clue word. There are two special
        cases for the clue count:
            A Spymaster can give a clue count of 0 which communicates to the Operative that they should guess words that
                are NOT related to the clue word.
            A spymaster may also give a clue for infinity, allowing the Operative to make as many guesses as they like.
                An int of 10 is used to represent a clue for infinity.
    @param unguessed_words (list[str]): a 1-d list of all the remaining words that have not yet been tapped. This list
        is not modified.
    @param boardwords (5x5 np.array[str]): the 5x5 grid of words. Remains unchanged after initialization
    @param boardmarkers (5x5 np.array[float]): the array that tracks which words have been tapped and what was revealed.
        Starts as an array of np.NaNs. As words are tapped (guessed), the team number (1 = team 1, 2 = team 2,
        0 = neutral, -1 = assassin) of each tapped word are added.

    Please return the outputs as follows:
    @returns guesses (list[str]): a list of the words that you would like to tap in the order you want them tapped.
        Words on the list will continue to be tapped until a word is tapped that is not one of your team's words.
        At most clue_count words are returned, closest to the clue first. If no word is close enough to the clue
        (or clue_count is 0), a single randomly chosen word is returned. Empty only if unguessed_words is empty.
    """
    ### YOUR CODE HERE
    # Algorithm based on the following paper:
    # Cooperation and Codenames: Understanding Natural Language Processing via Codenames
    # by A. Kim, M. Ruzmaykin, A. Truong, and A. Summerville 2019
    threshold_for_guessing = 0.7

    # Work on a copy so the caller's list is left untouched.
    remaining_words = list(unguessed_words)
    guesses = []

    if clue_count > 0 and remaining_words:
        # Score every word exactly once; the distance to the clue does not change between guesses.
        scored_words = [(dist(clue_word, word), word) for word in remaining_words]
        close_words = [scored for scored in scored_words if scored[0] < threshold_for_guessing]
        # The sort is stable, so equally distant words keep their original board order.
        close_words.sort(key=lambda scored: scored[0])
        for _, word in close_words:
            if len(guesses) >= clue_count:
                break
            guesses.append(word)

    if not guesses and remaining_words:
        # nothing was close enough to the clue: fall back to a random word rather than passing the turn
        guesses.append(str(np.random.choice(remaining_words, 1)[0]))
    ### END YOUR CODE
    return guesses