From 3742c1e6d71013c47ab2046e87c8de754c44ae2f Mon Sep 17 00:00:00 2001 From: mrfesol Date: Tue, 2 Jun 2015 15:41:02 +0200 Subject: [PATCH 1/5] MTD-bi algorithm --- easyAI/AI/DictTT.py | 16 ++++---- easyAI/AI/HashTT.py | 4 +- easyAI/AI/MTDbi.py | 73 +++++++++++++++++++++++++++++++++++++ easyAI/AI/Negamax.py | 21 ++++++----- easyAI/AI/__init__.py | 4 +- easyAI/__init__.py | 2 +- easyAI/games/Chopsticks.py | 19 ++++++---- easyAI/games/ConnectFour.py | 5 ++- easyAI/games/Nim.py | 14 ++++--- 9 files changed, 123 insertions(+), 35 deletions(-) create mode 100644 easyAI/AI/MTDbi.py diff --git a/easyAI/AI/DictTT.py b/easyAI/AI/DictTT.py index e01d3b8..0685c23 100644 --- a/easyAI/AI/DictTT.py +++ b/easyAI/AI/DictTT.py @@ -13,13 +13,15 @@ def __init__(self, num_buckets=1024, own_hash = None): self.dict = [] for i in range(num_buckets): self.dict.append((None, None)) - self.keys = dict() + #self.keys = dict() self.hash = hash if own_hash != None: - own_hash.modulo = len(self.dict) + own_hash.modulo = len(self.dict)-1 self.hash = own_hash.get_hash self.num_collisions = 0 self.num_calls = 0 + self.num_calcs = 0 + self.num_lookups = 0 def hash_key(self, key): """ @@ -27,7 +29,7 @@ def hash_key(self, key): an index for the dict. """ self.num_calls += 1 - return self.hash(key) % len(self.dict) + return self.hash(key) & len(self.dict)-1 def get_slot(self, key, default=None): """ @@ -59,10 +61,10 @@ def set(self, key, value): self.dict[slot] = (key, value) - if self.keys.__contains__(key): - self.keys[key] = self.keys[key] + 1 - else: - self.keys[key] = 1 + #if self.keys.__contains__(key): + # self.keys[key] = self.keys[key] + 1 + #else: + # self.keys[key] = 1 def delete(self, key): """ diff --git a/easyAI/AI/HashTT.py b/easyAI/AI/HashTT.py index 5921b17..acdc67f 100644 --- a/easyAI/AI/HashTT.py +++ b/easyAI/AI/HashTT.py @@ -6,7 +6,7 @@ class HashTT: """ def __init__(self): - self.modulo = 1024 #default value + self.modulo = 1023 #default value def before(self, key): """ @@ -31,7 +31,7 @@ def get_hash(self, key, depth = 0): if type(key) is str and len(key) <= 1: return self.hash_char(key) for v in list(key): - ret_hash = self.join(ret_hash, self.get_hash(v, depth+1)) % self.modulo + ret_hash = self.join(ret_hash, self.get_hash(v, depth+1)) & self.modulo if depth == 0: ret_hash = self.after(key, ret_hash) return ret_hash diff --git a/easyAI/AI/MTDbi.py b/easyAI/AI/MTDbi.py new file mode 100644 index 0000000..d22d0c6 --- /dev/null +++ b/easyAI/AI/MTDbi.py @@ -0,0 +1,73 @@ +#contributed by mrfesol (Tomasz Wesolowski) + +from easyAI.AI.MTdriver import mtd + +class MTDbi: + """ + This implements MTDbi algorithm. The following example shows + how to setup the AI and play a Connect Four game: + + >>> from easyAI import Human_Player, AI_Player, MTDbi + >>> AI = DUAL(7) + >>> game = ConnectFour([AI_Player(AI),Human_Player()]) + >>> game.play() + + Parameters + ----------- + + depth: + How many moves in advance should the AI think ? + (2 moves = 1 complete turn) + + scoring: + A function f(game)-> score. If no scoring is provided + and the game object has a ``scoring`` method it ill be used. + + win_score: + Score LARGER than the largest score of game, but smaller than inf. + It's required to run algorithm. + + tt: + A transposition table (a table storing game states and moves) + scoring: can be none if the game that the AI will be given has a + ``scoring`` method. + + Notes + ----- + + The score of a given game is given by + + >>> scoring(current_game) - 0.01*sign*current_depth + + for instance if a lose is -100 points, then losing after 4 moves + will score -99.96 points but losing after 8 moves will be -99.92 + points. Thus, the AI will chose the move that leads to defeat in + 8 turns, which makes it more difficult for the (human) opponent. + This will not always work if a ``win_score`` argument is provided. + + """ + + def __init__(self, depth, scoring=None, win_score=100000, tt=None): + self.scoring = scoring + self.depth = depth + self.tt = tt + self.win_score= win_score + + def __call__(self,game): + """ + Returns the AI's best move given the current state of the game. + """ + + scoring = self.scoring if self.scoring else ( + lambda g: g.scoring() ) # horrible hack + + first = 0 #essence of MTDbi algorithm + next = (lambda lowerbound, upperbound, bestValue: (lowerbound + upperbound)/2) + + self.alpha = mtd(game, + first, next, + self.depth, + scoring, + self.tt) + + return game.ai_move diff --git a/easyAI/AI/Negamax.py b/easyAI/AI/Negamax.py index 0d77328..bc38f38 100644 --- a/easyAI/AI/Negamax.py +++ b/easyAI/AI/Negamax.py @@ -4,6 +4,7 @@ """ import pickle +from easyAI.games.ThreeMusketeers import MOVES LOWERBOUND, EXACT, UPPERBOUND = -1,0,1 inf = float('infinity') @@ -18,6 +19,10 @@ def negamax(game, depth, origDepth, scoring, alpha=+inf, beta=-inf, http://en.wikipedia.org/wiki/Negamax """ + + #if tt != None: + #tt.d.num_calcs += 1 + alphaOrig = alpha # Is there a transposition table and is this game in it ? @@ -27,6 +32,7 @@ def negamax(game, depth, origDepth, scoring, alpha=+inf, beta=-inf, # The game has been visited in the past if lookup['depth'] >= depth: + #tt.d.num_lookups += 1 flag, value = lookup['flag'], lookup['value'] if flag == EXACT: if depth == origDepth: @@ -60,17 +66,14 @@ def negamax(game, depth, origDepth, scoring, alpha=+inf, beta=-inf, possible_moves = [lookup['move']] + possible_moves else: - possible_moves = game.possible_moves() - - state = game best_move = possible_moves[0] if depth == origDepth: state.ai_move = possible_moves[0] - bestValue = -inf + best_value = -inf unmake_move = hasattr(state, 'unmake_move') @@ -89,7 +92,7 @@ def negamax(game, depth, origDepth, scoring, alpha=+inf, beta=-inf, game.switch_player() game.unmake_move(move) - bestValue = max( bestValue, move_alpha ) + best_value = max( best_value, move_alpha ) if alpha < move_alpha : alpha = move_alpha best_move = move @@ -101,12 +104,12 @@ def negamax(game, depth, origDepth, scoring, alpha=+inf, beta=-inf, if tt != None: assert best_move in possible_moves - tt.store(game=state, depth=depth, value = bestValue, + tt.store(game=state, depth=depth, value = best_value, move= best_move, - flag = UPPERBOUND if (bestValue <= alphaOrig) else ( - LOWERBOUND if (bestValue >= beta) else EXACT)) + flag = UPPERBOUND if (best_value <= alphaOrig) else ( + LOWERBOUND if (best_value >= beta) else EXACT)) - return bestValue + return best_value class Negamax: diff --git a/easyAI/AI/__init__.py b/easyAI/AI/__init__.py index 4ce5c3b..d2eb06e 100644 --- a/easyAI/AI/__init__.py +++ b/easyAI/AI/__init__.py @@ -4,4 +4,6 @@ from .MTdriver import mtd from .SSS import SSS from .DUAL import DUAL -from .HashTT import HashTT \ No newline at end of file +from .MTDbi import MTDbi +from .HashTT import HashTT +from .DictTT import DictTT \ No newline at end of file diff --git a/easyAI/__init__.py b/easyAI/__init__.py index 32a660c..baaf60e 100644 --- a/easyAI/__init__.py +++ b/easyAI/__init__.py @@ -6,5 +6,5 @@ from .AI import Negamax, id_solve, df_solve from .AI import TT from .AI import mtd -from .AI import SSS, DUAL +from .AI import SSS, DUAL, MTDbi from .AI import HashTT, DictTT \ No newline at end of file diff --git a/easyAI/games/Chopsticks.py b/easyAI/games/Chopsticks.py index 30d300b..a24c72b 100644 --- a/easyAI/games/Chopsticks.py +++ b/easyAI/games/Chopsticks.py @@ -5,6 +5,7 @@ from copy import deepcopy from easyAI.AI.DictTT import DictTT from easyAI.AI.Hashes import JSWHashTT +from easyAI.AI import MTDbi class Chopsticks( TwoPlayersGame ): """ @@ -120,15 +121,19 @@ def back_to_startstate(self, move): return hands_min == 1 and hands_max == 1 if __name__ == "__main__": - from easyAI import Negamax, AI_Player, SSS, DUAL + from easyAI import Negamax, AI_Player, SSS, DUAL, MTDbi from easyAI.AI.TT import TT - ai_algo_neg = Negamax(4) - ai_algo_sss = SSS(4) - dict_tt = DictTT(32, JSWHashTT()) - ai_algo_dual = DUAL(4, tt=TT(dict_tt)) - Chopsticks( [AI_Player(ai_algo_neg),AI_Player(ai_algo_dual)]).play() #first player never wins + + dict_tt = DictTT(32) + ai_algo_sss = SSS(6, tt=TT(dict_tt)) + ai_algo_neg = Negamax(6, tt=TT(dict_tt)) + ai_algo_bi = MTDbi(6, tt=TT(dict_tt)) + #ai_algo_dual = DUAL(4, tt=TT(dict_tt)) + Chopsticks( [AI_Player(Negamax(6)),AI_Player(ai_algo_bi)]).play() #first player never wins print '-'*10 print 'Statistics of custom dictionary:' print 'Calls of hash: ', dict_tt.num_calls - print 'Collisions: ', dict_tt.num_collisions \ No newline at end of file + print 'Collisions: ', dict_tt.num_collisions + print 'Num of calculations: ', dict_tt.num_calcs + print 'Num lookups: ', dict_tt.num_lookups \ No newline at end of file diff --git a/easyAI/games/ConnectFour.py b/easyAI/games/ConnectFour.py index 2e03c01..41c93da 100644 --- a/easyAI/games/ConnectFour.py +++ b/easyAI/games/ConnectFour.py @@ -1,4 +1,5 @@ from easyAI.AI.DictTT import DictTT +from easyAI.AI.MTDbi import MTDbi try: import numpy as np except ImportError: @@ -71,10 +72,10 @@ def find_four(board, nplayer): if __name__ == '__main__': # LET'S PLAY ! - from easyAI import Human_Player, AI_Player, Negamax, SSS, DUAL + from easyAI import Human_Player, AI_Player, Negamax, SSS, DUAL, MTDbi ai_algo_neg = Negamax(5) - ai_algo_sss = SSS(5) + ai_algo_sss = MTDbi(5) game = ConnectFour([AI_Player(ai_algo_neg), AI_Player(ai_algo_sss)]) game.play() if game.lose(): diff --git a/easyAI/games/Nim.py b/easyAI/games/Nim.py index 77b20b2..eb29d54 100644 --- a/easyAI/games/Nim.py +++ b/easyAI/games/Nim.py @@ -48,19 +48,21 @@ def ttentry(self): return tuple(self.piles) #optional, speeds up AI if __name__ == "__main__": # IN WHAT FOLLOWS WE SOLVE THE GAME AND START A MATCH AGAINST THE AI - from easyAI import AI_Player, Human_Player, Negamax, id_solve + from easyAI import AI_Player, Human_Player, Negamax, id_solve, SSS, DictTT from easyAI.AI import TT # we first solve the game - w, d, m, tt = id_solve(Nim, range(5, 20), win_score = 80) - print - w, d, len(tt.d) + #w, d, m, tt = id_solve(Nim, range(5, 20), win_score = 80) + #print + #w, d, len(tt.d) # the previous line prints -1, 16 which shows that if the # computer plays second with an AI depth of 16 (or 15) it will # always win in 16 (total) moves or less. # Now let's play (and lose !) against the AI - ai = Negamax(16, tt = TT()) - game = Nim([Human_Player(), AI_Player(tt)]) + dict_tt = DictTT() + ai1 = Negamax(1024, tt = TT()) + ai2 = SSS(10, tt = TT(dict_tt)) + game = Nim([AI_Player(ai1), AI_Player(ai2)]) game.play() # You will always lose this game ! print("player %d wins" % game.nplayer) From 6c6188ac35d0a3d4479221e8553b45e9e50a4335 Mon Sep 17 00:00:00 2001 From: mrfesol Date: Tue, 2 Jun 2015 16:10:24 +0200 Subject: [PATCH 2/5] MTD-f algorithm --- easyAI/AI/DUAL.py | 4 +- easyAI/AI/DictTT.py | 12 ++++-- easyAI/AI/HashTT.py | 5 ++- easyAI/AI/MTDbi.py | 4 +- easyAI/AI/MTDf.py | 82 ++++++++++++++++++++++++++++++++++++++ easyAI/AI/MTdriver.py | 4 +- easyAI/AI/SSS.py | 4 +- easyAI/AI/__init__.py | 1 + easyAI/__init__.py | 2 +- easyAI/games/Chopsticks.py | 13 ++---- 10 files changed, 108 insertions(+), 23 deletions(-) create mode 100644 easyAI/AI/MTDf.py diff --git a/easyAI/AI/DUAL.py b/easyAI/AI/DUAL.py index 3c98f00..8bc1716 100644 --- a/easyAI/AI/DUAL.py +++ b/easyAI/AI/DUAL.py @@ -61,8 +61,8 @@ def __call__(self,game): scoring = self.scoring if self.scoring else ( lambda g: g.scoring() ) # horrible hack - first = -self.win_score #essence of DUAL algorithm - next = (lambda lowerbound, upperbound, bestValue: bestValue + 1) + first = (lambda game, tt: -self.win_score) #essence of DUAL algorithm + next = (lambda lowerbound, upperbound, bestValue, bound: bestValue + 1) self.alpha = mtd(game, first, next, diff --git a/easyAI/AI/DictTT.py b/easyAI/AI/DictTT.py index 0685c23..dfbeac5 100644 --- a/easyAI/AI/DictTT.py +++ b/easyAI/AI/DictTT.py @@ -20,7 +20,6 @@ def __init__(self, num_buckets=1024, own_hash = None): self.hash = own_hash.get_hash self.num_collisions = 0 self.num_calls = 0 - self.num_calcs = 0 self.num_lookups = 0 def hash_key(self, key): @@ -46,7 +45,8 @@ def get_slot(self, key, default=None): def get(self, key, default=None): """ Gets the value for the given key, or the default. - """ + """ + self.num_lookups += 1 i, k, v = self.get_slot(key, default=default) return v @@ -99,4 +99,10 @@ def __iter__(self): def __contains__(self, key): return self.keys.__contains__(key) - \ No newline at end of file + + def print_stats(self): + print '-'*10 + print 'Statistics of custom dictionary:' + print 'Calls of hash: ', self.num_calls + print 'Collisions: ', self.num_collisions + print 'Num lookups: ', self.num_lookups \ No newline at end of file diff --git a/easyAI/AI/HashTT.py b/easyAI/AI/HashTT.py index acdc67f..5a11eea 100644 --- a/easyAI/AI/HashTT.py +++ b/easyAI/AI/HashTT.py @@ -13,7 +13,7 @@ def before(self, key): Returns initial value of hash. It's also the place where you can initialize some auxiliary variables """ - return 0 + return 1 def after(self, key, hash): """ @@ -25,7 +25,8 @@ def get_hash(self, key, depth = 0): """ Recursively computes a hash """ - ret_hash = self.before(key) + if depth == 0: + ret_hash = self.before(key) if type(key) is int: return self.hash_int(key) if type(key) is str and len(key) <= 1: diff --git a/easyAI/AI/MTDbi.py b/easyAI/AI/MTDbi.py index d22d0c6..7392e35 100644 --- a/easyAI/AI/MTDbi.py +++ b/easyAI/AI/MTDbi.py @@ -61,8 +61,8 @@ def __call__(self,game): scoring = self.scoring if self.scoring else ( lambda g: g.scoring() ) # horrible hack - first = 0 #essence of MTDbi algorithm - next = (lambda lowerbound, upperbound, bestValue: (lowerbound + upperbound)/2) + first = (lambda game, tt: 0) #essence of MTDbi algorithm + next = (lambda lowerbound, upperbound, bestValue, bound: (lowerbound + upperbound)/2) self.alpha = mtd(game, first, next, diff --git a/easyAI/AI/MTDf.py b/easyAI/AI/MTDf.py new file mode 100644 index 0000000..b1b6004 --- /dev/null +++ b/easyAI/AI/MTDf.py @@ -0,0 +1,82 @@ +#contributed by mrfesol (Tomasz Wesolowski) + +from easyAI.AI.MTdriver import mtd + +class MTDf: + """ + This implements MTDbi algorithm. The following example shows + how to setup the AI and play a Connect Four game: + + >>> from easyAI import Human_Player, AI_Player, MTDbi + >>> AI = DUAL(7) + >>> game = ConnectFour([AI_Player(AI),Human_Player()]) + >>> game.play() + + Parameters + ----------- + + depth: + How many moves in advance should the AI think ? + (2 moves = 1 complete turn) + + scoring: + A function f(game)-> score. If no scoring is provided + and the game object has a ``scoring`` method it ill be used. + + win_score: + Score LARGER than the largest score of game, but smaller than inf. + It's required to run algorithm. + + tt: + A transposition table (a table storing game states and moves) + scoring: can be none if the game that the AI will be given has a + ``scoring`` method. + + Notes + ----- + + The score of a given game is given by + + >>> scoring(current_game) - 0.01*sign*current_depth + + for instance if a lose is -100 points, then losing after 4 moves + will score -99.96 points but losing after 8 moves will be -99.92 + points. Thus, the AI will chose the move that leads to defeat in + 8 turns, which makes it more difficult for the (human) opponent. + This will not always work if a ``win_score`` argument is provided. + + """ + + def __init__(self, depth, scoring=None, win_score=100000, tt=None): + self.scoring = scoring + self.depth = depth + self.tt = tt + self.win_score= win_score + + @staticmethod + def first(game, tt): + lookup = None if (tt is None) else tt.lookup(game) + if lookup == None: + return 0 + lowerbound, upperbound = lookup['lowerbound'], lookup['upperbound'] + return (lowerbound+upperbound)/2 + + def __call__(self,game): + """ + Returns the AI's best move given the current state of the game. + """ + + scoring = self.scoring if self.scoring else ( + lambda g: g.scoring() ) # horrible hack + + + first = MTDf.first #essence of MTDf algorithm + next = (lambda lowerbound, upperbound, bestValue, bound: bestValue if bestValue < bound else bestValue + 1) + + self.alpha = mtd(game, + first, next, + self.depth, + scoring, + self.tt) + + return game.ai_move diff --git a/easyAI/AI/MTdriver.py b/easyAI/AI/MTdriver.py index 3db44c5..cd9d2b8 100644 --- a/easyAI/AI/MTdriver.py +++ b/easyAI/AI/MTdriver.py @@ -92,10 +92,10 @@ def mtd(game, first, next, depth, scoring, tt = None): For more details read following paper: http://arxiv.org/ftp/arxiv/papers/1404/1404.1515.pdf """ - bound, best_value = first, first + bound, best_value = first(game, tt), first(game, tt) lowerbound, upperbound = -inf, inf while True: - bound = next(lowerbound, upperbound, best_value) + bound = next(lowerbound, upperbound, best_value, bound) best_value = mt(game, bound - eps, depth, depth, scoring, tt) if best_value < bound: upperbound = best_value diff --git a/easyAI/AI/SSS.py b/easyAI/AI/SSS.py index 2c70eb1..41a5d5a 100644 --- a/easyAI/AI/SSS.py +++ b/easyAI/AI/SSS.py @@ -61,8 +61,8 @@ def __call__(self,game): scoring = self.scoring if self.scoring else ( lambda g: g.scoring() ) # horrible hack - first = self.win_score #essence of SSS algorithm - next = (lambda lowerbound, upperbound, bestValue: bestValue) + first = (lambda game, tt: self.win_score) #essence of SSS algorithm + next = (lambda lowerbound, upperbound, bestValue, bound: bestValue) self.alpha = mtd(game, first, next, diff --git a/easyAI/AI/__init__.py b/easyAI/AI/__init__.py index d2eb06e..6271b08 100644 --- a/easyAI/AI/__init__.py +++ b/easyAI/AI/__init__.py @@ -5,5 +5,6 @@ from .SSS import SSS from .DUAL import DUAL from .MTDbi import MTDbi +from .MTDf import MTDf from .HashTT import HashTT from .DictTT import DictTT \ No newline at end of file diff --git a/easyAI/__init__.py b/easyAI/__init__.py index baaf60e..e6f9ba6 100644 --- a/easyAI/__init__.py +++ b/easyAI/__init__.py @@ -6,5 +6,5 @@ from .AI import Negamax, id_solve, df_solve from .AI import TT from .AI import mtd -from .AI import SSS, DUAL, MTDbi +from .AI import SSS, DUAL, MTDbi, MTDf from .AI import HashTT, DictTT \ No newline at end of file diff --git a/easyAI/games/Chopsticks.py b/easyAI/games/Chopsticks.py index a24c72b..d15a9ce 100644 --- a/easyAI/games/Chopsticks.py +++ b/easyAI/games/Chopsticks.py @@ -121,19 +121,14 @@ def back_to_startstate(self, move): return hands_min == 1 and hands_max == 1 if __name__ == "__main__": - from easyAI import Negamax, AI_Player, SSS, DUAL, MTDbi + from easyAI import Negamax, AI_Player, SSS, DUAL, MTDbi, MTDf from easyAI.AI.TT import TT dict_tt = DictTT(32) ai_algo_sss = SSS(6, tt=TT(dict_tt)) ai_algo_neg = Negamax(6, tt=TT(dict_tt)) ai_algo_bi = MTDbi(6, tt=TT(dict_tt)) + ai_algo_f = MTDf(5, tt=TT(dict_tt)) #ai_algo_dual = DUAL(4, tt=TT(dict_tt)) - Chopsticks( [AI_Player(Negamax(6)),AI_Player(ai_algo_bi)]).play() #first player never wins - - print '-'*10 - print 'Statistics of custom dictionary:' - print 'Calls of hash: ', dict_tt.num_calls - print 'Collisions: ', dict_tt.num_collisions - print 'Num of calculations: ', dict_tt.num_calcs - print 'Num lookups: ', dict_tt.num_lookups \ No newline at end of file + Chopsticks( [AI_Player(Negamax(5)),AI_Player(ai_algo_f)]).play() #first player never wins + dict_tt.print_stats() \ No newline at end of file From e4d2c3db8740b0e4f7b477a2c667b8fe952b95f2 Mon Sep 17 00:00:00 2001 From: mrfesol Date: Tue, 2 Jun 2015 16:17:19 +0200 Subject: [PATCH 3/5] MTD-step algorithm --- easyAI/AI/MTDbi.py | 4 +- easyAI/AI/MTDf.py | 6 +-- easyAI/AI/MTDstep.py | 78 ++++++++++++++++++++++++++++++++++++++ easyAI/AI/__init__.py | 1 + easyAI/__init__.py | 2 +- easyAI/games/Chopsticks.py | 5 ++- 6 files changed, 88 insertions(+), 8 deletions(-) create mode 100644 easyAI/AI/MTDstep.py diff --git a/easyAI/AI/MTDbi.py b/easyAI/AI/MTDbi.py index 7392e35..49c31b3 100644 --- a/easyAI/AI/MTDbi.py +++ b/easyAI/AI/MTDbi.py @@ -4,11 +4,11 @@ class MTDbi: """ - This implements MTDbi algorithm. The following example shows + This implements MTD-bi algorithm. The following example shows how to setup the AI and play a Connect Four game: >>> from easyAI import Human_Player, AI_Player, MTDbi - >>> AI = DUAL(7) + >>> AI = MTDbi(7) >>> game = ConnectFour([AI_Player(AI),Human_Player()]) >>> game.play() diff --git a/easyAI/AI/MTDf.py b/easyAI/AI/MTDf.py index b1b6004..e9bb746 100644 --- a/easyAI/AI/MTDf.py +++ b/easyAI/AI/MTDf.py @@ -4,11 +4,11 @@ class MTDf: """ - This implements MTDbi algorithm. The following example shows + This implements MTD-f algorithm. The following example shows how to setup the AI and play a Connect Four game: - >>> from easyAI import Human_Player, AI_Player, MTDbi - >>> AI = DUAL(7) + >>> from easyAI import Human_Player, AI_Player, MTDf + >>> AI = MTDf(7) >>> game = ConnectFour([AI_Player(AI),Human_Player()]) >>> game.play() diff --git a/easyAI/AI/MTDstep.py b/easyAI/AI/MTDstep.py new file mode 100644 index 0000000..260916c --- /dev/null +++ b/easyAI/AI/MTDstep.py @@ -0,0 +1,78 @@ +#contributed by mrfesol (Tomasz Wesolowski) + +from easyAI.AI.MTdriver import mtd + +class MTDstep: + """ + This implements MTD-step algorithm. The following example shows + how to setup the AI and play a Connect Four game: + + >>> from easyAI import Human_Player, AI_Player, MTDstep + >>> AI = MTDstep(7) + >>> game = ConnectFour([AI_Player(AI),Human_Player()]) + >>> game.play() + + Parameters + ----------- + + depth: + How many moves in advance should the AI think ? + (2 moves = 1 complete turn) + + scoring: + A function f(game)-> score. If no scoring is provided + and the game object has a ``scoring`` method it ill be used. + + win_score: + Score LARGER than the largest score of game, but smaller than inf. + It's required to run algorithm. + + tt: + A transposition table (a table storing game states and moves) + scoring: can be none if the game that the AI will be given has a + ``scoring`` method. + + step_size: + Size of jump from one bound to next + + Notes + ----- + + The score of a given game is given by + + >>> scoring(current_game) - 0.01*sign*current_depth + + for instance if a lose is -100 points, then losing after 4 moves + will score -99.96 points but losing after 8 moves will be -99.92 + points. Thus, the AI will chose the move that leads to defeat in + 8 turns, which makes it more difficult for the (human) opponent. + This will not always work if a ``win_score`` argument is provided. + + """ + + def __init__(self, depth, scoring=None, win_score=100000, tt=None, step_size = 100): + self.scoring = scoring + self.depth = depth + self.tt = tt + self.win_score = win_score + self.step_size = step_size + + def __call__(self,game): + """ + Returns the AI's best move given the current state of the game. + """ + + scoring = self.scoring if self.scoring else ( + lambda g: g.scoring() ) # horrible hack + + + first = (lambda game, tt: self.win_score) + next = (lambda lowerbound, upperbound, bestValue, bound: max(lowerbound + 1, bestValue - self.step_size)) + + self.alpha = mtd(game, + first, next, + self.depth, + scoring, + self.tt) + + return game.ai_move diff --git a/easyAI/AI/__init__.py b/easyAI/AI/__init__.py index 6271b08..895c0b4 100644 --- a/easyAI/AI/__init__.py +++ b/easyAI/AI/__init__.py @@ -6,5 +6,6 @@ from .DUAL import DUAL from .MTDbi import MTDbi from .MTDf import MTDf +from .MTDstep import MTDstep from .HashTT import HashTT from .DictTT import DictTT \ No newline at end of file diff --git a/easyAI/__init__.py b/easyAI/__init__.py index e6f9ba6..1b6e02c 100644 --- a/easyAI/__init__.py +++ b/easyAI/__init__.py @@ -6,5 +6,5 @@ from .AI import Negamax, id_solve, df_solve from .AI import TT from .AI import mtd -from .AI import SSS, DUAL, MTDbi, MTDf +from .AI import SSS, DUAL, MTDbi, MTDf, MTDstep from .AI import HashTT, DictTT \ No newline at end of file diff --git a/easyAI/games/Chopsticks.py b/easyAI/games/Chopsticks.py index d15a9ce..be6005d 100644 --- a/easyAI/games/Chopsticks.py +++ b/easyAI/games/Chopsticks.py @@ -121,7 +121,7 @@ def back_to_startstate(self, move): return hands_min == 1 and hands_max == 1 if __name__ == "__main__": - from easyAI import Negamax, AI_Player, SSS, DUAL, MTDbi, MTDf + from easyAI import Negamax, AI_Player, SSS, DUAL, MTDbi, MTDf, MTDstep from easyAI.AI.TT import TT dict_tt = DictTT(32) @@ -129,6 +129,7 @@ def back_to_startstate(self, move): ai_algo_neg = Negamax(6, tt=TT(dict_tt)) ai_algo_bi = MTDbi(6, tt=TT(dict_tt)) ai_algo_f = MTDf(5, tt=TT(dict_tt)) + ai_algo_step = MTDstep(5, tt=TT(dict_tt)) #ai_algo_dual = DUAL(4, tt=TT(dict_tt)) - Chopsticks( [AI_Player(Negamax(5)),AI_Player(ai_algo_f)]).play() #first player never wins + Chopsticks( [AI_Player(Negamax(5)),AI_Player(ai_algo_step)]).play() #first player never wins dict_tt.print_stats() \ No newline at end of file From 5573fb939d277c9acd28514ac6d0eabf73410efb Mon Sep 17 00:00:00 2001 From: mrfesol Date: Mon, 8 Jun 2015 18:15:21 +0200 Subject: [PATCH 4/5] Introducing Monte Carlo Tree Search algorithm --- easyAI/AI/MCTS.py | 125 +++++++++++++++++++++++++++++++++++++ easyAI/AI/__init__.py | 3 +- easyAI/__init__.py | 2 +- easyAI/games/Chopsticks.py | 2 +- easyAI/games/Nim.py | 7 ++- 5 files changed, 134 insertions(+), 5 deletions(-) create mode 100644 easyAI/AI/MCTS.py diff --git a/easyAI/AI/MCTS.py b/easyAI/AI/MCTS.py new file mode 100644 index 0000000..b8ebb95 --- /dev/null +++ b/easyAI/AI/MCTS.py @@ -0,0 +1,125 @@ +import random +from math import sqrt, log + +class MCTS: + """ + This implements Monte Carlo Tree Search algorithm. + More information at: http://mcts.ai/index.html + The following example shows + how to setup the AI and play a Connect Four game: + + >>> from easyAI import Human_Player, AI_Player, MTDf + >>> AI = MonteCarloTreeSearch() + >>> game = ConnectFour([AI_Player(AI),Human_Player()]) + >>> game.play() + + Parameters + ----------- + + iterations: + Indicates how many iteration algorithm should perform. + Larger value = More accurate result + + max_depth: + How many moves in advance should the AI think ? + (2 moves = 1 complete turn) + + expand_factor: + Defines how much is algorithm willing to expand unvisited nodes. + Usually between 0.3 and 1.0 + + scoring: + A function f(game)-> score. If no scoring is provided + and the game object has a ``scoring`` method it ill be used. + Scoring function MUST return values from interval [0, win_score] + + win_score: + The largest score of game. + It's required to run algorithm. + + """ + + def __init__(self, iterations = 5000, winscore=100, depth = 20, expand_factor=0.3, scoring=None): + self.scoring = scoring + self.iterations = iterations + self.winscore = winscore + self.max_depth = depth + self.expand_factor = expand_factor + + def __call__(self,game): + """ + Returns the AI's best move given the current state of the game. + """ + rootnode = MCTSNode(state = game) + + scoring = self.scoring if self.scoring else ( + lambda g: g.scoring() ) # horrible hack + + for i in range(self.iterations): + node = rootnode + state = game.copy() + depth = 0 + + # Select + while node.untried == [] and node.children != []: + node = node.select_child(self.expand_factor) + state.make_move(node.move) + state.switch_player() + depth += 1 + + # Expand + if node.untried != []: + m = random.choice(node.untried) + state.make_move(m) + state.switch_player() + node = node.add_child(m,state) + + # Rollout, + while state.possible_moves() != [] and depth < self.max_depth: + state.make_move(random.choice(state.possible_moves())) + state.switch_player() + depth += 1 + + # Backpropagate + score = 1 - max(0, (scoring(state)/self.winscore)) + while node != None: + node.update(score) + node = node.parent + score = 1-score + + rootnode.children.sort(key = lambda c: c.visits) + return rootnode.children[-1].move + +class MCTSNode: + def __init__(self, move = None, parent = None, state = None): + self.move = move + self.parent = parent + self.children = [] + self.wins = 0.0 + self.visits = 0.0 + self.untried = state.possible_moves() + self.last_player = state.nopponent + + def formula(self): + return self.wins/self.visits + + def formula_exp(self): + return 0.3*sqrt(2*log(self.parent.visits)/self.visits) + + def select_child(self, expand_factor): + """ Using the UCB1 formula to select_child a child node. + """ + return sorted(self.children, key = lambda c: c.wins/c.visits + expand_factor*sqrt(2*log(self.visits)/c.visits))[-1] + + def add_child(self, m, s): + n = MCTSNode(move = m, parent = self, state = s) + self.untried.remove(m) + self.children.append(n) + return n + + def update(self, result): + self.visits += 1 + self.wins += result + + def __repr__(self): + return "[P: " + str(self.last_player) + " M:" + str(self.move) + " W/V:" + str(self.wins) + "/" + str(self.visits) + " F: " + str(self.formula()) + " F_exp: " + str(self.formula_exp()) + "]" diff --git a/easyAI/AI/__init__.py b/easyAI/AI/__init__.py index 895c0b4..8f9a43f 100644 --- a/easyAI/AI/__init__.py +++ b/easyAI/AI/__init__.py @@ -8,4 +8,5 @@ from .MTDf import MTDf from .MTDstep import MTDstep from .HashTT import HashTT -from .DictTT import DictTT \ No newline at end of file +from .DictTT import DictTT +from .MCTS import MCTS \ No newline at end of file diff --git a/easyAI/__init__.py b/easyAI/__init__.py index 1b6e02c..78efd67 100644 --- a/easyAI/__init__.py +++ b/easyAI/__init__.py @@ -6,5 +6,5 @@ from .AI import Negamax, id_solve, df_solve from .AI import TT from .AI import mtd -from .AI import SSS, DUAL, MTDbi, MTDf, MTDstep +from .AI import SSS, DUAL, MTDbi, MTDf, MTDstep, MCTS from .AI import HashTT, DictTT \ No newline at end of file diff --git a/easyAI/games/Chopsticks.py b/easyAI/games/Chopsticks.py index be6005d..60f8131 100644 --- a/easyAI/games/Chopsticks.py +++ b/easyAI/games/Chopsticks.py @@ -93,7 +93,7 @@ def scoring(self): Very simple heuristic counting 'alive' hands """ if self.lose(): - return -100 + return 0 if self.win(): return 100 alive = [0] * 2 diff --git a/easyAI/games/Nim.py b/easyAI/games/Nim.py index eb29d54..34a47e6 100644 --- a/easyAI/games/Nim.py +++ b/easyAI/games/Nim.py @@ -1,4 +1,6 @@ from easyAI import TwoPlayersGame +from easyAI.AI import MCTS +from easyAI.AI.MCTS import MCTS class Nim(TwoPlayersGame): @@ -60,9 +62,10 @@ def ttentry(self): return tuple(self.piles) #optional, speeds up AI # Now let's play (and lose !) against the AI dict_tt = DictTT() - ai1 = Negamax(1024, tt = TT()) + ai1 = Negamax(7) ai2 = SSS(10, tt = TT(dict_tt)) - game = Nim([AI_Player(ai1), AI_Player(ai2)]) + ai3 = MCTS(20000) + game = Nim([AI_Player(ai3), AI_Player(ai1)]) game.play() # You will always lose this game ! print("player %d wins" % game.nplayer) From dd321f05dc323659c6846d209260454ee47a651e Mon Sep 17 00:00:00 2001 From: mrfesol Date: Mon, 15 Jun 2015 22:36:52 +0200 Subject: [PATCH 5/5] Small improvements. --- easyAI/AI/DictTT.py | 12 ++++++------ easyAI/AI/HashTT.py | 4 ++-- easyAI/AI/Hashes.py | 2 +- easyAI/AI/MCTS.py | 11 ++++++++--- easyAI/AI/MTDf.py | 3 ++- easyAI/AI/solving.py | 2 +- easyAI/games/Chopsticks.py | 18 +++++++++--------- easyAI/games/Nim.py | 22 ++++++++++++---------- 8 files changed, 41 insertions(+), 33 deletions(-) diff --git a/easyAI/AI/DictTT.py b/easyAI/AI/DictTT.py index dfbeac5..048bc2e 100644 --- a/easyAI/AI/DictTT.py +++ b/easyAI/AI/DictTT.py @@ -16,7 +16,7 @@ def __init__(self, num_buckets=1024, own_hash = None): #self.keys = dict() self.hash = hash if own_hash != None: - own_hash.modulo = len(self.dict)-1 + own_hash.modulo = len(self.dict) self.hash = own_hash.get_hash self.num_collisions = 0 self.num_calls = 0 @@ -101,8 +101,8 @@ def __contains__(self, key): return self.keys.__contains__(key) def print_stats(self): - print '-'*10 - print 'Statistics of custom dictionary:' - print 'Calls of hash: ', self.num_calls - print 'Collisions: ', self.num_collisions - print 'Num lookups: ', self.num_lookups \ No newline at end of file + print ('-'*10) + print ('Statistics of custom dictionary:') + print ('Calls of hash: ', self.num_calls) + print ('Collisions: ', self.num_collisions) + print ('Num lookups: ', self.num_lookups) \ No newline at end of file diff --git a/easyAI/AI/HashTT.py b/easyAI/AI/HashTT.py index 5a11eea..e9b3c69 100644 --- a/easyAI/AI/HashTT.py +++ b/easyAI/AI/HashTT.py @@ -6,7 +6,7 @@ class HashTT: """ def __init__(self): - self.modulo = 1023 #default value + self.modulo = 1024 #default value def before(self, key): """ @@ -32,7 +32,7 @@ def get_hash(self, key, depth = 0): if type(key) is str and len(key) <= 1: return self.hash_char(key) for v in list(key): - ret_hash = self.join(ret_hash, self.get_hash(v, depth+1)) & self.modulo + ret_hash = self.join(ret_hash, self.get_hash(v, depth+1)) % self.modulo if depth == 0: ret_hash = self.after(key, ret_hash) return ret_hash diff --git a/easyAI/AI/Hashes.py b/easyAI/AI/Hashes.py index 1879ef8..7321109 100644 --- a/easyAI/AI/Hashes.py +++ b/easyAI/AI/Hashes.py @@ -66,7 +66,7 @@ def before(self, key): return 0 def join(self, one, two): one = (one << 4) + two; - self.g = one & 0xf0000000L; + self.g = one & 0xf0000000; if self.g != 0: one ^= self.g >> 24 diff --git a/easyAI/AI/MCTS.py b/easyAI/AI/MCTS.py index b8ebb95..baa9e97 100644 --- a/easyAI/AI/MCTS.py +++ b/easyAI/AI/MCTS.py @@ -1,3 +1,5 @@ +#contributed by mrfesol (Tomasz Wesolowski) + import random from math import sqrt, log @@ -74,7 +76,7 @@ def __call__(self,game): state.switch_player() node = node.add_child(m,state) - # Rollout, + # Rollout while state.possible_moves() != [] and depth < self.max_depth: state.make_move(random.choice(state.possible_moves())) state.switch_player() @@ -109,7 +111,8 @@ def formula_exp(self): def select_child(self, expand_factor): """ Using the UCB1 formula to select_child a child node. """ - return sorted(self.children, key = lambda c: c.wins/c.visits + expand_factor*sqrt(2*log(self.visits)/c.visits))[-1] + return sorted(self.children, key = lambda c: c.wins/c.visits + \ + expand_factor*sqrt(2*log(self.visits)/c.visits))[-1] def add_child(self, m, s): n = MCTSNode(move = m, parent = self, state = s) @@ -122,4 +125,6 @@ def update(self, result): self.wins += result def __repr__(self): - return "[P: " + str(self.last_player) + " M:" + str(self.move) + " W/V:" + str(self.wins) + "/" + str(self.visits) + " F: " + str(self.formula()) + " F_exp: " + str(self.formula_exp()) + "]" + return "[P: " + str(self.last_player) + " M:" + str(self.move) + \ + " W/V:" + str(self.wins) + "/" + str(self.visits) + " F: " + \ + str(self.formula()) + " F_exp: " + str(self.formula_exp()) + "]" diff --git a/easyAI/AI/MTDf.py b/easyAI/AI/MTDf.py index e9bb746..f42c4cf 100644 --- a/easyAI/AI/MTDf.py +++ b/easyAI/AI/MTDf.py @@ -71,7 +71,8 @@ def __call__(self,game): first = MTDf.first #essence of MTDf algorithm - next = (lambda lowerbound, upperbound, bestValue, bound: bestValue if bestValue < bound else bestValue + 1) + next = (lambda lowerbound, upperbound, bestValue, bound: bestValue + if bestValue < bound else bestValue + 1) self.alpha = mtd(game, first, next, diff --git a/easyAI/AI/solving.py b/easyAI/AI/solving.py index fcbccaf..6d9930f 100644 --- a/easyAI/AI/solving.py +++ b/easyAI/AI/solving.py @@ -78,7 +78,7 @@ def id_solve(game, ai_depths, win_score, scoring=None, result = (+1 if alpha>= win_score else ( -1 if alpha <= -win_score else 0)) - return result, depth, game.ai_move + return result, depth, game.ai_move, tt def df_solve(game, win_score, maxdepth=50, tt=None, depth=0): diff --git a/easyAI/games/Chopsticks.py b/easyAI/games/Chopsticks.py index 60f8131..999915c 100644 --- a/easyAI/games/Chopsticks.py +++ b/easyAI/games/Chopsticks.py @@ -83,9 +83,9 @@ def show(self): print("Player %d: " %(i+1)), for j in range(self.numhands): if self.hands[i][j] > 0: - print('|'*self.hands[i][j] + '\t'), + print('|'*self.hands[i][j] + '\t',) else: - print('x\t'), + print('x\t',) print('') def scoring(self): @@ -125,11 +125,11 @@ def back_to_startstate(self, move): from easyAI.AI.TT import TT dict_tt = DictTT(32) - ai_algo_sss = SSS(6, tt=TT(dict_tt)) - ai_algo_neg = Negamax(6, tt=TT(dict_tt)) - ai_algo_bi = MTDbi(6, tt=TT(dict_tt)) - ai_algo_f = MTDf(5, tt=TT(dict_tt)) - ai_algo_step = MTDstep(5, tt=TT(dict_tt)) - #ai_algo_dual = DUAL(4, tt=TT(dict_tt)) - Chopsticks( [AI_Player(Negamax(5)),AI_Player(ai_algo_step)]).play() #first player never wins + ai_algo_sss = SSS(6, tt=TT(dict_tt)) # SSS algorithm + ai_algo_neg = Negamax(6, tt=TT(dict_tt)) # Negamax algorithm + ai_algo_bi = MTDbi(6, tt=TT(dict_tt)) # MTDbi algorithm + ai_algo_f = MTDf(5, tt=TT(dict_tt)) # MTDf algorithm + ai_algo_step = MTDstep(5, tt=TT(dict_tt)) # MTDstep algorithm + ai_algo_dual = DUAL(4, tt=TT(dict_tt)) # DUAL algorithm + Chopsticks( [AI_Player(ai_algo_neg),AI_Player(ai_algo_step)]).play() dict_tt.print_stats() \ No newline at end of file diff --git a/easyAI/games/Nim.py b/easyAI/games/Nim.py index 34a47e6..12477fd 100644 --- a/easyAI/games/Nim.py +++ b/easyAI/games/Nim.py @@ -53,21 +53,23 @@ def ttentry(self): return tuple(self.piles) #optional, speeds up AI from easyAI import AI_Player, Human_Player, Negamax, id_solve, SSS, DictTT from easyAI.AI import TT # we first solve the game - #w, d, m, tt = id_solve(Nim, range(5, 20), win_score = 80) - #print - #w, d, len(tt.d) + #w, d, m, tt = id_solve(Nim, range(5, 10), win_score = 80) + #print (w, d, len(tt.d)) # the previous line prints -1, 16 which shows that if the # computer plays second with an AI depth of 16 (or 15) it will # always win in 16 (total) moves or less. # Now let's play (and lose !) against the AI - dict_tt = DictTT() - ai1 = Negamax(7) - ai2 = SSS(10, tt = TT(dict_tt)) - ai3 = MCTS(20000) - game = Nim([AI_Player(ai3), AI_Player(ai1)]) - game.play() # You will always lose this game ! - print("player %d wins" % game.nplayer) + ai_negamax = Negamax(7) + ai_mcts = MCTS(20000) # 20000 iterations + ai_mcts_weak = MCTS() # 10000 iterations (default) + game = Nim([AI_Player(ai_mcts), AI_Player(ai_negamax)]) + game.play() + print("player %d wins" % game.nplayer) #MCTS often wins + + game = Nim([AI_Player(ai_mcts_weak), AI_Player(ai_negamax)]) + game.play() + print("player %d wins" % game.nplayer) #MCTS often loses # Note that with the transposition table tt generated by id_solve # we can setup a perfect AI which doesn't have to think: