diff --git a/AI.py b/AI.py index ede434d..c49b34a 100644 --- a/AI.py +++ b/AI.py @@ -8,42 +8,15 @@ import json from ast import literal_eval from Board import Board +from Player import Player +from Value_Iteration_AI import Value_Iteration_AI import matplotlib.pyplot as plt -class Player: - """ - A class to be inherited by any class representing a checkers player. - This is used so that other functions can be written for more general use, - without worry of crashing (e.g. play_n_games). - - NOTES: - 1) Create set playerID method - """ - - def set_board(self, the_board): - """ - Sets the Board object which is known by the AI. - """ - self.board = the_board - - def game_completed(self): - """ - Should be overridden if AI implementing this class should be notified - of when a game ends, before the board is wiped. - """ - pass - - def get_next_move(self): - """ - Gets the desired next move from the AI. - """ - pass - def reward_function(state_info1, state_info2): """ Reward for transitioning from state with state_info1 to state with state_info2. 
- + NOTE: 1) do something better with where/how this is implemented 2) should give some kind of negative for tieing @@ -52,8 +25,8 @@ def reward_function(state_info1, state_info2): return 12 if state_info2[0] == 0 and state_info2[2] == 0: return -12 - return state_info2[0]-state_info1[0] + 2*(state_info2[2]-state_info1[2])-(state_info2[1]-state_info1[1])-2*(state_info2[3]-state_info1[3]) - + return state_info2[0] - state_info1[0] + 2 * (state_info2[2] - state_info1[2]) - ( + state_info2[1] - state_info1[1]) - 2 * (state_info2[3] - state_info1[3]) class Q_Learning_AI(Player): """ @@ -65,18 +38,18 @@ class Q_Learning_AI(Player): 2) handle the rewards function which is coded as if the function were already defined """ - - def __init__(self, the_player_id, the_learning_rate, the_discount_factor, info_location=None, the_random_move_probability=0, the_board=None): + def __init__(self, the_player_id, the_learning_rate, the_discount_factor, info_location=None, + the_random_move_probability=0, the_board=None): """ Initialize the instance variables to be stored by the AI. """ self.random_move_probability = the_random_move_probability - self.learning_rate = the_learning_rate + self.learning_rate = the_learning_rate self.discount_factor = the_discount_factor self.player_id = the_player_id self.board = the_board self.pre_last_move_state = None - self.post_last_move_state = None + self.post_last_move_state = None if not info_location is None: self.load_transition_information(info_location) else: @@ -88,39 +61,39 @@ def set_random_move_probability(self, probability): """ self.random_move_probability = probability - def set_learning_rate(self, the_learning_rate): """ Sets the learning rate for the AI. """ self.learning_rate = the_learning_rate - def get_states_from_boards_spots(self, boards_spots): """ Gets an array of tuples from the given set of board spots, each tuple representing the characteristics which define the - state the board is in. - + state the board is in. 
+ Format of returned data: [(own_pieces, opp_pieces, own_kings, opp_kings, own_edges, own_vert_center_mass, opp_vert_center_mass), ...] """ - piece_counters = [[0,0,0,0,0,0,0] for j in range(len(boards_spots))] + piece_counters = [[0, 0, 0, 0, 0, 0, 0] for j in range(len(boards_spots))] + for k in range(len(boards_spots)): for j in range(len(boards_spots[k])): for i in range(len(boards_spots[k][j])): if boards_spots[k][j][i] != 0: - piece_counters[k][boards_spots[k][j][i]-1] = piece_counters[k][boards_spots[k][j][i]-1] + 1 - if (self.player_id and (boards_spots[k][j][i] == 1 or boards_spots[k][j][i] == 3)) or (not self.player_id and (boards_spots[k][j][i] == 2 or boards_spots[k][j][i] == 4)): - if i==0 and j%2==0: + piece_counters[k][boards_spots[k][j][i] - 1] = piece_counters[k][boards_spots[k][j][i] - 1] + 1 + if (self.player_id and (boards_spots[k][j][i] == 1 or boards_spots[k][j][i] == 3)) or ( + not self.player_id and (boards_spots[k][j][i] == 2 or boards_spots[k][j][i] == 4)): + if i == 0 and j % 2 == 0: piece_counters[k][4] = piece_counters[k][4] + 1 - elif i==3 and j%2==1: + elif i == 3 and j % 2 == 1: piece_counters[k][4] = piece_counters[k][4] + 1 - + piece_counters[k][5] = piece_counters[k][5] + j - else: + else: piece_counters[k][6] = piece_counters[k][6] + j - + if piece_counters[k][0] + piece_counters[k][2] != 0: piece_counters[k][5] = int(piece_counters[k][5] / (piece_counters[k][0] + piece_counters[k][2])) else: @@ -131,50 +104,46 @@ def get_states_from_boards_spots(self, boards_spots): piece_counters[k][6] = 0 return [tuple(counter) for counter in piece_counters] - - def get_desired_transition_between_states(self, possible_state_array, initial_transition_value=10):#%%%%%%%%%%%%%%%%%% FOR (1) + def get_desired_transition_between_states(self, possible_state_array, + initial_transition_value=10): # %%%%%%%%%%%%%%%%%% FOR (1) """ Gets the desired transition to taken for the current board configuration. 
If any possible transition does not exist, it will create it. """ cur_state = tuple(self.get_states_from_boards_spots([self.board.spots])[0]) done_transitions = {} - for state in possible_state_array:#%%%%%%%%%%%%%%%%%%%%%% FOR (1) + for state in possible_state_array: # %%%%%%%%%%%%%%%%%%%%%% FOR (1) if done_transitions.get((cur_state, tuple(state))) is None: if self.transitions.get((cur_state, tuple(state))) is None: - self.transitions.update({(cur_state, tuple(state)):initial_transition_value}) - done_transitions.update({(cur_state, tuple(state)):self.transitions.get((cur_state, tuple(state)))}) - - + self.transitions.update({(cur_state, tuple(state)): initial_transition_value}) + done_transitions.update({(cur_state, tuple(state)): self.transitions.get((cur_state, tuple(state)))}) + if random != 0 and random.random() < self.random_move_probability: try: - return list(done_transitions.keys())[random.randint(0, len(done_transitions)-1)] - except: + return list(done_transitions.keys())[random.randint(0, len(done_transitions) - 1)] + except: return [] - + try: - reverse_dict = {j:i for i,j in done_transitions.items()} + reverse_dict = {j: i for i, j in done_transitions.items()} return reverse_dict.get(max(reverse_dict)) except: - return [] - - + return [] + def game_completed(self): """ Update self.transitions with a completed game before the board is cleared. 
""" cur_state = self.get_states_from_boards_spots([self.board.spots])[0] - transition = (self.pre_last_move_state ,self.post_last_move_state) + transition = (self.pre_last_move_state, self.post_last_move_state) - self.transitions[transition] = self.transitions[transition] + self.learning_rate * reward_function(transition[0],cur_state) + self.transitions[transition] = self.transitions[transition] + self.learning_rate * reward_function(transition[0], cur_state) self.pre_last_move_state = None self.post_last_move_state = None - - def get_transitions_information(self): """ Get an array of of information about the dictionary self.transitions . @@ -188,48 +157,45 @@ def get_transitions_information(self): max_value = float("-inf") min_value = float("inf") total_value = 0 - for k,v in self.transitions.items(): + for k, v in self.transitions.items(): if start_of_transitions.get(k[0]) is None: - start_of_transitions.update({k[0]:0}) - #if k[0] not in start_of_transitions: - #start_of_transitions.append(k[0]) + start_of_transitions.update({k[0]: 0}) + # if k[0] not in start_of_transitions: + # start_of_transitions.append(k[0]) if v > max_value: max_value = v if v < min_value: min_value = v total_value = total_value + v - - return [len(self.transitions), len(start_of_transitions), float(total_value/len(self.transitions)), max_value, min_value] - - + + return [len(self.transitions), len(start_of_transitions), float(total_value / len(self.transitions)), max_value, + min_value] + def print_transition_information(self, info): """ Prints the output of get_transitions_information in a easy to understand format. 
""" - print("Total number of transitions: ".ljust(35), info[0]) + print("Total number of transitions: ".ljust(35), info[0]) print("Total number of visited states: ".ljust(35), info[1]) print("Average value for transition: ".ljust(35), info[2]) print("Maximum value for transition: ".ljust(35), info[3]) print("Minimum value for transition: ".ljust(35), info[4]) - - + def save_transition_information(self, file_name="data.json"): """ Saves the current transitions information to a specified json file. """ with open(file_name, 'w') as fp: - json.dump({str(k): v for k,v in self.transitions.items()}, fp) - - + json.dump({str(k): v for k, v in self.transitions.items()}, fp) + def load_transition_information(self, file_name): """ Loads transitions information from a desired json file. """ with open(file_name, 'r') as fp: - self.transitions = {literal_eval(k): v for k,v in json.load(fp).items()} - - + self.transitions = {literal_eval(k): v for k, v in json.load(fp).items()} + def get_optimal_potential_value(self, depth): """ Look ahead a given number of moves and return the maximal value associated @@ -251,17 +217,15 @@ def get_optimal_potential_value(self, depth): """ answer = float("-inf") cur_state = self.get_states_from_boards_spots([self.board.spots])[0] - for k,v in self.transitions.items(): + for k, v in self.transitions.items(): if v > answer and k[0] == cur_state: answer = v - + if answer == float("-inf"): return None return answer - - - def get_next_move(self):#, new_board): + def get_next_move(self): # , new_board): """ NOTES: If the variable names are confusing, think about them being named when you just call the method. 
@@ -269,72 +233,50 @@ def get_next_move(self):#, new_board): PRECONDITIONS: 1) The board exists and is legal """ - if self.pre_last_move_state is not None:#%%%%%%%%%%%%%%%%%%%%%%% FOR (1) + if self.pre_last_move_state is not None: # %%%%%%%%%%%%%%%%%%%%%%% FOR (1) cur_state = self.get_states_from_boards_spots([self.board.spots])[0] - - transition = (self.pre_last_move_state ,self.post_last_move_state) - try:# self.transitions.get(transition) is not None:#%%%%%%%%%%%%%%%%%%%%%%%%%%%% FOR (1) + + transition = (self.pre_last_move_state, self.post_last_move_state) + try: # self.transitions.get(transition) is not None:#%%%%%%%%%%%%%%%%%%%%%%%%%%%% FOR (1) max_future_state = self.get_optimal_potential_value(1) - self.transitions[transition] = self.transitions[transition] + self.learning_rate * (reward_function(transition[0],cur_state)+ self.discount_factor* max_future_state - self.transitions[transition]) - except:#%%%%%%%%%%%%%%%%%%%%%%%%%%%% FOR (1) - self.transitions[transition] = self.transitions[transition] + self.learning_rate * (reward_function(transition[0],cur_state)) - - - self.pre_last_move_state = self.get_states_from_boards_spots([self.board.spots])[0]#%%%%%%%%%%%%%%%%%%%%%%%%%%%% FOR (1) - + self.transitions[transition] = self.transitions[transition] + self.learning_rate * (reward_function(transition[0], cur_state) + self.discount_factor * max_future_state - self.transitions[transition]) + except: # %%%%%%%%%%%%%%%%%%%%%%%%%%%% FOR (1) + self.transitions[transition] = self.transitions[transition] + self.learning_rate * (reward_function(transition[0], cur_state)) + + self.pre_last_move_state = self.get_states_from_boards_spots([self.board.spots])[0] # %%%%%%%%%%%% FOR (1) + possible_next_moves = self.board.get_possible_next_moves() possible_next_states = self.get_states_from_boards_spots(self.board.get_potential_spots_from_moves(possible_next_moves)) - - self.post_last_move_state = self.get_desired_transition_between_states(possible_next_states)[1] - + + 
self.post_last_move_state = self.get_desired_transition_between_states(possible_next_states)[1] + considered_moves = [] for j in range(len(possible_next_states)): if tuple(possible_next_states[j]) == self.post_last_move_state: considered_moves.append(possible_next_moves[j]) - - - #I believe with the updated board.is_game_over() I don't need to use this try statement -# try: -# return considered_moves[random.randint(0,len(considered_moves)-1)] -# except ValueError: -# return [] - - return considered_moves[random.randint(0,len(considered_moves)-1)] -def get_number_of_pieces_and_kings(spots, player_id=None): - """ - Gets the number of pieces and the number of kings that each player has on the current - board configuration represented in the given spots. The format of the function with defaults is: - [P1_pieces, P2_pieces, P1_kings, P2_kings] - and if given a player_id: - [player_pieces, player_kings] - """ - piece_counter = [0,0,0,0] - for row in spots: - for element in row: - if element != 0: - piece_counter[element-1] = piece_counter[element-1] + 1 - - if player_id == True: - return [piece_counter[0], piece_counter[2]] - elif player_id == False: - return [piece_counter[1], piece_counter[3]] - else: - return piece_counter - + + # I believe with the updated board.is_game_over() I don't need to use this try statement + # try: + # return considered_moves[random.randint(0,len(considered_moves)-1)] + # except ValueError: + # return [] + + return considered_moves[random.randint(0, len(considered_moves) - 1)] + class Alpha_beta(Player): """ - A class representing a checkers playing AI using Alpha-Beta pruning. - + A class representing a checkers playing AI using Alpha-Beta pruning. 
+ TO DO: - 1) Be able to take in any reward function (for when not win/loss) + 1) Be able to take in any reward function (for when not win/loss) so that you can make a more robust set of training AI """ - + def __init__(self, the_player_id, the_depth, the_board=None): """ - Initialize the instance variables to be stored by the AI. + Initialize the instance variables to be stored by the AI. """ self.board = the_board self.depth = the_depth @@ -342,17 +284,17 @@ def __init__(self, the_player_id, the_depth, the_board=None): def alpha_beta(self, board, depth, alpha, beta, maximizing_player): """ - A method implementing alpha-beta pruning to decide what move to make given - the current board configuration. + A method implementing alpha-beta pruning to decide what move to make given + the current board configuration. """ if board.is_game_over(): - if get_number_of_pieces_and_kings(board.spots, board.player_turn) == [0,0]: + if get_number_of_pieces_and_kings(board.spots, board.player_turn) == [0, 0]: if maximizing_player: - #Using integers instead of float("inf") so it's less than float("inf") not equal to + # Using integers instead of float("inf") so it's less than float("inf") not equal to return -10000000, None else: return 10000000, None - elif get_number_of_pieces_and_kings(board.spots, not board.player_turn) == [0,0]: + elif get_number_of_pieces_and_kings(board.spots, not board.player_turn) == [0, 0]: if maximizing_player: return 1000000, None else: @@ -363,8 +305,8 @@ def alpha_beta(self, board, depth, alpha, beta, maximizing_player): if depth == 0: players_info = get_number_of_pieces_and_kings(board.spots) if board.player_turn != maximizing_player: - return players_info[1] + 2 * players_info[3] - (players_info[0] + 2 * players_info[2]), None - return players_info[0] + 2 * players_info[2] - (players_info[1] + 2 * players_info[3]), None + return players_info[1] + 2 * players_info[3] - (players_info[0] + 2 * players_info[2]), None + return players_info[0] + 2 * 
players_info[2] - (players_info[1] + 2 * players_info[3]), None possible_moves = board.get_possible_next_moves() potential_spots = board.get_potential_spots_from_moves(possible_moves) @@ -374,11 +316,11 @@ def alpha_beta(self, board, depth, alpha, beta, maximizing_player): for j in range(len(potential_spots)): cur_board = Board(potential_spots[j], not board.player_turn) alpha_beta_results = self.alpha_beta(cur_board, depth - 1, alpha, beta, False) - if v < alpha_beta_results[0]: + if v < alpha_beta_results[0]: v = alpha_beta_results[0] alpha = max(alpha, v) desired_move_index = j - if beta <= alpha: + if beta <= alpha: break if desired_move_index is None: return v, None @@ -388,7 +330,7 @@ def alpha_beta(self, board, depth, alpha, beta, maximizing_player): for j in range(len(potential_spots)): cur_board = Board(potential_spots[j], not board.player_turn) alpha_beta_results = self.alpha_beta(cur_board, depth - 1, alpha, beta, True) - if v > alpha_beta_results[0]: + if v > alpha_beta_results[0]: v = alpha_beta_results[0] desired_move_index = j beta = min(beta, v) @@ -397,10 +339,10 @@ def alpha_beta(self, board, depth, alpha, beta, maximizing_player): if desired_move_index is None: return v, None return v, possible_moves[desired_move_index] - + def get_next_move(self): return self.alpha_beta(self.board, self.depth, float('-inf'), float('inf'), self.player_id)[1] - + def play_n_games(player1, player2, num_games, move_limit): """ @@ -417,20 +359,25 @@ def play_n_games(player1, player2, num_games, move_limit): game_board = Board() player1.set_board(game_board) player2.set_board(game_board) - + players_move = player1 - outcome_counter = [[-1,-1,-1,-1,-1,-1] for j in range(num_games)] + outcome_counter = [[-1, -1, -1, -1, -1, -1] for j in range(num_games)] for j in range(num_games): - #print(j) + # print(j) move_counter = 0 while not game_board.is_game_over() and move_counter < move_limit: game_board.make_move(players_move.get_next_move()) - + move_counter = move_counter + 
1 if players_move is player1: + value = players_move.value_iteration() + print(value) + # print(players_move.calculate_policy(value)) players_move = player2 else: players_move = player1 + + print(game_board.print_board()) else: piece_counter = get_number_of_pieces_and_kings(game_board.spots) if piece_counter[0] != 0 or piece_counter[2] != 0: @@ -439,28 +386,28 @@ def play_n_games(player1, player2, num_games, move_limit): outcome_counter[j][0] = 3 else: outcome_counter[j][0] = 2 -# if (j+1)%100==0: -# print("Tie game for game #" + str(j + 1) + " in " + str(move_counter) + " turns!") + # if (j+1)%100==0: + # print("Tie game for game #" + str(j + 1) + " in " + str(move_counter) + " turns!") else: outcome_counter[j][0] = 0 -# if (j+1)%100==0: -# print("Player 1 won game #" + str(j + 1) + " in " + str(move_counter) + " turns!") + # if (j+1)%100==0: + # print("Player 1 won game #" + str(j + 1) + " in " + str(move_counter) + " turns!") else: outcome_counter[j][0] = 1 -# if (j+1)%100==0: -# print("Player 2 won game #" + str(j + 1) + " in " + str(move_counter) + " turns!") - + # if (j+1)%100==0: + # print("Player 2 won game #" + str(j + 1) + " in " + str(move_counter) + " turns!") + outcome_counter[j][1] = move_counter outcome_counter[j][2] = piece_counter[0] outcome_counter[j][3] = piece_counter[1] outcome_counter[j][4] = piece_counter[2] outcome_counter[j][5] = piece_counter[3] - + player1.game_completed() player2.game_completed() - #game_board.print_board() + # game_board.print_board() game_board.reset_board() - + return outcome_counter @@ -472,7 +419,7 @@ def pretty_outcome_display(outcomes): 1) Add functionality for pieces in each game 2) Add ability to take other strings for AI info and display it """ - game_wins = [0,0,0,0] + game_wins = [0, 0, 0, 0] total_moves = 0 max_moves_made = float("-inf") min_moves_made = float("inf") @@ -482,58 +429,78 @@ def pretty_outcome_display(outcomes): min_moves_made = outcome[1] if outcome[1] > max_moves_made: max_moves_made = 
outcome[1] - + game_wins[outcome[0]] = game_wins[outcome[0]] + 1 - + print("Games Played: ".ljust(35), len(outcomes)) print("Player 1 wins: ".ljust(35), game_wins[0]) print("Player 2 wins: ".ljust(35), game_wins[1]) print("Games exceeded move limit: ".ljust(35), game_wins[3]) print("Games tied: ".ljust(35), game_wins[2]) - print("Total moves made: ".ljust(35), total_moves) - print("Average moves made: ".ljust(35), total_moves/len(outcomes)) + print("Total moves made: ".ljust(35), total_moves) + print("Average moves made: ".ljust(35), total_moves / len(outcomes)) print("Max moves made: ".ljust(35), max_moves_made) print("Min moves made: ".ljust(35), min_moves_made) - + def plot_end_game_information(outcome, interval, title="End of Game Results"): """ """ - player1_wins = [0 for _ in range(int(len(outcome)/interval))] - player2_wins = [0 for _ in range(int(len(outcome)/interval))] - ties = [0 for _ in range(int(len(outcome)/interval))] - move_limit = [0 for _ in range(int(len(outcome)/interval))] - - for j in range(int(len(outcome)/interval)): + player1_wins = [0 for _ in range(int(len(outcome) / interval))] + player2_wins = [0 for _ in range(int(len(outcome) / interval))] + ties = [0 for _ in range(int(len(outcome) / interval))] + move_limit = [0 for _ in range(int(len(outcome) / interval))] + + for j in range(int(len(outcome) / interval)): for i in range(interval): - if outcome[j*interval + i][0] == 0: + if outcome[j * interval + i][0] == 0: player1_wins[j] = player1_wins[j] + 1 - elif outcome[j*interval + i][0] == 1: + elif outcome[j * interval + i][0] == 1: player2_wins[j] = player2_wins[j] + 1 - elif outcome[j*interval + i][0] == 2: + elif outcome[j * interval + i][0] == 2: ties[j] = ties[j] + 1 else: move_limit[j] = move_limit[j] + 1 - + plt.figure(title) - - p1_win_graph, = plt.plot(player1_wins, label = "Player 1 wins") - p2_win_graph, = plt.plot(player2_wins, label = "Player 2 wins") - tie_graph, = plt.plot(ties, label = "Ties") - move_limit_graph, = 
plt.plot(move_limit, label = "Move limit reached") - - plt.ylabel("Occurance per " +str(interval) + " games") + + p1_win_graph, = plt.plot(player1_wins, label="Player 1 wins") + p2_win_graph, = plt.plot(player2_wins, label="Player 2 wins") + tie_graph, = plt.plot(ties, label="Ties") + move_limit_graph, = plt.plot(move_limit, label="Move limit reached") + + plt.ylabel("Occurance per " + str(interval) + " games") plt.xlabel("Interval") - + plt.legend(handles=[p1_win_graph, p2_win_graph, tie_graph, move_limit_graph]) +def get_number_of_pieces_and_kings(spots, player_id=None): + """ + Gets the number of pieces and the number of kings that each player has on the current + board configuration represented in the given spots. The format of the function with defaults is: + [P1_pieces, P2_pieces, P1_kings, P2_kings] + and if given a player_id: + [player_pieces, player_kings] + """ + piece_counter = [0, 0, 0, 0] + for row in spots: + for element in row: + if element != 0: + piece_counter[element - 1] = piece_counter[element - 1] + 1 + + if player_id == True: + return [piece_counter[0], piece_counter[2]] + elif player_id == False: + return [piece_counter[1], piece_counter[3]] + else: + return piece_counter + - -LEARNING_RATE = .005 +LEARNING_RATE = .005 DISCOUNT_FACTOR = .3 NUM_GAMES_TO_TRAIN = 100 -NUM_TRAINING_ROUNDS = 25 +NUM_TRAINING_ROUNDS = 1 NUM_VALIDATION_GAMES = 5 NUM_GAMES_TO_TEST = 0 TRAINING_RANDOM_MOVE_PROBABILITY = .25 @@ -541,40 +508,43 @@ def plot_end_game_information(outcome, interval, title="End of Game Results"): TRAINING_MOVE_LIMIT = 500 VALIDATION_MOVE_LIMIT = 1000 TESTING_MOVE_LIMIT = 2000 -PLAYER1 = Q_Learning_AI(True, LEARNING_RATE, DISCOUNT_FACTOR, the_random_move_probability=TRAINING_RANDOM_MOVE_PROBABILITY)#, info_location="data.json") -PLAYER2 = Alpha_beta(False, ALPHA_BETA_DEPTH) -#PLAYER3 = Alpha_beta(False, 1) -PLAYER4 = Alpha_beta(False, 3) +# PLAYER1 = Q_Learning_AI(True, LEARNING_RATE, DISCOUNT_FACTOR, +# 
the_random_move_probability=TRAINING_RANDOM_MOVE_PROBABILITY) # , info_location="data.json") +PLAYER2 = Alpha_beta(False, 2, ALPHA_BETA_DEPTH) +# PLAYER3 = Alpha_beta(False, 1) +# PLAYER4 = Alpha_beta(False, 2, 3) # PLAYER5 = Q_Learning_AI(False, LEARNING_RATE, DISCOUNT_FACTOR, the_random_move_probability=TRAINING_RANDOM_MOVE_PROBABILITY) - - -#PLAYER1.print_transition_information(PLAYER1.get_transitions_information()) - -training_info = [] -validation_info = [] -for j in range(NUM_TRAINING_ROUNDS): - training_info.extend(play_n_games(PLAYER1, PLAYER2, NUM_GAMES_TO_TRAIN, TRAINING_MOVE_LIMIT)) - PLAYER1.print_transition_information(PLAYER1.get_transitions_information()) - PLAYER1.set_random_move_probability(0) - PLAYER1.set_learning_rate(0) - validation_info.extend(play_n_games(PLAYER1, PLAYER4, NUM_VALIDATION_GAMES, VALIDATION_MOVE_LIMIT)) - print("Round " + str(j+1) + " completed!") - PLAYER1.set_random_move_probability(TRAINING_RANDOM_MOVE_PROBABILITY) - PLAYER1.set_learning_rate(LEARNING_RATE) - #print("") - #PLAYER1.print_transition_information(PLAYER1.get_transitions_information()) +PLAYER1 = Value_Iteration_AI(opponent=PLAYER2) +# PLAYER2 = Value_Iteration_AI(player_id=2) + +# PLAYER1.print_transition_information(PLAYER1.get_transitions_information()) + +def run(): + training_info = [] + validation_info = [] + for j in range(NUM_TRAINING_ROUNDS): + training_info.extend(play_n_games(PLAYER1, PLAYER2, NUM_GAMES_TO_TRAIN, TRAINING_MOVE_LIMIT)) + # PLAYER1.print_transition_information(PLAYER1.get_transitions_information()) + # PLAYER1.set_random_move_probability(0) + # PLAYER1.set_learning_rate(0) + # validation_info.extend(play_n_games(PLAYER1, PLAYER2, NUM_VALIDATION_GAMES, VALIDATION_MOVE_LIMIT)) + print("Round " + str(j + 1) + " completed!") + # PLAYER1.set_random_move_probability(TRAINING_RANDOM_MOVE_PROBABILITY) + # PLAYER1.set_learning_rate(LEARNING_RATE) + # print("") + # PLAYER1.print_transition_information(PLAYER1.get_transitions_information()) + 
print("") + # PLAYER1.save_transition_information() + + # plot_end_game_information(training_info, 200, "Training Information") + # plot_end_game_information(validation_info, NUM_VALIDATION_GAMES, "Validation Information") + plt.show() + + pretty_outcome_display(training_info) print("") - PLAYER1.save_transition_information() - - -#plot_end_game_information(training_info, 200, "Training Information") -#plot_end_game_information(validation_info, NUM_VALIDATION_GAMES, "Validation Information") -plt.show() - -pretty_outcome_display(training_info) -print("") -pretty_outcome_display(validation_info) - + pretty_outcome_display(validation_info) + + """ PLAYER1.set_random_move_probability(0) @@ -588,6 +558,5 @@ def plot_end_game_information(outcome, interval, title="End of Game Results"): PLAYER1.print_transition_information(PLAYER1.get_transitions_information()) """ - -PLAYER1.save_transition_information() +# PLAYER1.save_transition_information() diff --git a/Board.py b/Board.py index c9766f5..d5c133d 100644 --- a/Board.py +++ b/Board.py @@ -7,11 +7,11 @@ -if self.player_turn == True then it is player 1's turn """ - import math import copy from functools import reduce + class Board: """ A class to represent and play an 8x8 game of checkers. @@ -22,40 +22,42 @@ class Board: P1_K = 3 P2_K = 4 BACKWARDS_PLAYER = P2 - HEIGHT = 8 - WIDTH = 4 - - - def __init__(self, old_spots=None, the_player_turn=True): + + def __init__(self, height=8, width=4, old_spots=None, the_player_turn=True): """ Initializes a new instance of the Board class. Unless specified otherwise, the board will be created with a start board configuration. NOTE: Maybe have default parameter so board is 8x8 by default but nxn if wanted. 
+ :type height: int + :type width: int """ - self.player_turn = the_player_turn - if old_spots is None: - self.spots = [[j, j, j, j] for j in [self.P1, self.P1, self.P1, self.EMPTY_SPOT, self.EMPTY_SPOT, self.P2, self.P2, self.P2]] + self.player_turn = the_player_turn + self.HEIGHT = 8 + self.WIDTH = 4 + + if old_spots is None: + self.spots = [[j, j, j, j] for j in + [self.P1, self.P1, self.P1, self.EMPTY_SPOT, self.EMPTY_SPOT, self.P2, self.P2, self.P2]] + # self.spots = [[j, j, j] for j in + # [self.P1, self.P1, self.EMPTY_SPOT, self.EMPTY_SPOT, self.P2, self.P2]] else: self.spots = old_spots - def reset_board(self): """ Resets the current configuration of the game board to the original starting position. """ self.spots = Board().spots - - + def empty_board(self): """ Removes any pieces currently on the board and leaves the board with nothing but empty spots. """ - self.spots = [[j, j, j, j] for j in [self.EMPTY_SPOT] * self.HEIGHT] # Make sure [self.EMPTY_SPOT]*self.HEIGHT] has no issues - - + self.spots = [[j, j, j, j] for j in [self.EMPTY_SPOT] * self.HEIGHT] + def is_game_over(self): """ Finds out and returns weather the game currently being played is over or @@ -65,7 +67,6 @@ def is_game_over(self): return True return False - def not_spot(self, loc): """ Finds out of the spot at the given location is an actual spot on the game board. @@ -73,8 +74,7 @@ def not_spot(self, loc): if len(loc) == 0 or loc[0] < 0 or loc[0] > self.HEIGHT - 1 or loc[1] < 0 or loc[1] > self.WIDTH - 1: return True return False - - + def get_spot_info(self, loc): """ Gets the information about the spot at the given location. @@ -83,8 +83,7 @@ def get_spot_info(self, loc): Might want to not use this for the sake of computational time. 
""" return self.spots[loc[0]][loc[1]] - - + def forward_n_locations(self, start_loc, n, backwards=False): """ Gets the locations possible for moving a piece from a given location diagonally @@ -95,14 +94,15 @@ def forward_n_locations(self, start_loc, n, backwards=False): temp2 = 0 elif start_loc[0] % 2 == 0: temp1 = 0 - temp2 = 1 + temp2 = 1 else: temp1 = 1 temp2 = 0 - answer = [[start_loc[0], start_loc[1] + math.floor(n / 2) + temp1], [start_loc[0], start_loc[1] - math.floor(n / 2) - temp2]] + answer = [[start_loc[0], start_loc[1] + math.floor(n / 2) + temp1], + [start_loc[0], start_loc[1] - math.floor(n / 2) - temp2]] - if backwards: + if backwards: answer[0][0] = answer[0][0] - n answer[1][0] = answer[1][0] - n else: @@ -113,9 +113,8 @@ def forward_n_locations(self, start_loc, n, backwards=False): answer[0] = [] if self.not_spot(answer[1]): answer[1] = [] - + return answer - def get_simple_moves(self, start_loc): """ @@ -131,7 +130,6 @@ def get_simple_moves(self, start_loc): next_locations = self.forward_n_locations(start_loc, 1, True) # Switched the true from the statement below else: next_locations = self.forward_n_locations(start_loc, 1) - possible_next_locations = [] @@ -139,19 +137,18 @@ def get_simple_moves(self, start_loc): if len(location) != 0: if self.spots[location[0]][location[1]] == self.EMPTY_SPOT: possible_next_locations.append(location) - - return [[start_loc, end_spot] for end_spot in possible_next_locations] - - + + return [[start_loc, end_spot] for end_spot in possible_next_locations] + def get_capture_moves(self, start_loc, move_beginnings=None): """ Recursively get all of the possible moves for a piece which involve capturing an opponent's piece. 
""" if move_beginnings is None: move_beginnings = [start_loc] - + answer = [] - if self.spots[start_loc[0]][start_loc[1]] > 2: + if self.spots[start_loc[0]][start_loc[1]] > 2: next1 = self.forward_n_locations(start_loc, 1) next2 = self.forward_n_locations(start_loc, 2) next1.extend(self.forward_n_locations(start_loc, 1, True)) @@ -162,54 +159,73 @@ def get_capture_moves(self, start_loc, move_beginnings=None): else: next1 = self.forward_n_locations(start_loc, 1) next2 = self.forward_n_locations(start_loc, 2) - - + for j in range(len(next1)): - if (not self.not_spot(next2[j])) and (not self.not_spot(next1[j])) : # if both spots exist - if self.get_spot_info(next1[j]) != self.EMPTY_SPOT and self.get_spot_info(next1[j]) % 2 != self.get_spot_info(start_loc) % 2: # if next spot is opponent + if (not self.not_spot(next2[j])) and (not self.not_spot(next1[j])): # if both spots exist + if self.get_spot_info(next1[j]) != self.EMPTY_SPOT and self.get_spot_info( + next1[j]) % 2 != self.get_spot_info(start_loc) % 2: # if next spot is opponent if self.get_spot_info(next2[j]) == self.EMPTY_SPOT: # if next next spot is empty temp_move1 = copy.deepcopy(move_beginnings) temp_move1.append(next2[j]) - + answer_length = len(answer) - - if self.get_spot_info(start_loc) != self.P1 or next2[j][0] != self.HEIGHT - 1: - if self.get_spot_info(start_loc) != self.P2 or next2[j][0] != 0: + if self.get_spot_info(start_loc) != self.P1 or next2[j][0] != self.HEIGHT - 1: + if self.get_spot_info(start_loc) != self.P2 or next2[j][0] != 0: temp_move2 = [start_loc, next2[j]] - + temp_board = Board(copy.deepcopy(self.spots), self.player_turn) temp_board.make_move(temp_move2, False) answer.extend(temp_board.get_capture_moves(temp_move2[1], temp_move1)) - + if len(answer) == answer_length: answer.append(temp_move1) - + return answer - - - def get_possible_next_moves(self): + + def get_piece_locations(self): + piece_locations = [] + + for j in range(self.HEIGHT): + for i in range(self.WIDTH): + if 
def get_possible_next_moves_for_a_piece(self, piece_location):
    """
    Gets the moves available to the single piece at piece_location.

    Captures take priority: if get_capture_moves returns at least one move,
    only those moves are returned; otherwise the result of get_simple_moves
    for the same location is returned.
    """
    capture_moves = self.get_capture_moves(piece_location)
    if capture_moves:
        return capture_moves

    return self.get_simple_moves(piece_location)


def get_possible_next_moves(self, spots=None):
    """
    Gets the possible moves that can be made from the current board configuration.

    spots (defaults to self.spots) is only used to locate the pieces of the
    player whose turn it is. The moves themselves come from get_capture_moves
    and get_simple_moves, which receive a location only and read self.spots,
    so passing a layout that differs from self.spots mixes the two boards.

    Returns every capture move if any piece can capture, otherwise every
    simple move, otherwise an empty list.
    """
    if spots is None:
        spots = self.spots

    # player_turn is compared by identity, exactly as before: only the bool
    # objects True/False select a side, anything else selects no pieces.
    if self.player_turn is True:
        own_pieces = (self.P1, self.P1_K)
    elif self.player_turn is False:
        own_pieces = (self.P2, self.P2_K)
    else:
        own_pieces = ()

    piece_locations = [[j, i]
                       for j in range(self.HEIGHT)
                       for i in range(self.WIDTH)
                       if spots[j][i] in own_pieces]

    # Flattening with a comprehension handles "no pieces" naturally (empty
    # list), so the old reduce() + `except TypeError` guard, which also hid
    # genuine TypeErrors raised by the move generators, is no longer needed.
    capture_moves = [move
                     for location in piece_locations
                     for move in self.get_capture_moves(location)]
    if capture_moves:
        return capture_moves

    return [move
            for location in piece_locations
            for move in self.get_simple_moves(location)]
def get_states_from_boards_spots(self, boards_spots, player_id=None):
    """
    Gets an array of tuples from the given set of board spots,
    each tuple representing the characteristics which define the
    state the board is in.

    Format of returned data:
    [(P1_pieces, P2_pieces, P1_kings, P2_kings, own_edges, own_vert_center_mass, opp_vert_center_mass), ...]

    The first four counts are indexed by piece code (1..4) and therefore do
    not depend on player_id. The last three are relative to player_id: a
    truthy player_id makes codes 1 and 3 "own", a falsy one (including the
    default None) makes codes 2 and 4 "own"; every other non-empty spot is
    counted as the opponent's.

    Each center of mass is the truncated mean row index of that side's
    pieces (0 when the side has none). The row sums are divided by the
    number of pieces of the same side; previously they were always divided
    by the code-1/3 and code-2/4 totals, which was only right for a truthy
    player_id.
    """
    states = []
    for spots in boards_spots:
        counts = [0, 0, 0, 0]
        own_edges = 0
        own_row_sum = own_total = 0
        opp_row_sum = opp_total = 0

        for j, row in enumerate(spots):
            for i, piece in enumerate(row):
                if piece == 0:
                    continue

                counts[piece - 1] += 1

                if player_id:
                    is_own = piece == 1 or piece == 3
                else:
                    is_own = piece == 2 or piece == 4

                if is_own:
                    # Edge spots: column 0 on even rows, column 3 on odd rows.
                    # NOTE(review): 3 looks like WIDTH - 1 for a 4-wide board - confirm.
                    if (i == 0 and j % 2 == 0) or (i == 3 and j % 2 == 1):
                        own_edges += 1
                    own_row_sum += j
                    own_total += 1
                else:
                    opp_row_sum += j
                    opp_total += 1

        own_center = own_row_sum // own_total if own_total else 0
        opp_center = opp_row_sum // opp_total if opp_total else 0
        states.append((counts[0], counts[1], counts[2], counts[3],
                       own_edges, own_center, opp_center))

    return states
def get_number_of_pieces_and_kings(self, spots=None, player_id=None):
    """
    Gets the number of pieces and the number of kings that each player has on the
    board configuration represented in the given spots (self.spots when spots is None).

    The return format with defaults is:
    [P1_pieces, P2_pieces, P1_kings, P2_kings]
    and if given a player_id:
    [player_pieces, player_kings]
    """
    if spots is None:
        spots = self.spots

    piece_counter = [0, 0, 0, 0]
    for row in spots:
        for element in row:
            if element != 0:
                # Piece codes 1..4 map onto counter indices 0..3.
                piece_counter[element - 1] += 1

    # `==` rather than `is` is deliberate: it keeps integer ids working
    # (1 selects like True, 0 like False) while None falls through.
    if player_id == True:
        return [piece_counter[0], piece_counter[2]]
    elif player_id == False:
        return [piece_counter[1], piece_counter[3]]
    else:
        return piece_counter


def set_spots(self, spots):
    """
    Replaces the board's spots with a deep copy of the given spots, so later
    moves on this board do not modify the caller's data. Does nothing when
    spots is None.
    """
    if spots is not None:
        self.spots = copy.deepcopy(spots)


def switch_turn(self):
    """
    Flips the indicator for which player's turn it is, without making a move.
    """
    self.player_turn = not self.player_turn
class Value_Iteration_AI(Player):
    """
    A checkers player whose move choice is precomputed with value iteration.

    The whole computation runs inside __init__ (via value_iteration), so
    constructing an instance can take a long time; afterwards get_next_move
    only looks the current board up in the stored policy.

    Bookkeeping uses three parallel lists: states[k] is a snapshot of a board
    layout, value_function[k] is its current value estimate and policy[k] is
    a one-hot list over the moves available in that layout.
    """
    LOSING_STATES = -100
    WINNING_STATES = 100

    def __init__(self, opponent, player_id=1, discount_factor=0.5, board=None):
        """
        Stores the settings, creates a Board if none is given and immediately
        runs value iteration.

        opponent must provide get_next_move(); it is queried for its reply
        after each candidate move.
        NOTE(review): presumably the opponent plays on this same Board object - confirm.
        """
        self.player_id = player_id
        self.discount_factor = discount_factor
        self.states = []
        self.value_function = []
        self.policy = []
        self.opponent = opponent
        self.board = board

        if self.board is None:
            self.board = Board()

        self.value_iteration()

    def reward_function(self, state_info1, state_info2):
        """
        Reward for transitioning from the state described by state_info1 to
        the one described by state_info2 (tuples as returned by
        Board.get_states_from_boards_spots).

        Indices 0/2 are treated as this player's pieces/kings and 1/3 as the
        opponent's.
        NOTE(review): those indices are the P1/P2 counts, so this assumes the
        AI plays as P1 regardless of player_id - confirm.
        """
        if self.board.is_game_over():
            if state_info2[1] == 0 and state_info2[3] == 0:  # opponent has nothing left: win
                return self.WINNING_STATES
            elif state_info2[0] == 0 and state_info2[2] == 0:  # nothing left of mine: loss
                return self.LOSING_STATES
            else:
                return 0  # draw state

        # Change in my own material: 5 per piece, 10 per king.
        own_change = 5 * (state_info2[0] - state_info1[0]) + 10 * (state_info2[2] - state_info1[2])
        # Change in the opponent's material, weighted the same way.
        opponent_change = 5 * (state_info2[1] - state_info1[1]) + 10 * (state_info2[3] - state_info1[3])

        # Losing my pieces lowers the reward, capturing the opponent's raises it.
        return own_change - opponent_change

    def get_reward(self, current_spots, next_spots):
        """
        Gets the reward for moving from the board layout current_spots to next_spots.
        """
        current_status = self.board.get_states_from_boards_spots([current_spots])
        next_status = self.board.get_states_from_boards_spots([next_spots])

        return self.reward_function(current_status[0], next_status[0])

    def get_transition_probabilities(self, actions, opponent_action):
        """
        Gets the weight of one (own action, opponent action) pair:
        1 / (number of actions x number of opponent actions).
        """
        return 1 / (len(actions) * len(opponent_action))

    def get_value(self, state):
        """
        Gets (value, index) for the given board layout, registering the
        layout with value 0 if it has not been observed yet.
        """
        try:
            index = self.states.index(state)

            return self.value_function[index], index
        except ValueError:
            # Store a snapshot, not the caller's object: the live board is
            # passed in here and is modified in place by later moves.
            self.states.append([list(row) for row in state])
            self.value_function.append(0)
            index = len(self.value_function) - 1

            return 0, index

    def calculate_value_of_action(self, state, possible_moves, opponent_moves):
        """
        Gets the weighted value of having reached the board's current layout
        from state: probability * (reward + discount_factor * value of the
        reached layout). The reached layout is registered if it is new.
        """
        next_state = self.board.spots
        next_state_value = self.get_value(next_state)[0]
        reward = self.get_reward(state, next_state)
        prob = self.get_transition_probabilities(possible_moves, opponent_moves)

        return prob * (reward + self.discount_factor * next_state_value)

    def calculate_expected_value(self, state):
        """
        Gets one value per move available in state (in the order returned by
        Board.get_possible_next_moves), or [LOSING_STATES] if the game is
        already over in that layout.

        The board is left in the position reached by the last evaluated line.
        """
        # Load the layout first so the move list always belongs to `state`,
        # whatever position the board was left in by the previous call.
        self.board.set_spots(state)

        if self.board.is_game_over():
            return [self.LOSING_STATES]

        possible_moves = self.board.get_possible_next_moves()
        expected_value = np.zeros(len(possible_moves))

        for i, move in enumerate(possible_moves):
            self.board.set_spots(state)  # recover board to state condition
            self.board.make_move(move)  # make my move

            if self.board.is_game_over():
                # The game ended on my move, so the opponent is not asked for
                # a reply; the one-element placeholder keeps the probability
                # term identical to the single-reply case.
                expected_value[i] = self.calculate_value_of_action(state, possible_moves, [None])
                self.board.switch_turn()  # hand the turn back to me

                continue

            opponent_moves = [self.opponent.get_next_move()]  # the opponent's reply

            for opp_move in opponent_moves:  # maybe there can be more than one opponent move
                self.board.make_move(opp_move)  # make opponent move to obtain next state
                expected_value[i] += self.calculate_value_of_action(state, possible_moves, opponent_moves)

        return expected_value

    def value_iteration(self, theta=0.0001):
        """
        Sweeps over all observed states, updating each value to the best
        expected value of its moves, until the largest change in a sweep is
        below theta. Then derives the policy and resets the board.
        """
        self.get_value(self.board.spots)  # register the starting layout

        while True:
            delta = 0

            # self.states may grow while it is being iterated; newly
            # discovered layouts are visited later in the same sweep.
            for state in self.states:
                v, index = self.get_value(state)

                expected_value = self.calculate_expected_value(state)

                self.value_function[index] = np.max(expected_value)

                delta = max(delta, np.abs(v - self.value_function[index]))

            if delta < theta:
                break

        self.calculate_policy()
        # Evaluating states leaves the board in a lookahead position.
        self.board.reset_board()

    def calculate_policy(self):
        """
        Rebuilds self.policy: for every observed state, a list with 1.0 at
        the index of the best move and 0 elsewhere.
        """
        self.policy = []

        # Rows are appended in iteration order, so policy[k] always belongs
        # to states[k] even if new states are discovered during the loop.
        for state in self.states:
            expected_value = self.calculate_expected_value(state)
            action_weights = [0 for _ in expected_value]
            action_weights[int(np.argmax(expected_value))] = 1.0
            self.policy.append(action_weights)

    def game_completed(self):
        pass

    def get_next_move(self):
        """
        Gets the desired next move from the AI.

        Returns the move marked in the policy for the current layout. If the
        layout was never observed (or its stored row does not match the
        available moves) the first available move is returned; None is
        returned when there is no move at all.
        """
        possible_actions = self.board.get_possible_next_moves()
        if not possible_actions:
            return None

        try:
            action_weights = self.policy[self.states.index(self.board.spots)]
        except (ValueError, IndexError):
            action_weights = []

        if len(action_weights) != len(possible_actions):
            return possible_actions[0]

        return possible_actions[int(np.argmax(action_weights))]