이제 학습 데이터를 생성하도록 한다.
플레이어는 랜덤, AI는 minimax 알고리즘을 이용하여 수를 두며
학습 데이터는 AI가 이기거나 비긴 데이터만 저장하도록 하였다.
# Tic Tac Toe (3/4)
# Created by netcanis on 2023/09/09.
#
# Minimax
# Alpha–beta pruning
# generate training data, csv파일 저장.
# 머신러닝, h5파일 저장.
import os
import tkinter as tk
import random
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.models import Sequential, load_model
# Base of the minimax terminal score (NUM_ITEMS + 1 - depth); equals the
# number of cells on the 3x3 board.
NUM_ITEMS = 9
# Board markers: a cell holds PLAYER, AI, or 0 when it is empty.
PLAYER = 1
AI = -1
# Move strategy per side: "MINIMAX" selects the minimax search in
# TTT.get_next_move(); any other value falls back to a random move.
PLAYER_MODE = "RANDOM"
AI_MODE = "MINIMAX"
# Number of games passed to TTT.generate_training_data().
NUM_EPISODES = 50000
# File the generated training data is written to / reloaded from.
CSV_FILE_NAME = "ttt_training_data.csv"
# File the trained model is saved to.
H5_FILE_NAME = "ttt_model.h5"
class TTT:
    """Tic-Tac-Toe self-play trainer.

    Plays games between a PLAYER side and an AI side (random or minimax with
    alpha-beta pruning, selected by PLAYER_MODE / AI_MODE), collects
    (board, next move) samples, and fits a small dense network on them.

    Board cells hold PLAYER, AI, or 0 for empty.
    """

    def __init__(self):
        """Create the Tk window, build the model and run training immediately."""
        self.window = tk.Tk()
        self.window.title("TTT")
        self.episode = 0
        self.init_ML()
        self.learning()

    def init_game(self):
        """Reset the board and bookkeeping and pick the starting side at random."""
        self.board = [[0 for _ in range(3)] for _ in range(3)]
        self.sequence = 0  # number of moves played so far in this game
        self.game_over = False
        self.turn_player = random.choice([PLAYER, AI])

    def find_empty_cells(self):
        """Return the (row, col) pairs of all empty cells in row-major order."""
        return [
            (row, col)
            for row in range(3)
            for col in range(3)
            if self.board[row][col] == 0
        ]

    def check_winner(self, board, player):
        """Return True if `player` owns a full row, column or diagonal of `board`."""
        for row in board:
            if all(cell == player for cell in row):
                return True
        for col in range(3):
            if all(board[row][col] == player for row in range(3)):
                return True
        if all(board[i][i] == player for i in range(3)):
            return True
        if all(board[i][2 - i] == player for i in range(3)):
            return True
        return False

    def is_board_full(self, board):
        """Return True if `board` has no empty (0) cell."""
        return all(cell != 0 for row in board for cell in row)

    def random_move(self, player):
        """Play a uniformly random empty cell for `player`.

        Returns the chosen (row, col), or (-1, -1) if the game is already over.
        """
        if self.game_over:
            return -1, -1
        row, col = random.choice(self.find_empty_cells())
        self.make_move(row, col, player)
        return row, col

    def minimax_move(self, player):
        """Play the move chosen by find_best_move() for `player`.

        Returns the chosen (row, col), or (-1, -1) if the game is already over.
        """
        if self.game_over:
            return -1, -1
        row, col = self.find_best_move(player)
        self.make_move(row, col, player)
        return row, col

    def make_move(self, row, col, player):
        """Place `player` at (row, col) and update the game state.

        Does nothing if the cell is occupied. On a win, `turn_player` is left
        unchanged (it still names the winner); on a draw it is set to 0;
        otherwise the turn passes to the other side.
        """
        if self.board[row][col] != 0:
            return
        self.board[row][col] = player
        self.sequence += 1
        if self.check_winner(self.board, player):
            self.game_over = True
            print(f"Game Over! {'Player' if player == PLAYER else 'AI'} wins!")
        elif self.is_board_full(self.board):
            self.game_over = True
            self.turn_player = 0
            print("Game draw!")
        else:
            self.turn_player *= -1

    def find_best_move(self, player):
        """Return the best (row, col) for `player` according to minimax.

        The first two moves of a game (sequence <= 1) are chosen at random.
        Scores are from the AI's point of view, so AI maximizes and PLAYER
        minimizes; ties keep the first cell found in row-major order.
        """
        if self.sequence <= 1:
            return random.choice(self.find_empty_cells())
        # The root window is never narrowed; pruning only happens below it.
        alpha = -float('inf')
        beta = float('inf')
        best_move = None
        best_score = -float('inf') if player == AI else float('inf')
        for row, col in self.find_empty_cells():
            self.board[row][col] = player
            # After AI moves the opponent (minimizer) replies, and vice versa.
            score = self.minimax(0, player != AI, alpha, beta)
            self.board[row][col] = 0
            if (player == AI and score > best_score) or \
                    (player == PLAYER and score < best_score):
                best_score = score
                best_move = (row, col)
        return best_move

    def minimax(self, depth, is_maximizing, alpha, beta):
        """Evaluate self.board with alpha-beta pruned minimax.

        Returns a score from the AI's point of view: positive when AI wins,
        negative when PLAYER wins, 0 for a draw. Subtracting `depth` makes
        quicker wins score higher and quicker losses score lower.
        The board is restored before returning.
        """
        if self.check_winner(self.board, AI):
            return NUM_ITEMS + 1 - depth
        if self.check_winner(self.board, PLAYER):
            return -(NUM_ITEMS + 1 - depth)
        if self.is_board_full(self.board):
            return 0

        if is_maximizing:
            best_score = -float('inf')
            for row, col in self.find_empty_cells():
                self.board[row][col] = AI
                score = self.minimax(depth + 1, False, alpha, beta)
                self.board[row][col] = 0
                best_score = max(best_score, score)
                alpha = max(alpha, best_score)
                if beta <= alpha:
                    break
            return best_score

        best_score = float('inf')
        for row, col in self.find_empty_cells():
            self.board[row][col] = PLAYER
            score = self.minimax(depth + 1, True, alpha, beta)
            self.board[row][col] = 0
            best_score = min(best_score, score)
            beta = min(beta, best_score)
            if beta <= alpha:
                break
        return best_score

    def updateBoardUI(self, row, col, player):
        """Placeholder for a UI refresh; intentionally does nothing here."""
        pass

    def save_data_to_csv(self, x_data, y_data, file_name):
        """Write samples to `file_name` as two columns, 'x_data' and 'y_data'.

        Each cell stores the string form of a flattened NumPy array, which is
        the format load_data_from_csv() parses back.
        """
        x_data_flat = [x.flatten() for x in x_data]
        y_data_flat = [y.flatten() for y in y_data]
        df = pd.DataFrame({'x_data': x_data_flat, 'y_data': y_data_flat})
        df.to_csv(file_name, index=False)

    def load_data_from_csv(self, file_name):
        """Load samples written by save_data_to_csv().

        Returns (x_data, y_data) as 2-D float arrays, one row per sample.
        """
        df = pd.read_csv(file_name)

        def parse(cell):
            # Strip the surrounding "[" and "]" of the array's string form.
            return np.fromstring(cell[1:-1], sep=' ')

        x_data = np.array(df['x_data'].apply(parse).to_list())
        y_data = np.array(df['y_data'].apply(parse).to_list())
        return x_data, y_data

    def generate_training_data(self, num_games):
        """Play games until self.episode reaches `num_games` and collect samples.

        Each sample pairs the flattened board after a move with the one-hot
        encoding (9 classes, index row * 3 + col) of the move that followed
        it. Both sides' moves appear as labels. Only games the PLAYER side
        did not win (AI win or draw) are stored.

        Returns (x_data, y_data) as NumPy arrays.
        """
        x_data = []
        y_data = []
        while self.episode < num_games:
            self.init_game()
            boards = []  # board state right after each move
            moves = []   # one-hot of each move, in play order
            while True:
                row, col = self.get_next_move(self.turn_player)
                boards.append(np.array(self.board).flatten())
                moves.append(np.eye(9)[row * 3 + col])
                if (self.check_winner(self.board, PLAYER)
                        or self.check_winner(self.board, AI)
                        or self.is_board_full(self.board)):
                    break

            # turn_player is the winner after a win and 0 after a draw.
            if self.turn_player != PLAYER:
                # Shift by one so board k is paired with move k + 1: drop the
                # final board (no move follows it) and the opening move (no
                # recorded board precedes it).
                x_data.extend(boards[:-1])
                y_data.extend(moves[1:])

            # NOTE(review): the published listing lost its indentation. Counting
            # every game (not only stored ones) as an episode is an assumption -
            # confirm against the author's intent.
            self.episode += 1
            print(f"{self.episode}: {self.turn_player} win.")
        return np.array(x_data), np.array(y_data)

    def get_next_move(self, player):
        """Play one move for `player` using its configured mode; return (row, col)."""
        uses_minimax = (
            (player == AI and AI_MODE == "MINIMAX")
            or (player == PLAYER and PLAYER_MODE == "MINIMAX")
        )
        if uses_minimax:
            return self.minimax_move(player)
        return self.random_move(player)

    def init_ML(self):
        """Build and compile the 9 -> 27 -> 9 dense network."""
        # Hidden layer width 27: each of the 9 cells has 3 possible states
        # (1, -1, 0), so 3 x 9 = 27.
        self.model = tf.keras.Sequential([
            tf.keras.layers.Dense(27, activation='relu', input_shape=(9,)),
            tf.keras.layers.Dense(9, activation='softmax')
        ])
        self.model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    def learning(self):
        """Obtain training data, fit the model, save it and report metrics.

        Data is generated and written to CSV_FILE_NAME when that file does not
        exist; otherwise it is loaded from it. Evaluation is done on the same
        data used for fitting.
        """
        if not os.path.exists(CSV_FILE_NAME):
            x_data, y_data = self.generate_training_data(NUM_EPISODES)
            self.save_data_to_csv(x_data, y_data, CSV_FILE_NAME)
            print(f"{CSV_FILE_NAME} 저장 완료")
        else:
            x_data, y_data = self.load_data_from_csv(CSV_FILE_NAME)

        self.model.fit(x_data, y_data, epochs=100, verbose=1)
        self.model.save(H5_FILE_NAME)

        test_results = self.model.evaluate(x_data, y_data)
        print(f"손실(Loss): {test_results[0]}")
        print(f"정확도(Accuracy): {test_results[1]}")

    def predicts(self, input_data):
        """Return the index (0-8) of the highest-scoring empty cell.

        `input_data` is a board given as a list or array, flat or 3x3. Cells
        are tried from highest to lowest predicted score and the first empty
        one wins. Returns 0 if no cell is empty.
        """
        input_data = np.asarray(input_data)
        prediction = self.model.predict(input_data.reshape(1, -1))
        cells = input_data.reshape(-1)  # row-major, so index == row * 3 + col
        for i in np.argsort(prediction, axis=-1)[0, ::-1]:
            if cells[i] == 0:
                return int(i)
        return 0

    def run(self):
        """Enter the Tk main loop."""
        self.window.mainloop()
if __name__ == "__main__":
    # Constructing TTT builds the model and runs training; run() then enters
    # the Tk main loop.
    game = TTT()
    game.run()
2023.09.12 - [AI,ML, Algorithm] - Tic-Tac-Toe 게임 제작 (1/4) - minimax
2023.09.12 - [AI,ML, Algorithm] - Tic-Tac-Toe 게임 제작 (2/4) - alpha–beta pruning
2023.09.12 - [AI,ML, Algorithm] - Tic-Tac-Toe 게임 제작 (3/4) - 머신러닝 훈련 데이터 생성
2023.09.12 - [AI,ML, Algorithm] - Tic-Tac-Toe 게임 제작 (4/4) - 머신러닝을 이용한 게임 구현
'개발 > AI,ML,ALGORITHM' 카테고리의 다른 글
Gomoku(Five in a Row, Omok) (1/5) - 기본 구현 (minimax, alpha-beta pruning) (0) | 2023.10.27 |
---|---|
Tic-Tac-Toe 게임 제작 (4/4) - 머신러닝을 이용한 게임 구현 (0) | 2023.09.12 |
Tic-Tac-Toe 게임 제작 (2/4) - alpha–beta pruning (0) | 2023.09.12 |
Tic-Tac-Toe 게임 제작 (1/4) - minimax (0) | 2023.09.12 |
Simple Neural Network XOR (0) | 2023.08.29 |