반응형

이제 학습 데이터를 생성하도록 한다.

플레이어는 무작위(random)로, AI는 minimax 알고리즘을 이용하여 수를 두며

학습 데이터는 AI가 이기거나 비긴 게임의 데이터만 저장하도록 하였다.

# Tic Tac Toe (3/4)
# Created by netcanis on 2023/09/09.
#
# Minimax
# Alpha–beta pruning
# 학습 데이터(training data)를 생성하여 csv 파일로 저장.
# 머신러닝 모델을 학습시켜 h5 파일로 저장.


import os
import tkinter as tk
import random
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.models import Sequential, load_model

# Number of cells on the board; minimax derives its win score from this value.
NUM_ITEMS = 9
# Markers stored in board cells for each side (empty cells hold 0).
PLAYER = 1
AI = -1

# Move policy per side: "MINIMAX" selects the minimax search, any other value
# falls back to a uniformly random move.
PLAYER_MODE = "RANDOM"
AI_MODE = "MINIMAX"

# Number of kept games (AI wins or draws) to record when generating data.
NUM_EPISODES = 50000
# File the training data is written to / read from.
CSV_FILE_NAME = "ttt_training_data.csv"
# File the trained Keras model is saved to.
H5_FILE_NAME = "ttt_model.h5"

class TTT:
    def __init__(self):
        """Create the Tk window, build the model, and run training right away.

        Construction is heavyweight: ``learning()`` is called here, so the
        training data is generated (or loaded) and the model is fitted before
        the constructor returns.
        """
        self.window = tk.Tk()
        self.window.title("TTT")

        # Number of games recorded so far by generate_training_data().
        self.episode = 0

        self.init_ML()
        self.learning()

    def init_game(self):
        """Reset all per-game state and randomly choose which side moves first."""
        starter = random.choice([PLAYER, AI])

        # 3x3 grid of markers; 0 means the cell is empty.
        self.board = [[0] * 3 for _ in range(3)]
        # Count of moves played in the current game.
        self.sequence = 0
        self.game_over = False
        self.turn_player = starter

    def find_empty_cells(self):
        empty_cells = []
        for row in range(3):
            for col in range(3):
                if self.board[row][col] == 0:
                    empty_cells.append((row, col))
        return empty_cells

    def check_winner(self, board, player):
        for row in board:
            if all(cell == player for cell in row):
                return True
        for col in range(3):
            if all(board[row][col] == player for row in range(3)):
                return True
        if all(board[i][i] == player for i in range(3)) or all(board[i][2 - i] == player for i in range(3)):
            return True
        return False

    def is_board_full(self, board):
        return all(cell != 0 for row in board for cell in row)

    def random_move(self, player):
        if self.game_over:
            return -1, -1
        row, col = random.choice(self.find_empty_cells())
        self.make_move(row, col, player)
        return row, col

    def minimax_move(self, player):
        if self.game_over:
            return -1, -1
        row, col = self.find_best_move(player)
        self.make_move(row, col, player)
        return row, col

    def make_move(self, row, col, player):
        if self.board[row][col] == 0:
            self.board[row][col] = player

            self.sequence += 1

            if self.check_winner(self.board, player):
                self.game_over = True
                print(f"Game Over! {'Player' if player == PLAYER else 'AI'} wins!")
            elif self.is_board_full(self.board):
                self.game_over = True
                self.turn_player = 0
                print("Game draw!")
            else:
                self.turn_player *= -1

    def find_best_move(self, player):
        if self.sequence <= 1:
            return random.choice(self.find_empty_cells())

        alpha = -float('inf')
        beta = float('inf')

        best_move = None
        if player == AI:
            best_score = -float('inf')
        else:
            best_score = float('inf')

        for row, col in self.find_empty_cells():
            self.board[row][col] = player
            if player == AI:
                score = self.minimax(0, False, alpha, beta)
            else:
                score = self.minimax(0, True, alpha, beta)
            self.board[row][col] = 0

            if (player == AI and score > best_score) or \
                    (player == PLAYER and score < best_score):
                best_score = score
                best_move = (row, col)

        return best_move

    def minimax(self, depth, is_maximizing, alpha, beta):
        """Score the current board by minimax search with alpha-beta cutoffs.

        Scores are from the AI's point of view: positive when the AI has won,
        negative when the player has won, 0 for a full board with no winner.
        The magnitude ``NUM_ITEMS + 1 - depth`` shrinks with depth, so wins
        found closer to the root score higher.

        Args:
            depth: Number of plies below the position being evaluated.
            is_maximizing: True when the AI is to move, False for the player.
            alpha: Best score the maximizer is already assured of.
            beta: Best score the minimizer is already assured of.

        Returns:
            The score of the position. The board is restored before returning.
        """
        win_value = NUM_ITEMS + 1 - depth
        if self.check_winner(self.board, AI):
            return win_value
        if self.check_winner(self.board, PLAYER):
            return -win_value
        if self.is_board_full(self.board):
            return 0

        mover = AI if is_maximizing else PLAYER
        best = -float('inf') if is_maximizing else float('inf')

        for r, c in self.find_empty_cells():
            # Play the candidate move, evaluate the reply, then undo it.
            self.board[r][c] = mover
            value = self.minimax(depth + 1, not is_maximizing, alpha, beta)
            self.board[r][c] = 0

            if is_maximizing:
                best = max(best, value)
                alpha = max(alpha, best)
            else:
                best = min(best, value)
                beta = min(beta, best)

            # Window closed: the opponent already has a better alternative.
            if beta <= alpha:
                break

        return best

    def updateBoardUI(self, row, col, player):
        """Placeholder for refreshing the board UI; currently a no-op."""
        pass

    def save_data_to_csv(self, x_data, y_data, file_name):
        x_data_flat = [x.flatten() for x in x_data]
        y_data_flat = [y.flatten() for y in y_data]

        data = {'x_data': x_data_flat, 'y_data': y_data_flat}
        df = pd.DataFrame(data)

        df.to_csv(file_name, index=False)

    def load_data_from_csv(self, file_name):
        df = pd.read_csv(file_name)

        x_data_flat = df['x_data'].apply(lambda x: np.fromstring(x[1:-1], sep=' '))
        y_data_flat = df['y_data'].apply(lambda x: np.fromstring(x[1:-1], sep=' '))

        x_data = np.array(x_data_flat.to_list())
        y_data = np.array(y_data_flat.to_list())

        return x_data, y_data

    def generate_training_data(self, num_games):
        """Play games until ``num_games`` of them have been recorded.

        Only games that do not end with ``turn_player == PLAYER`` are kept,
        i.e. AI wins and draws. For a kept game, each sample pairs the board
        as it stood before a move with the one-hot index (``row * 3 + col``)
        of that move; the very first move of the game has no preceding
        snapshot and is therefore not recorded.

        NOTE(review): labels come from every move in the game, so moves made
        by the opposing side are included too -- confirm this is intended.

        Args:
            num_games: Target value for ``self.episode`` (kept games).

        Returns:
            Tuple ``(x_data, y_data)`` of NumPy arrays.
        """
        boards = []
        moves = []

        while self.episode < num_games:
            self.init_game()

            game_boards = []
            game_moves = []
            finished = False
            while not finished:
                row, col = self.get_next_move(self.turn_player)
                # Snapshot is taken AFTER the move; it becomes the input for
                # the following move once the lists are shifted below.
                game_boards.append(np.array(self.board).flatten())
                game_moves.append(np.eye(9)[row * 3 + col])

                finished = (
                    self.check_winner(self.board, PLAYER)
                    or self.check_winner(self.board, AI)
                    or self.is_board_full(self.board)
                )

            # After a win turn_player is the winner; after a draw it is 0.
            if self.turn_player == PLAYER:
                continue

            # Shift by one so board[i] lines up with the move played on it.
            boards.extend(game_boards[:-1])
            moves.extend(game_moves[1:])
            self.episode += 1
            print(f"{self.episode}: {self.turn_player} win.")

        return np.array(boards), np.array(moves)

    def get_next_move(self, player):
        """Make one move for ``player`` using that side's configured policy.

        Returns:
            The ``(row, col)`` returned by ``minimax_move`` when the side's
            mode is "MINIMAX", otherwise by ``random_move``.
        """
        mode_by_side = {AI: AI_MODE, PLAYER: PLAYER_MODE}
        if mode_by_side.get(player) == "MINIMAX":
            return self.minimax_move(player)
        return self.random_move(player)

    def init_ML(self):
        """Build and compile the move-prediction network into ``self.model``.

        The network takes the 9 board cells as input and outputs a softmax
        over the 9 cells.
        """
        # Hidden layer width 27: each cell has 3 possible states (1, -1, 0)
        # and there are 9 cells, so 3 x 9 = 27.
        hidden = tf.keras.layers.Dense(27, activation='relu', input_shape=(9,))
        output = tf.keras.layers.Dense(9, activation='softmax')
        self.model = tf.keras.Sequential([hidden, output])

        self.model.compile(
            optimizer='adam',
            loss='categorical_crossentropy',
            metrics=['accuracy'],
        )

    def learning(self):
        """Obtain training data, fit the model, save it, and report metrics.

        If the CSV file already exists it is loaded; otherwise new data is
        generated and written to it first. The model is evaluated on the same
        data it was trained on.
        """
        if os.path.exists(CSV_FILE_NAME):
            x_data, y_data = self.load_data_from_csv(CSV_FILE_NAME)
        else:
            x_data, y_data = self.generate_training_data(NUM_EPISODES)
            self.save_data_to_csv(x_data, y_data, CSV_FILE_NAME)
            print(f"{CSV_FILE_NAME} 저장 완료")

        self.model.fit(x_data, y_data, epochs=100, verbose=1)
        self.model.save(H5_FILE_NAME)

        # evaluate() results are indexed as [loss, accuracy] below.
        metrics = self.model.evaluate(x_data, y_data)
        loss = metrics[0]
        accuracy = metrics[1]
        print(f"손실(Loss): {loss}")
        print(f"정확도(Accuracy): {accuracy}")
    def predicts(self, input_data):
        if isinstance(input_data, list):
            input_data = np.array(input_data)

        prediction = self.model.predict(input_data.reshape(1, -1))
        sorted_indices = np.argsort(prediction, axis=-1)[:, ::-1]

        index = 0
        for i in sorted_indices[0]:
            if input_data.shape == (9,):
                if input_data[i] == 0:
                    index = i
                    break
            elif input_data.shape == (3, 3):
                row = i // 3
                col = i % 3
                if input_data[row][col] == 0:
                    index = i
                    break

        #max_value = prediction[0, index]
        return index

    def run(self):
        """Start the Tk event loop for the window."""
        self.window.mainloop()


# Script entry point: constructing TTT builds and trains the model, then
# run() starts the Tk event loop.
if __name__ == "__main__":
    game = TTT()
    game.run()

 

 

2023.09.12 - [AI,ML, Algorithm] - Tic-Tac-Toe 게임 제작 (1/4) - minimax

2023.09.12 - [AI,ML, Algorithm] - Tic-Tac-Toe 게임 제작 (2/4) - alpha–beta pruning

2023.09.12 - [AI,ML, Algorithm] - Tic-Tac-Toe 게임 제작 (3/4) - 머신러닝 훈련 데이터 생성

2023.09.12 - [AI,ML, Algorithm] - Tic-Tac-Toe 게임 제작 (4/4) - 머신러닝을 이용한 게임 구현

 

반응형
블로그 이미지

SKY STORY

,