diff --git a/NLP.ipynb b/NLP.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..fc53a3a0de9b70584f5d2c3d3e9ea5f84838b686
--- /dev/null
+++ b/NLP.ipynb
@@ -0,0 +1,912 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "name": "NLP.ipynb",
+      "provenance": [],
+      "collapsed_sections": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "LoImN07UxAn2"
+      },
+      "source": [
+        "import pandas as pd\r\n",
+        "import numpy as np\r\n",
+        "%matplotlib inline\r\n",
+        "import matplotlib.pyplot as plt\r\n",
+        "import re\r\n",
+        "import urllib.request\r\n",
+        "from tensorflow.keras.preprocessing.text import Tokenizer\r\n",
+        "from tensorflow.keras.preprocessing.sequence import pad_sequences"
+      ],
+      "execution_count": 3,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "Yf1ceqLwz_72"
+      },
+      "source": [
+        "# Fetch both NSMC rating files into the working directory, then load them with pandas.\r\n",
+        "for nsmc_url, nsmc_file in [(\"https://raw.githubusercontent.com/e9t/nsmc/master/ratings_train.txt\", \"ratings_train.txt\"),\r\n",
+        "                            (\"https://raw.githubusercontent.com/e9t/nsmc/master/ratings_test.txt\", \"ratings_test.txt\")]:\r\n",
+        "    urllib.request.urlretrieve(nsmc_url, filename=nsmc_file)\r\n",
+        "train_data, test_data = pd.read_table('ratings_train.txt'), pd.read_table('ratings_test.txt')"
+      ],
+      "execution_count": 4,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "PfZhNCB10AWy",
+        "outputId": "ec183025-6db0-470e-b85b-9f3b22c118e0"
+      },
+      "source": [
+        "print('훈련용 리뷰 개수 :',len(train_data)) # 훈련용 리뷰 개수 출력\r\n",
+        "print('테스트용 리뷰 개수 :',len(test_data)) # 테스트용 리뷰 개수 출력"
+      ],
+      "execution_count": 5,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "훈련용 리뷰 개수 : 150000\n",
+            "테스트용 리뷰 개수 : 50000\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "dFki1IvRY8ob"
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 221
+        },
+        "id": "lfEqvka20D7v",
+        "outputId": "04f01fb2-5b85-4443-ae8b-803c210daf65"
+      },
+      "source": [
+        "train_data[:5] # 상위 5개 출력"
+      ],
+      "execution_count": 20,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>id</th>\n",
+              "      <th>document</th>\n",
+              "      <th>label</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>9976970</td>\n",
+              "      <td>아 더빙.. 진짜 짜증나네요 목소리</td>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>3819312</td>\n",
+              "      <td>흠...포스터보고 초딩영화줄....오버연기조차 가볍지 않구나</td>\n",
+              "      <td>1</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>10265843</td>\n",
+              "      <td>너무재밓었다그래서보는것을추천한다</td>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>9045019</td>\n",
+              "      <td>교도소 이야기구먼 ..솔직히 재미는 없다..평점 조정</td>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>6483659</td>\n",
+              "      <td>사이몬페그의 익살스런 연기가 돋보였던 영화!스파이더맨에서 늙어보이기만 했던 커스틴 ...</td>\n",
+              "      <td>1</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>"
+            ],
+            "text/plain": [
+              "         id                                           document  label\n",
+              "0   9976970                                아 더빙.. 진짜 짜증나네요 목소리      0\n",
+              "1   3819312                  흠...포스터보고 초딩영화줄....오버연기조차 가볍지 않구나      1\n",
+              "2  10265843                                  너무재밓었다그래서보는것을추천한다      0\n",
+              "3   9045019                      교도소 이야기구먼 ..솔직히 재미는 없다..평점 조정      0\n",
+              "4   6483659  사이몬페그의 익살스런 연기가 돋보였던 영화!스파이더맨에서 늙어보이기만 했던 커스틴 ...      1"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          },
+          "execution_count": 20
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "0_QgSxri4rbN",
+        "outputId": "4bcdc6db-b9a1-4034-8919-ca25654f7c27"
+      },
+      "source": [
+        "train_data['document'].nunique(), train_data['label'].nunique()"
+      ],
+      "execution_count": 22,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "(146182, 2)"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          },
+          "execution_count": 22
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "MPWimSwD4u5l"
+      },
+      "source": [
+        "train_data.drop_duplicates(subset=['document'], inplace=True)"
+      ],
+      "execution_count": 23,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "fOpiWx7N0RU0"
+      },
+      "source": [
+        "print(test_data.isnull().values.any())"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "YEbx4OUP0qNE"
+      },
+      "source": [
+        "train_data = train_data.dropna(how = 'any')"
+      ],
+      "execution_count": 100,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 221
+        },
+        "id": "vk4HP6lK0Hzx",
+        "outputId": "90be32a8-ecd3-4c6d-bc99-92d65431a7dc"
+      },
+      "source": [
+        "train_data['document'] = train_data['document'].str.replace(\"[^ㄱ-ㅎㅏ-ㅣ가-힣 ]\", \"\", regex=True)\r\n",
+        "# Remove everything except Hangul (jamo and syllables) and spaces; regex=True is explicit because pandas >= 2.0 treats the pattern as a literal string by default.\r\n",
+        "train_data[:5]"
+      ],
+      "execution_count": 24,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>id</th>\n",
+              "      <th>document</th>\n",
+              "      <th>label</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>9976970</td>\n",
+              "      <td>아 더빙 진짜 짜증나네요 목소리</td>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>3819312</td>\n",
+              "      <td>흠포스터보고 초딩영화줄오버연기조차 가볍지 않구나</td>\n",
+              "      <td>1</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>10265843</td>\n",
+              "      <td>너무재밓었다그래서보는것을추천한다</td>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>9045019</td>\n",
+              "      <td>교도소 이야기구먼 솔직히 재미는 없다평점 조정</td>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>6483659</td>\n",
+              "      <td>사이몬페그의 익살스런 연기가 돋보였던 영화스파이더맨에서 늙어보이기만 했던 커스틴 던...</td>\n",
+              "      <td>1</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>"
+            ],
+            "text/plain": [
+              "         id                                           document  label\n",
+              "0   9976970                                  아 더빙 진짜 짜증나네요 목소리      0\n",
+              "1   3819312                         흠포스터보고 초딩영화줄오버연기조차 가볍지 않구나      1\n",
+              "2  10265843                                  너무재밓었다그래서보는것을추천한다      0\n",
+              "3   9045019                          교도소 이야기구먼 솔직히 재미는 없다평점 조정      0\n",
+              "4   6483659  사이몬페그의 익살스런 연기가 돋보였던 영화스파이더맨에서 늙어보이기만 했던 커스틴 던...      1"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          },
+          "execution_count": 24
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "1QdaNPpr4-dL",
+        "outputId": "62e72381-4194-4738-f6c3-f1b79bead3b2"
+      },
+      "source": [
+        "train_data['document'] = train_data['document'].replace(r'^\\s*$', np.nan, regex=True) # empty or whitespace-only reviews -> NaN; assigned back because a chained inplace replace is a no-op under pandas copy-on-write\r\n",
+        "train_data = train_data.dropna(how = 'any')\r\n",
+        "print('전처리 후 훈련용 샘플의 개수 :',len(train_data))"
+      ],
+      "execution_count": 26,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "145791\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "FlcP0_l85DZY",
+        "outputId": "aeb52df5-b12c-4994-c200-023d2941a03e"
+      },
+      "source": [
+        "test_data.drop_duplicates(subset = ['document'], inplace=True) # drop reviews whose text is duplicated\r\n",
+        "test_data['document'] = test_data['document'].str.replace(\"[^ㄱ-ㅎㅏ-ㅣ가-힣 ]\", \"\", regex=True) # keep Hangul and spaces only; regex=True because pandas >= 2.0 defaults to literal matching\r\n",
+        "test_data['document'] = test_data['document'].replace(r'^\\s*$', np.nan, regex=True) # empty or whitespace-only reviews -> NaN; assigned back instead of a chained inplace replace\r\n",
+        "test_data = test_data.dropna(how='any') # drop rows with missing values\r\n",
+        "print('전처리 후 테스트용 샘플의 개수 :',len(test_data))"
+      ],
+      "execution_count": 25,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "전처리 후 테스트용 샘플의 개수 : 48995\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "RYvpBwSM5Fq-",
+        "outputId": "795adda9-9d42-400d-f88c-a97b56fcd691"
+      },
+      "source": [
+        "!pip install konlpy"
+      ],
+      "execution_count": 27,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "Collecting konlpy\n",
+            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/85/0e/f385566fec837c0b83f216b2da65db9997b35dd675e107752005b7d392b1/konlpy-0.5.2-py2.py3-none-any.whl (19.4MB)\n",
+            "\u001b[K     |████████████████████████████████| 19.4MB 1.3MB/s \n",
+            "\u001b[?25hCollecting beautifulsoup4==4.6.0\n",
+            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/9e/d4/10f46e5cfac773e22707237bfcd51bbffeaf0a576b0a847ec7ab15bd7ace/beautifulsoup4-4.6.0-py3-none-any.whl (86kB)\n",
+            "\u001b[K     |████████████████████████████████| 92kB 8.1MB/s \n",
+            "\u001b[?25hRequirement already satisfied: numpy>=1.6 in /usr/local/lib/python3.6/dist-packages (from konlpy) (1.19.4)\n",
+            "Collecting JPype1>=0.7.0\n",
+            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/b7/21/9e2c0dbf9df856e6392a1aec1d18006c60b175aa4e31d351e8278a8a63c0/JPype1-1.2.0-cp36-cp36m-manylinux2010_x86_64.whl (453kB)\n",
+            "\u001b[K     |████████████████████████████████| 460kB 42.1MB/s \n",
+            "\u001b[?25hRequirement already satisfied: lxml>=4.1.0 in /usr/local/lib/python3.6/dist-packages (from konlpy) (4.2.6)\n",
+            "Collecting colorama\n",
+            "  Downloading https://files.pythonhosted.org/packages/44/98/5b86278fbbf250d239ae0ecb724f8572af1c91f4a11edf4d36a206189440/colorama-0.4.4-py2.py3-none-any.whl\n",
+            "Collecting tweepy>=3.7.0\n",
+            "  Downloading https://files.pythonhosted.org/packages/bb/7c/99d51f80f3b77b107ebae2634108717362c059a41384a1810d13e2429a81/tweepy-3.9.0-py2.py3-none-any.whl\n",
+            "Requirement already satisfied: typing-extensions; python_version < \"3.8\" in /usr/local/lib/python3.6/dist-packages (from JPype1>=0.7.0->konlpy) (3.7.4.3)\n",
+            "Requirement already satisfied: six>=1.10.0 in /usr/local/lib/python3.6/dist-packages (from tweepy>=3.7.0->konlpy) (1.15.0)\n",
+            "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from tweepy>=3.7.0->konlpy) (1.3.0)\n",
+            "Requirement already satisfied: requests[socks]>=2.11.1 in /usr/local/lib/python3.6/dist-packages (from tweepy>=3.7.0->konlpy) (2.23.0)\n",
+            "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from requests-oauthlib>=0.7.0->tweepy>=3.7.0->konlpy) (3.1.0)\n",
+            "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests[socks]>=2.11.1->tweepy>=3.7.0->konlpy) (3.0.4)\n",
+            "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests[socks]>=2.11.1->tweepy>=3.7.0->konlpy) (1.24.3)\n",
+            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests[socks]>=2.11.1->tweepy>=3.7.0->konlpy) (2020.12.5)\n",
+            "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests[socks]>=2.11.1->tweepy>=3.7.0->konlpy) (2.10)\n",
+            "Requirement already satisfied: PySocks!=1.5.7,>=1.5.6; extra == \"socks\" in /usr/local/lib/python3.6/dist-packages (from requests[socks]>=2.11.1->tweepy>=3.7.0->konlpy) (1.7.1)\n",
+            "Installing collected packages: beautifulsoup4, JPype1, colorama, tweepy, konlpy\n",
+            "  Found existing installation: beautifulsoup4 4.6.3\n",
+            "    Uninstalling beautifulsoup4-4.6.3:\n",
+            "      Successfully uninstalled beautifulsoup4-4.6.3\n",
+            "  Found existing installation: tweepy 3.6.0\n",
+            "    Uninstalling tweepy-3.6.0:\n",
+            "      Successfully uninstalled tweepy-3.6.0\n",
+            "Successfully installed JPype1-1.2.0 beautifulsoup4-4.6.0 colorama-0.4.4 konlpy-0.5.2 tweepy-3.9.0\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "nTUUKfxg5MvF"
+      },
+      "source": [
+        "from konlpy.tag import Okt"
+      ],
+      "execution_count": 28,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "BP51L-LQ5OlN",
+        "outputId": "375dc8c4-413d-4c84-f82c-63a506530d85"
+      },
+      "source": [
+        "okt = Okt()\r\n",
+        "print(train_data['document'][1])\r\n",
+        "okt.morphs(train_data['document'][1], stem = True)"
+      ],
+      "execution_count": 101,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "흠포스터보고 초딩영화줄오버연기조차 가볍지 않구나\n"
+          ],
+          "name": "stdout"
+        },
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "['흠', '포스터', '보고', '초딩', '영화', '줄', '오버', '연기', '조차', '가볍다', '않다']"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          },
+          "execution_count": 101
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "FlmMJ8kD5P9h"
+      },
+      "source": [
+        "stopwords = ['의','가','이','은','들','는','좀','잘','걍','과','도','를','으로','자','에','와','한','하다']\r\n",
+        "\r\n",
+        "def tokenize_documents(documents):\r\n",
+        "    \"\"\"Stem-tokenize every document with Okt and drop the stopwords.\r\n",
+        "\r\n",
+        "    Takes an iterable of strings and returns one list of tokens per document, in input order.\r\n",
+        "    \"\"\"\r\n",
+        "    stopword_set = set(stopwords) # set lookup instead of scanning the list for every token\r\n",
+        "    return [[word for word in okt.morphs(sentence, stem=True) if word not in stopword_set]\r\n",
+        "            for sentence in documents]\r\n",
+        "\r\n",
+        "# Both splits go through the same helper so their preprocessing cannot drift apart.\r\n",
+        "X_train = tokenize_documents(train_data['document'])\r\n",
+        "X_test = tokenize_documents(test_data['document'])\r\n",
+        "# X_train / X_test now hold lists of token strings (not yet integer-encoded)."
+      ],
+      "execution_count": 36,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "fh0UexgqBydS",
+        "outputId": "3463110d-81e1-4d93-b844-08cb3d01dd85"
+      },
+      "source": [
+        "print(X_train[:3])"
+      ],
+      "execution_count": 61,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "[[51, 455, 17, 261, 660], [934, 458, 42, 603, 2, 215, 1450, 25, 962, 676, 20], [387, 2445, 1, 2316, 5672, 3, 223, 10]]\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "waEO9jB7B9ql"
+      },
+      "source": [
+        "tokenizer = Tokenizer()\r\n",
+        "tokenizer.fit_on_texts(X_train)\r\n",
+        "print(list(tokenizer.word_index.items())[:10]) # preview only; printing the full word_index floods the output with tens of thousands of entries\r\n",
+        "print(len(tokenizer.word_index))"
+      ],
+      "execution_count": 38,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "Sgd-Hwq7B_B2",
+        "outputId": "244c8165-9a59-4649-dee8-fa95a6f6ddcc"
+      },
+      "source": [
+        "threshold = 3\r\n",
+        "\r\n",
+        "# Size of the vocabulary learned by the tokenizer.\r\n",
+        "total_cnt = len(tokenizer.word_index)\r\n",
+        "\r\n",
+        "# Number of words whose frequency is strictly below `threshold`.\r\n",
+        "rare_cnt = sum(1 for frequency in tokenizer.word_counts.values() if frequency < threshold)\r\n",
+        "\r\n",
+        "print('단어 집합(vocabulary)의 크기 :',total_cnt)\r\n",
+        "print('등장 빈도가 %s번 이하인 희귀 단어의 수: %s'%(threshold - 1, rare_cnt))"
+      ],
+      "execution_count": 39,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "단어 집합(vocabulary)의 크기 : 43752\n",
+            "등장 빈도가 2번 이하인 희귀 단어의 수: 24337\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "wrQZFEUVCDRa",
+        "outputId": "0835f163-6876-4fa0-f7b8-caf27a386ba1"
+      },
+      "source": [
+        "vocab_size = total_cnt - rare_cnt + 2\r\n",
+        "print('단어 집합의 크기 :',vocab_size)"
+      ],
+      "execution_count": 40,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "단어 집합의 크기 : 19417\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "L2D8IT7xCFf9"
+      },
+      "source": [
+        "# Rebuild the tokenizer with the vocabulary capped at vocab_size and an explicit out-of-vocabulary token.\r\n",
+        "tokenizer = Tokenizer(num_words=vocab_size, oov_token='OOV')\r\n",
+        "tokenizer.fit_on_texts(X_train) # fitted on the training tokens only\r\n",
+        "X_train, X_test = (tokenizer.texts_to_sequences(split_tokens) for split_tokens in (X_train, X_test)) # integer-encode both splits"
+      ],
+      "execution_count": 41,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "-v7C4X6tCILL"
+      },
+      "source": [
+        "max_len = 30\r\n",
+        "# Bring every encoded review in both splits to max_len entries.\r\n",
+        "X_train, X_test = (pad_sequences(encoded_split, maxlen=max_len) for encoded_split in (X_train, X_test))"
+      ],
+      "execution_count": 63,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "zojktSc1G-o8"
+      },
+      "source": [
+        "y_train = np.array(train_data['label'])\r\n",
+        "y_test = np.array(test_data['label'])\r\n",
+        "X_train[1]"
+      ],
+      "execution_count": 65,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "nZA9D39RGNkT"
+      },
+      "source": [
+        "from tensorflow.keras.layers import Embedding, Dense, LSTM\r\n",
+        "from tensorflow.keras.models import Sequential\r\n",
+        "from tensorflow.keras.models import load_model\r\n",
+        "from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint"
+      ],
+      "execution_count": 57,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "uVbgjA9EGWAw"
+      },
+      "source": [
+        "# Embedding (100-d) -> LSTM (128 units) -> single sigmoid output unit.\r\n",
+        "model = Sequential([Embedding(vocab_size, 100),\r\n",
+        "                    LSTM(128),\r\n",
+        "                    Dense(1, activation='sigmoid')])"
+      ],
+      "execution_count": 58,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "kfN8Z-DGGXco"
+      },
+      "source": [
+        "es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=4)\r\n",
+        "mc = ModelCheckpoint('best_model.h5', monitor='val_acc', mode='max', verbose=1, save_best_only=True)"
+      ],
+      "execution_count": 59,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "iNFXL9s8GY1t",
+        "outputId": "36c4ce96-a1f5-4baa-ee10-88cc9722fede"
+      },
+      "source": [
+        "model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])\r\n",
+        "history = model.fit(X_train, y_train, epochs=15, callbacks=[es, mc], batch_size=60, validation_split=0.2)"
+      ],
+      "execution_count": 69,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "Epoch 1/15\n",
+            "1944/1944 [==============================] - 159s 81ms/step - loss: 0.3858 - acc: 0.8256 - val_loss: 0.3499 - val_acc: 0.8468\n",
+            "\n",
+            "Epoch 00001: val_acc improved from -inf to 0.84684, saving model to best_model.h5\n",
+            "Epoch 2/15\n",
+            "1944/1944 [==============================] - 156s 80ms/step - loss: 0.3294 - acc: 0.8567 - val_loss: 0.3359 - val_acc: 0.8548\n",
+            "\n",
+            "Epoch 00002: val_acc improved from 0.84684 to 0.85476, saving model to best_model.h5\n",
+            "Epoch 3/15\n",
+            "1944/1944 [==============================] - 158s 81ms/step - loss: 0.3003 - acc: 0.8716 - val_loss: 0.3314 - val_acc: 0.8572\n",
+            "\n",
+            "Epoch 00003: val_acc improved from 0.85476 to 0.85723, saving model to best_model.h5\n",
+            "Epoch 4/15\n",
+            "1944/1944 [==============================] - 155s 80ms/step - loss: 0.2806 - acc: 0.8821 - val_loss: 0.3319 - val_acc: 0.8554\n",
+            "\n",
+            "Epoch 00004: val_acc did not improve from 0.85723\n",
+            "Epoch 5/15\n",
+            "1944/1944 [==============================] - 153s 79ms/step - loss: 0.2611 - acc: 0.8928 - val_loss: 0.3367 - val_acc: 0.8603\n",
+            "\n",
+            "Epoch 00005: val_acc improved from 0.85723 to 0.86028, saving model to best_model.h5\n",
+            "Epoch 6/15\n",
+            "1944/1944 [==============================] - 153s 79ms/step - loss: 0.2492 - acc: 0.8998 - val_loss: 0.3365 - val_acc: 0.8559\n",
+            "\n",
+            "Epoch 00006: val_acc did not improve from 0.86028\n",
+            "Epoch 7/15\n",
+            "1944/1944 [==============================] - 153s 79ms/step - loss: 0.2340 - acc: 0.9070 - val_loss: 0.3479 - val_acc: 0.8555\n",
+            "\n",
+            "Epoch 00007: val_acc did not improve from 0.86028\n",
+            "Epoch 00007: early stopping\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "LgDrITELGaCI",
+        "outputId": "22ea3bbb-0f9b-4faa-9001-d1c2334e714e"
+      },
+      "source": [
+        "loaded_model = load_model('best_model.h5')\r\n",
+        "print(loaded_model.predict(X_test[:5]))\r\n",
+        "print(y_test[:5])"
+      ],
+      "execution_count": 89,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "[[0.98269826]\n",
+            " [0.10281318]\n",
+            " [0.00681123]\n",
+            " [0.08977833]\n",
+            " [0.9861894 ]]\n",
+            "[1 0 0 0 1]\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "zgh7HidIQLXX",
+        "outputId": "be94275b-85a4-45e6-fe96-3373f62a0f7c"
+      },
+      "source": [
+        "print(\"\\n 테스트 정확도: %.4f\" % (loaded_model.evaluate(X_test, y_test)[1]))"
+      ],
+      "execution_count": 90,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "1532/1532 [==============================] - 19s 12ms/step - loss: 0.3434 - acc: 0.8576\n",
+            "\n",
+            " 테스트 정확도: 0.8576\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "Ux5IGakqKA49"
+      },
+      "source": [
+        "def s_analysis(new_sentence):\r\n",
+        "  \"\"\"Print whether a raw review string is predicted positive or negative, with the confidence as a percentage.\"\"\"\r\n",
+        "  new_sentence = re.sub(\"[^ㄱ-ㅎㅏ-ㅣ가-힣 ]\", \"\", new_sentence) # same Hangul-and-space cleanup that was applied to the training reviews\r\n",
+        "  new_sentence = [word for word in okt.morphs(new_sentence, stem=True) if word not in stopwords] # tokenize, then drop stopwords\r\n",
+        "  pad_new = pad_sequences(tokenizer.texts_to_sequences([new_sentence]), maxlen = max_len) # integer-encode and pad to max_len\r\n",
+        "  score = float(loaded_model.predict(pad_new)[0][0]) # predict returns a 2-D array; index the scalar instead of calling float() on the array\r\n",
+        "  if(score > 0.5):\r\n",
+        "    print(\"{:.2f}% 확률로 긍정 리뷰입니다.\\n\".format(score * 100))\r\n",
+        "  else:\r\n",
+        "    print(\"{:.2f}% 확률로 부정 리뷰입니다.\\n\".format((1 - score) * 100))"
+      ],
+      "execution_count": 92,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "aFOBFmYMQKrX",
+        "outputId": "b4ddf99b-b0b3-4131-b800-ec6fecc86a7b"
+      },
+      "source": [
+        "s_analysis(\"진짜 재미없고 최악이다\")"
+      ],
+      "execution_count": 103,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "99.73% 확률로 부정 리뷰입니다.\n",
+            "\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "aNfGM0fsQiTE",
+        "outputId": "8d8e62ac-a9fe-4850-d581-3f3d919fe526"
+      },
+      "source": [
+        "s_analysis(\"진짜 최고다. 정말 박진감있고 흥미로웠다\")"
+      ],
+      "execution_count": 95,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "99.76% 확률로 긍정 리뷰입니다.\n",
+            "\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "ue9Yvi0MQr8E",
+        "outputId": "a24e5877-881f-4d54-905f-21cc84321938"
+      },
+      "source": [
+        "s_analysis(\"정말 기분이 좋네요\")"
+      ],
+      "execution_count": 99,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "96.36% 확률로 긍정 리뷰입니다.\n",
+            "\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "1z_X6QeuQ0gP",
+        "outputId": "f193e2e5-34e9-4698-b172-a1b68f7146a2"
+      },
+      "source": [
+        "s_analysis(\"진짜 재미없고 최악이다\")"
+      ],
+      "execution_count": 102,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "99.73% 확률로 부정 리뷰입니다.\n",
+            "\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file