From e844632c06ab64fbc993035cd3d469c7e1000fab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=EC=A7=80=EC=88=98?=
 <75234139+JisooKinn@users.noreply.github.com>
Date: Thu, 2 Dec 2021 15:26:55 +0900
Subject: [PATCH] Add Accuracy etc.

---
 [DM]_Naive_Bayes.ipynb | 294 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 294 insertions(+)
 create mode 100644 [DM]_Naive_Bayes.ipynb

diff --git a/[DM]_Naive_Bayes.ipynb b/[DM]_Naive_Bayes.ipynb
new file mode 100644
index 0000000..4d5c8d4
--- /dev/null
+++ b/[DM]_Naive_Bayes.ipynb
@@ -0,0 +1,294 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "name": "[DM] Naive_Bayes.ipynb",
+      "provenance": [],
+      "collapsed_sections": [],
+      "authorship_tag": "ABX9TyNy7hmbNOzoRjiOM4Rat/n0",
+      "include_colab_link": true
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "view-in-github",
+        "colab_type": "text"
+      },
+      "source": [
+        "<a href=\"https://colab.research.google.com/github/lani009/IDS-DataMining/blob/main/%5BDM%5D_Naive_Bayes.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "p_S1iryH1NBB"
+      },
+      "source": [
+        "import os\n",
+        "import pandas as pd\n",
+        "import numpy as np\n",
+        "import matplotlib.pyplot as plt\n",
+        "import seaborn as sns\n",
+        "import time"
+      ],
+      "execution_count": 1,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "S3PZhNLC1daY",
+        "outputId": "8c764462-6538-4e2e-c11e-b2bffad1a612"
+      },
+      "source": [
+        "data = pd.read_csv('DM_data.csv')\n",
+        "data.info()"
+      ],
+      "execution_count": 2,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "<class 'pandas.core.frame.DataFrame'>\n",
+            "RangeIndex: 25192 entries, 0 to 25191\n",
+            "Data columns (total 40 columns):\n",
+            " #   Column                       Non-Null Count  Dtype  \n",
+            "---  ------                       --------------  -----  \n",
+            " 0   duration                     25192 non-null  int64  \n",
+            " 1   protocol_type                25192 non-null  int64  \n",
+            " 2   service                      25192 non-null  int64  \n",
+            " 3   flag                         25192 non-null  int64  \n",
+            " 4   src_bytes                    25192 non-null  int64  \n",
+            " 5   dst_bytes                    25192 non-null  int64  \n",
+            " 6   land                         25192 non-null  int64  \n",
+            " 7   wrong_fragment               25192 non-null  int64  \n",
+            " 8   hot                          25192 non-null  int64  \n",
+            " 9   num_failed_logins            25192 non-null  int64  \n",
+            " 10  logged_in                    25192 non-null  int64  \n",
+            " 11  num_compromised              25192 non-null  int64  \n",
+            " 12  root_shell                   25192 non-null  int64  \n",
+            " 13  su_attempted                 25192 non-null  int64  \n",
+            " 14  num_root                     25192 non-null  int64  \n",
+            " 15  num_file_creations           25192 non-null  int64  \n",
+            " 16  num_shells                   25192 non-null  int64  \n",
+            " 17  num_access_files             25192 non-null  int64  \n",
+            " 18  is_guest_login               25192 non-null  int64  \n",
+            " 19  count                        25192 non-null  int64  \n",
+            " 20  srv_count                    25192 non-null  int64  \n",
+            " 21  serror_rate                  25192 non-null  float64\n",
+            " 22  srv_serror_rate              25192 non-null  float64\n",
+            " 23  rerror_rate                  25192 non-null  float64\n",
+            " 24  srv_rerror_rate              25192 non-null  float64\n",
+            " 25  same_srv_rate                25192 non-null  float64\n",
+            " 26  diff_srv_rate                25192 non-null  float64\n",
+            " 27  srv_diff_host_rate           25192 non-null  float64\n",
+            " 28  dst_host_count               25192 non-null  int64  \n",
+            " 29  dst_host_srv_count           25192 non-null  int64  \n",
+            " 30  dst_host_same_srv_rate       25192 non-null  float64\n",
+            " 31  dst_host_diff_srv_rate       25192 non-null  float64\n",
+            " 32  dst_host_same_src_port_rate  25192 non-null  float64\n",
+            " 33  dst_host_srv_diff_host_rate  25192 non-null  float64\n",
+            " 34  dst_host_serror_rate         25192 non-null  float64\n",
+            " 35  dst_host_srv_serror_rate     25192 non-null  float64\n",
+            " 36  dst_host_rerror_rate         25192 non-null  float64\n",
+            " 37  dst_host_srv_rerror_rate     25192 non-null  float64\n",
+            " 38  class                        25192 non-null  int64  \n",
+            " 39  index_num                    25192 non-null  int64  \n",
+            "dtypes: float64(15), int64(25)\n",
+            "memory usage: 7.7 MB\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "vzo6lf_G3QFN"
+      },
+      "source": [
+        "from sklearn.model_selection import train_test_split\n",
+        "from sklearn.preprocessing import MinMaxScaler, StandardScaler"
+      ],
+      "execution_count": 3,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "ioY_BhsQ3Suc"
+      },
+      "source": [
+        "data_y = data[\"class\"]\n",
+        "data_X = data.drop(columns = [\"class\",\"index_num\"])"
+      ],
+      "execution_count": 4,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "Aoz6AkZa3_rU"
+      },
+      "source": [
+        "sc = MinMaxScaler()\n",
+        "_X = sc.fit_transform(data_X)"
+      ],
+      "execution_count": 46,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "LffojJ-C1tEY",
+        "outputId": "2b81d88e-4810-4479-e668-ce1ecc4f377c"
+      },
+      "source": [
+        "X_train, X_test, Y_train, Y_test = train_test_split(_X, data_y, test_size=0.3, random_state=42)\n",
+        "print(X_train.shape, X_test.shape)\n",
+        "print(Y_train.shape, Y_test.shape)"
+      ],
+      "execution_count": 47,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "(17634, 38) (7558, 38)\n",
+            "(17634,) (7558,)\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "R1X0zCyN4qNT"
+      },
+      "source": [
+        "## **Naive Bayes**"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "xupFriQx2n6T"
+      },
+      "source": [
+        "from sklearn.naive_bayes import GaussianNB"
+      ],
+      "execution_count": 59,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "bTXaZ-jf4Slk"
+      },
+      "source": [
+        "nb = GaussianNB()"
+      ],
+      "execution_count": 60,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "N8lbZQaE4UD8",
+        "outputId": "977a4c01-5f90-45d4-f590-3e7a8f3283b6"
+      },
+      "source": [
+        "nb.fit(X_train, Y_train.values.ravel())"
+      ],
+      "execution_count": 61,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "GaussianNB()"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 61
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "Kf7YGTRd4WyR"
+      },
+      "source": [
+        "Y_test_pred = nb.predict(X_test)"
+      ],
+      "execution_count": 62,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "aqMM4cqwvXQt",
+        "outputId": "dcc7cda0-81ef-46a8-fbf5-97c7a805e851"
+      },
+      "source": [
+        "from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score\n",
+        "\n",
+        "\n",
+        "print(\"Accuracy : \", accuracy_score(Y_test,Y_test_pred)) \n",
+        "\n",
+        "print(\"Precision : \", precision_score(Y_test,Y_test_pred))\n",
+        "\n",
+        "print(\"Recall : \", recall_score(Y_test,Y_test_pred))\n",
+        "\n",
+        "print(\"F1 Score : \",f1_score(Y_test,Y_test_pred))"
+      ],
+      "execution_count": 65,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Accuracy :  0.9053982535062186\n",
+            "Precision :  0.8777987591043971\n",
+            "Recall :  0.9254835039817975\n",
+            "F1 Score :  0.9010106603904195\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "IYDovkMfwr-X"
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}
\ No newline at end of file
-- 
GitLab