From e844632c06ab64fbc993035cd3d469c7e1000fab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EC=A7=80=EC=88=98?= <75234139+JisooKinn@users.noreply.github.com> Date: Thu, 2 Dec 2021 15:26:55 +0900 Subject: [PATCH] Add Accuracy ect.. --- [DM]_Naive_Bayes.ipynb | 294 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 294 insertions(+) create mode 100644 [DM]_Naive_Bayes.ipynb diff --git a/[DM]_Naive_Bayes.ipynb b/[DM]_Naive_Bayes.ipynb new file mode 100644 index 0000000..4d5c8d4 --- /dev/null +++ b/[DM]_Naive_Bayes.ipynb @@ -0,0 +1,294 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "[DM] Naive_Bayes.ipynb", + "provenance": [], + "collapsed_sections": [], + "authorship_tag": "ABX9TyNy7hmbNOzoRjiOM4Rat/n0", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "<a href=\"https://colab.research.google.com/github/lani009/IDS-DataMining/blob/main/%5BDM%5D_Naive_Bayes.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "p_S1iryH1NBB" + }, + "source": [ + "import os\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "import time" + ], + "execution_count": 1, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "S3PZhNLC1daY", + "outputId": "8c764462-6538-4e2e-c11e-b2bffad1a612" + }, + "source": [ + "data = pd.read_csv('DM_data.csv')\n", + "data.info()" + ], + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "<class 'pandas.core.frame.DataFrame'>\n", + "RangeIndex: 25192 entries, 0 to 25191\n", + "Data columns (total 40 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 duration 25192 non-null int64 \n", + " 1 protocol_type 25192 non-null int64 \n", + " 2 service 25192 non-null int64 \n", + " 3 flag 25192 non-null int64 \n", + " 4 src_bytes 25192 non-null int64 \n", + " 5 dst_bytes 25192 non-null int64 \n", + " 6 land 25192 non-null int64 \n", + " 7 wrong_fragment 25192 non-null int64 \n", + " 8 hot 25192 non-null int64 \n", + " 9 num_failed_logins 25192 non-null int64 \n", + " 10 logged_in 25192 non-null int64 \n", + " 11 num_compromised 25192 non-null int64 \n", + " 12 root_shell 25192 non-null int64 \n", + " 13 su_attempted 25192 non-null int64 \n", + " 14 num_root 25192 non-null int64 \n", + " 15 num_file_creations 25192 non-null int64 \n", + " 16 num_shells 25192 non-null int64 \n", + " 17 num_access_files 25192 non-null int64 \n", + " 18 is_guest_login 25192 non-null int64 \n", + " 19 count 25192 non-null int64 \n", + " 20 srv_count 25192 non-null int64 \n", + " 21 serror_rate 25192 non-null float64\n", + " 22 srv_serror_rate 25192 non-null float64\n", + " 23 rerror_rate 25192 non-null float64\n", + " 24 srv_rerror_rate 25192 non-null float64\n", + " 25 same_srv_rate 25192 non-null float64\n", + " 26 diff_srv_rate 25192 non-null float64\n", + " 27 srv_diff_host_rate 25192 non-null float64\n", + " 28 dst_host_count 25192 non-null int64 \n", + " 29 dst_host_srv_count 25192 non-null int64 \n", + " 30 dst_host_same_srv_rate 25192 non-null float64\n", + " 31 dst_host_diff_srv_rate 25192 non-null float64\n", + " 32 dst_host_same_src_port_rate 25192 non-null float64\n", + " 33 dst_host_srv_diff_host_rate 25192 non-null float64\n", + " 34 dst_host_serror_rate 25192 non-null float64\n", + " 35 dst_host_srv_serror_rate 25192 non-null float64\n", + " 36 dst_host_rerror_rate 25192 non-null float64\n", + " 37 dst_host_srv_rerror_rate 25192 non-null float64\n", + " 38 class 25192 non-null int64 \n", + " 39 index_num 25192 non-null int64 \n", + "dtypes: float64(15), int64(25)\n", + "memory usage: 7.7 MB\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "vzo6lf_G3QFN" + }, + "source": [ + "from sklearn.model_selection import train_test_split\n", + "from sklearn.preprocessing import MinMaxScaler, StandardScaler" + ], + "execution_count": 3, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "ioY_BhsQ3Suc" + }, + "source": [ + "data_y = data[\"class\"]\n", + "data_X = data.drop(columns = [\"class\",\"index_num\"])" + ], + "execution_count": 4, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "Aoz6AkZa3_rU" + }, + "source": [ + "sc = MinMaxScaler()\n", + "_X = sc.fit_transform(data_X)" + ], + "execution_count": 46, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "LffojJ-C1tEY", + "outputId": "2b81d88e-4810-4479-e668-ce1ecc4f377c" + }, + "source": [ + "X_train, X_test, Y_train, Y_test = train_test_split(_X, data_y, test_size=0.3, random_state=42)\n", + "print(X_train.shape, X_test.shape)\n", + "print(Y_train.shape, Y_test.shape)" + ], + "execution_count": 47, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(17634, 38) (7558, 38)\n", + "(17634,) (7558,)\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "R1X0zCyN4qNT" + }, + "source": [ + "## **Naive Bayes**" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "xupFriQx2n6T" + }, + "source": [ + "from sklearn.naive_bayes import GaussianNB" + ], + "execution_count": 59, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "bTXaZ-jf4Slk" + }, + "source": [ + "nb = GaussianNB()" + ], + "execution_count": 60, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "N8lbZQaE4UD8", + "outputId": "977a4c01-5f90-45d4-f590-3e7a8f3283b6" + }, + "source": [ + "nb.fit(X_train, Y_train.values.ravel())" + ], + "execution_count": 61, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "GaussianNB()" + ] + }, + "metadata": {}, + "execution_count": 61 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Kf7YGTRd4WyR" + }, + "source": [ + "Y_test_pred = nb.predict(X_test)" + ], + "execution_count": 62, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "aqMM4cqwvXQt", + "outputId": "dcc7cda0-81ef-46a8-fbf5-97c7a805e851" + }, + "source": [ + "from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score\n", + "\n", + "\n", + "print(\"Accuracy : \", accuracy_score(Y_test,Y_test_pred)) \n", + "\n", + "print(\"Precision : \", precision_score(Y_test,Y_test_pred))\n", + "\n", + "print(\"Recall : \", recall_score(Y_test,Y_test_pred))\n", + "\n", + "print(\"F1 Score : \",f1_score(Y_test,Y_test_pred))" + ], + "execution_count": 65, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Accuracy : 0.9053982535062186\n", + "Precision : 0.8777987591043971\n", + "Recall : 0.9254835039817975\n", + "F1 Score : 0.9010106603904195\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "IYDovkMfwr-X" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file -- GitLab