diff --git a/[DM]Apriori.ipynb b/[DM]Apriori.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..b3638e109ebd28b2313fe146d9b4e3a661946964 --- /dev/null +++ b/[DM]Apriori.ipynb @@ -0,0 +1,2849 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "[DM]Apriori.ipynb", + "provenance": [], + "collapsed_sections": [], + "authorship_tag": "ABX9TyMF242OqNw0l0NQIEHfC2Vp", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "<a href=\"https://colab.research.google.com/github/lani009/IDS-DataMining/blob/main/%5BDM%5DApriori.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "HAY_lKeo6NUE" + }, + "source": [ + "import os\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "import time" + ], + "execution_count": 1, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "J02wdPhK76Yc", + "outputId": "8cf59c87-16dc-40c8-bcfa-a2b635986d1f" + }, + "source": [ + "data = pd.read_csv('DM_data.csv')\n", + "data.info()" + ], + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "<class 'pandas.core.frame.DataFrame'>\n", + "RangeIndex: 25192 entries, 0 to 25191\n", + "Data columns (total 40 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 duration 25192 non-null int64 \n", + " 1 protocol_type 25192 non-null int64 \n", + " 2 service 25192 non-null int64 \n", + " 3 flag 25192 non-null int64 \n", + " 4 src_bytes 25192 non-null 
int64 \n", + " 5 dst_bytes 25192 non-null int64 \n", + " 6 land 25192 non-null int64 \n", + " 7 wrong_fragment 25192 non-null int64 \n", + " 8 hot 25192 non-null int64 \n", + " 9 num_failed_logins 25192 non-null int64 \n", + " 10 logged_in 25192 non-null int64 \n", + " 11 num_compromised 25192 non-null int64 \n", + " 12 root_shell 25192 non-null int64 \n", + " 13 su_attempted 25192 non-null int64 \n", + " 14 num_root 25192 non-null int64 \n", + " 15 num_file_creations 25192 non-null int64 \n", + " 16 num_shells 25192 non-null int64 \n", + " 17 num_access_files 25192 non-null int64 \n", + " 18 is_guest_login 25192 non-null int64 \n", + " 19 count 25192 non-null int64 \n", + " 20 srv_count 25192 non-null int64 \n", + " 21 serror_rate 25192 non-null float64\n", + " 22 srv_serror_rate 25192 non-null float64\n", + " 23 rerror_rate 25192 non-null float64\n", + " 24 srv_rerror_rate 25192 non-null float64\n", + " 25 same_srv_rate 25192 non-null float64\n", + " 26 diff_srv_rate 25192 non-null float64\n", + " 27 srv_diff_host_rate 25192 non-null float64\n", + " 28 dst_host_count 25192 non-null int64 \n", + " 29 dst_host_srv_count 25192 non-null int64 \n", + " 30 dst_host_same_srv_rate 25192 non-null float64\n", + " 31 dst_host_diff_srv_rate 25192 non-null float64\n", + " 32 dst_host_same_src_port_rate 25192 non-null float64\n", + " 33 dst_host_srv_diff_host_rate 25192 non-null float64\n", + " 34 dst_host_serror_rate 25192 non-null float64\n", + " 35 dst_host_srv_serror_rate 25192 non-null float64\n", + " 36 dst_host_rerror_rate 25192 non-null float64\n", + " 37 dst_host_srv_rerror_rate 25192 non-null float64\n", + " 38 class 25192 non-null int64 \n", + " 39 index_num 25192 non-null int64 \n", + "dtypes: float64(15), int64(25)\n", + "memory usage: 7.7 MB\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "phplztW08CAV" + }, + "source": [ + "from sklearn.model_selection import train_test_split\n", + "from sklearn.preprocessing import MinMaxScaler, 
StandardScaler" + ], + "execution_count": 3, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "rWLrmiHs86KH" + }, + "source": [ + "from mlxtend.frequent_patterns import apriori,association_rules" + ], + "execution_count": 4, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 383 + }, + "id": "KpeVfpxYTAHF", + "outputId": "209f825b-169a-44c0-a3da-ca8b57b5438a" + }, + "source": [ + "sc = StandardScaler() \n", + "sc_data = sc.fit_transform(data)\n", + "\n", + "sc_df = pd.DataFrame(sc_data, columns=data.columns)\n", + "sc_df.head(n=10)\n", + "\n", + "#StandardScaler로 data scaling" + ], + "execution_count": 5, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>duration</th>\n", + " <th>protocol_type</th>\n", + " <th>service</th>\n", + " <th>flag</th>\n", + " <th>src_bytes</th>\n", + " <th>dst_bytes</th>\n", + " <th>land</th>\n", + " <th>wrong_fragment</th>\n", + " <th>hot</th>\n", + " <th>num_failed_logins</th>\n", + " <th>logged_in</th>\n", + " <th>num_compromised</th>\n", + " <th>root_shell</th>\n", + " <th>su_attempted</th>\n", + " <th>num_root</th>\n", + " <th>num_file_creations</th>\n", + " <th>num_shells</th>\n", + " <th>num_access_files</th>\n", + " <th>is_guest_login</th>\n", + " <th>count</th>\n", + " <th>srv_count</th>\n", + " <th>serror_rate</th>\n", + " <th>srv_serror_rate</th>\n", + " <th>rerror_rate</th>\n", + " <th>srv_rerror_rate</th>\n", + " <th>same_srv_rate</th>\n", + " <th>diff_srv_rate</th>\n", + " 
<th>srv_diff_host_rate</th>\n", + " <th>dst_host_count</th>\n", + " <th>dst_host_srv_count</th>\n", + " <th>dst_host_same_srv_rate</th>\n", + " <th>dst_host_diff_srv_rate</th>\n", + " <th>dst_host_same_src_port_rate</th>\n", + " <th>dst_host_srv_diff_host_rate</th>\n", + " <th>dst_host_serror_rate</th>\n", + " <th>dst_host_srv_serror_rate</th>\n", + " <th>dst_host_rerror_rate</th>\n", + " <th>dst_host_srv_rerror_rate</th>\n", + " <th>class</th>\n", + " <th>index_num</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>-0.113551</td>\n", + " <td>-0.444009</td>\n", + " <td>-1.399448</td>\n", + " <td>0.744553</td>\n", + " <td>-0.009889</td>\n", + " <td>-0.039310</td>\n", + " <td>-0.00891</td>\n", + " <td>-0.091223</td>\n", + " <td>-0.091933</td>\n", + " <td>-0.02622</td>\n", + " <td>-0.807626</td>\n", + " <td>-0.021873</td>\n", + " <td>-0.039377</td>\n", + " <td>-0.027665</td>\n", + " <td>-0.021724</td>\n", + " <td>-0.027808</td>\n", + " <td>-0.018905</td>\n", + " <td>-0.043917</td>\n", + " <td>-0.09599</td>\n", + " <td>-0.720244</td>\n", + " <td>-0.354628</td>\n", + " <td>-0.640142</td>\n", + " <td>-0.633978</td>\n", + " <td>-0.372186</td>\n", + " <td>-0.373098</td>\n", + " <td>0.772109</td>\n", + " <td>-0.349282</td>\n", + " <td>-0.373886</td>\n", + " <td>-0.328634</td>\n", + " <td>-0.813985</td>\n", + " <td>-0.779157</td>\n", + " <td>-0.280673</td>\n", + " <td>0.073120</td>\n", + " <td>-0.287993</td>\n", + " <td>-0.641804</td>\n", + " <td>-0.627365</td>\n", + " <td>-0.221668</td>\n", + " <td>-0.374281</td>\n", + " <td>-0.934425</td>\n", + " <td>-1.731982</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>-0.113551</td>\n", + " <td>1.325565</td>\n", + " <td>0.780883</td>\n", + " <td>0.744553</td>\n", + " <td>-0.010032</td>\n", + " <td>-0.039310</td>\n", + " <td>-0.00891</td>\n", + " <td>-0.091223</td>\n", + " <td>-0.091933</td>\n", + " <td>-0.02622</td>\n", + " <td>-0.807626</td>\n", + " <td>-0.021873</td>\n", + 
" <td>-0.039377</td>\n", + " <td>-0.027665</td>\n", + " <td>-0.021724</td>\n", + " <td>-0.027808</td>\n", + " <td>-0.018905</td>\n", + " <td>-0.043917</td>\n", + " <td>-0.09599</td>\n", + " <td>-0.624317</td>\n", + " <td>-0.368427</td>\n", + " <td>-0.640142</td>\n", + " <td>-0.633978</td>\n", + " <td>-0.372186</td>\n", + " <td>-0.373098</td>\n", + " <td>-1.320567</td>\n", + " <td>0.490836</td>\n", + " <td>-0.373886</td>\n", + " <td>0.732059</td>\n", + " <td>-1.030895</td>\n", + " <td>-1.157831</td>\n", + " <td>2.764403</td>\n", + " <td>2.375620</td>\n", + " <td>-0.287993</td>\n", + " <td>-0.641804</td>\n", + " <td>-0.627365</td>\n", + " <td>-0.385140</td>\n", + " <td>-0.374281</td>\n", + " <td>-0.934425</td>\n", + " <td>-1.731845</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>-0.113551</td>\n", + " <td>-0.444009</td>\n", + " <td>-1.377199</td>\n", + " <td>-0.917300</td>\n", + " <td>-0.010093</td>\n", + " <td>-0.039310</td>\n", + " <td>-0.00891</td>\n", + " <td>-0.091223</td>\n", + " <td>-0.091933</td>\n", + " <td>-0.02622</td>\n", + " <td>-0.807626</td>\n", + " <td>-0.021873</td>\n", + " <td>-0.039377</td>\n", + " <td>-0.027665</td>\n", + " <td>-0.021724</td>\n", + " <td>-0.027808</td>\n", + " <td>-0.018905</td>\n", + " <td>-0.043917</td>\n", + " <td>-0.09599</td>\n", + " <td>0.334947</td>\n", + " <td>-0.299430</td>\n", + " <td>1.595477</td>\n", + " <td>1.600209</td>\n", + " <td>-0.372186</td>\n", + " <td>-0.373098</td>\n", + " <td>-1.388806</td>\n", + " <td>0.042773</td>\n", + " <td>-0.373886</td>\n", + " <td>0.732059</td>\n", + " <td>-0.804947</td>\n", + " <td>-0.935081</td>\n", + " <td>-0.173828</td>\n", + " <td>-0.478183</td>\n", + " <td>-0.287993</td>\n", + " <td>1.603834</td>\n", + " <td>1.614454</td>\n", + " <td>-0.385140</td>\n", + " <td>-0.374281</td>\n", + " <td>1.070177</td>\n", + " <td>-1.731707</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>-0.113551</td>\n", + " <td>-0.444009</td>\n", + " <td>0.780883</td>\n", + " 
<td>0.744553</td>\n", + " <td>-0.009996</td>\n", + " <td>0.052473</td>\n", + " <td>-0.00891</td>\n", + " <td>-0.091223</td>\n", + " <td>-0.091933</td>\n", + " <td>-0.02622</td>\n", + " <td>1.238197</td>\n", + " <td>-0.021873</td>\n", + " <td>-0.039377</td>\n", + " <td>-0.027665</td>\n", + " <td>-0.021724</td>\n", + " <td>-0.027808</td>\n", + " <td>-0.018905</td>\n", + " <td>-0.043917</td>\n", + " <td>-0.09599</td>\n", + " <td>-0.694082</td>\n", + " <td>-0.313230</td>\n", + " <td>-0.193018</td>\n", + " <td>-0.187141</td>\n", + " <td>-0.372186</td>\n", + " <td>-0.373098</td>\n", + " <td>0.772109</td>\n", + " <td>-0.349282</td>\n", + " <td>-0.373886</td>\n", + " <td>-1.540854</td>\n", + " <td>1.264742</td>\n", + " <td>1.069663</td>\n", + " <td>-0.440940</td>\n", + " <td>-0.380894</td>\n", + " <td>0.073759</td>\n", + " <td>-0.574435</td>\n", + " <td>-0.604947</td>\n", + " <td>-0.385140</td>\n", + " <td>-0.342768</td>\n", + " <td>-0.934425</td>\n", + " <td>-1.731570</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>-0.113551</td>\n", + " <td>-0.444009</td>\n", + " <td>0.780883</td>\n", + " <td>0.744553</td>\n", + " <td>-0.010010</td>\n", + " <td>-0.034582</td>\n", + " <td>-0.00891</td>\n", + " <td>-0.091223</td>\n", + " <td>-0.091933</td>\n", + " <td>-0.02622</td>\n", + " <td>1.238197</td>\n", + " <td>-0.021873</td>\n", + " <td>-0.039377</td>\n", + " <td>-0.027665</td>\n", + " <td>-0.021724</td>\n", + " <td>-0.027808</td>\n", + " <td>-0.018905</td>\n", + " <td>-0.043917</td>\n", + " <td>-0.09599</td>\n", + " <td>-0.476067</td>\n", + " <td>0.059355</td>\n", + " <td>-0.640142</td>\n", + " <td>-0.633978</td>\n", + " <td>-0.372186</td>\n", + " <td>-0.373098</td>\n", + " <td>0.772109</td>\n", + " <td>-0.349282</td>\n", + " <td>-0.023115</td>\n", + " <td>0.732059</td>\n", + " <td>1.264742</td>\n", + " <td>1.069663</td>\n", + " <td>-0.440940</td>\n", + " <td>-0.478183</td>\n", + " <td>-0.287993</td>\n", + " <td>-0.641804</td>\n", + " <td>-0.627365</td>\n", + " 
<td>-0.385140</td>\n", + " <td>-0.374281</td>\n", + " <td>-0.934425</td>\n", + " <td>-1.731432</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>-0.113551</td>\n", + " <td>-0.444009</td>\n", + " <td>-1.377199</td>\n", + " <td>-2.025203</td>\n", + " <td>-0.010093</td>\n", + " <td>-0.039310</td>\n", + " <td>-0.00891</td>\n", + " <td>-0.091223</td>\n", + " <td>-0.091933</td>\n", + " <td>-0.02622</td>\n", + " <td>-0.807626</td>\n", + " <td>-0.021873</td>\n", + " <td>-0.039377</td>\n", + " <td>-0.027665</td>\n", + " <td>-0.021724</td>\n", + " <td>-0.027808</td>\n", + " <td>-0.018905</td>\n", + " <td>-0.043917</td>\n", + " <td>-0.09599</td>\n", + " <td>0.317506</td>\n", + " <td>-0.120038</td>\n", + " <td>-0.640142</td>\n", + " <td>-0.633978</td>\n", + " <td>2.765176</td>\n", + " <td>2.729322</td>\n", + " <td>-1.138595</td>\n", + " <td>-0.013235</td>\n", + " <td>-0.373886</td>\n", + " <td>0.732059</td>\n", + " <td>-0.868212</td>\n", + " <td>-1.001906</td>\n", + " <td>-0.066984</td>\n", + " <td>-0.478183</td>\n", + " <td>-0.287993</td>\n", + " <td>-0.641804</td>\n", + " <td>-0.627365</td>\n", + " <td>2.884296</td>\n", + " <td>2.777041</td>\n", + " <td>1.070177</td>\n", + " <td>-1.731295</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>-0.113551</td>\n", + " <td>-0.444009</td>\n", + " <td>-1.377199</td>\n", + " <td>-0.917300</td>\n", + " <td>-0.010093</td>\n", + " <td>-0.039310</td>\n", + " <td>-0.00891</td>\n", + " <td>-0.091223</td>\n", + " <td>-0.091933</td>\n", + " <td>-0.02622</td>\n", + " <td>-0.807626</td>\n", + " <td>-0.021873</td>\n", + " <td>-0.039377</td>\n", + " <td>-0.027665</td>\n", + " <td>-0.021724</td>\n", + " <td>-0.027808</td>\n", + " <td>-0.018905</td>\n", + " <td>-0.043917</td>\n", + " <td>-0.09599</td>\n", + " <td>0.709933</td>\n", + " <td>-0.258032</td>\n", + " <td>1.595477</td>\n", + " <td>1.600209</td>\n", + " <td>-0.372186</td>\n", + " <td>-0.373098</td>\n", + " <td>-1.388806</td>\n", + " <td>-0.013235</td>\n", + " 
<td>-0.373886</td>\n", + " <td>0.732059</td>\n", + " <td>-0.958592</td>\n", + " <td>-1.068731</td>\n", + " <td>-0.173828</td>\n", + " <td>-0.478183</td>\n", + " <td>-0.287993</td>\n", + " <td>1.603834</td>\n", + " <td>1.614454</td>\n", + " <td>-0.385140</td>\n", + " <td>-0.374281</td>\n", + " <td>1.070177</td>\n", + " <td>-1.731157</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>-0.113551</td>\n", + " <td>-0.444009</td>\n", + " <td>-1.377199</td>\n", + " <td>-0.917300</td>\n", + " <td>-0.010093</td>\n", + " <td>-0.039310</td>\n", + " <td>-0.00891</td>\n", + " <td>-0.091223</td>\n", + " <td>-0.091933</td>\n", + " <td>-0.02622</td>\n", + " <td>-0.807626</td>\n", + " <td>-0.021873</td>\n", + " <td>-0.039377</td>\n", + " <td>-0.027665</td>\n", + " <td>-0.021724</td>\n", + " <td>-0.027808</td>\n", + " <td>-0.018905</td>\n", + " <td>-0.043917</td>\n", + " <td>-0.09599</td>\n", + " <td>0.282624</td>\n", + " <td>-0.161436</td>\n", + " <td>1.595477</td>\n", + " <td>1.600209</td>\n", + " <td>-0.372186</td>\n", + " <td>-0.373098</td>\n", + " <td>-1.184088</td>\n", + " <td>-0.013235</td>\n", + " <td>-0.373886</td>\n", + " <td>0.732059</td>\n", + " <td>-0.904364</td>\n", + " <td>-1.024181</td>\n", + " <td>-0.066984</td>\n", + " <td>-0.478183</td>\n", + " <td>-0.287993</td>\n", + " <td>1.603834</td>\n", + " <td>1.614454</td>\n", + " <td>-0.385140</td>\n", + " <td>-0.374281</td>\n", + " <td>1.070177</td>\n", + " <td>-1.731019</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>-0.113551</td>\n", + " <td>-0.444009</td>\n", + " <td>0.780883</td>\n", + " <td>-0.917300</td>\n", + " <td>-0.010093</td>\n", + " <td>-0.039310</td>\n", + " <td>-0.00891</td>\n", + " <td>-0.091223</td>\n", + " <td>-0.091933</td>\n", + " <td>-0.02622</td>\n", + " <td>-0.807626</td>\n", + " <td>-0.021873</td>\n", + " <td>-0.039377</td>\n", + " <td>-0.027665</td>\n", + " <td>-0.021724</td>\n", + " <td>-0.027808</td>\n", + " <td>-0.018905</td>\n", + " <td>-0.043917</td>\n", + " 
<td>-0.09599</td>\n", + " <td>1.616874</td>\n", + " <td>-0.064840</td>\n", + " <td>1.595477</td>\n", + " <td>1.600209</td>\n", + " <td>-0.372186</td>\n", + " <td>-0.373098</td>\n", + " <td>-1.297820</td>\n", + " <td>-0.069243</td>\n", + " <td>-0.373886</td>\n", + " <td>0.732059</td>\n", + " <td>-0.832060</td>\n", + " <td>-0.957356</td>\n", + " <td>-0.173828</td>\n", + " <td>-0.478183</td>\n", + " <td>-0.287993</td>\n", + " <td>1.603834</td>\n", + " <td>1.614454</td>\n", + " <td>-0.385140</td>\n", + " <td>-0.374281</td>\n", + " <td>1.070177</td>\n", + " <td>-1.730882</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>-0.113551</td>\n", + " <td>-0.444009</td>\n", + " <td>-1.377199</td>\n", + " <td>-0.917300</td>\n", + " <td>-0.010093</td>\n", + " <td>-0.039310</td>\n", + " <td>-0.00891</td>\n", + " <td>-0.091223</td>\n", + " <td>-0.091933</td>\n", + " <td>-0.02622</td>\n", + " <td>-0.807626</td>\n", + " <td>-0.021873</td>\n", + " <td>-0.039377</td>\n", + " <td>-0.027665</td>\n", + " <td>-0.021724</td>\n", + " <td>-0.027808</td>\n", + " <td>-0.018905</td>\n", + " <td>-0.043917</td>\n", + " <td>-0.09599</td>\n", + " <td>0.422153</td>\n", + " <td>-0.271831</td>\n", + " <td>1.595477</td>\n", + " <td>1.600209</td>\n", + " <td>-0.372186</td>\n", + " <td>-0.373098</td>\n", + " <td>-1.366060</td>\n", + " <td>-0.013235</td>\n", + " <td>-0.373886</td>\n", + " <td>0.732059</td>\n", + " <td>-0.922440</td>\n", + " <td>-1.046456</td>\n", + " <td>-0.120406</td>\n", + " <td>-0.478183</td>\n", + " <td>-0.287993</td>\n", + " <td>1.603834</td>\n", + " <td>1.614454</td>\n", + " <td>-0.385140</td>\n", + " <td>-0.374281</td>\n", + " <td>1.070177</td>\n", + " <td>-1.730744</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " duration protocol_type ... class index_num\n", + "0 -0.113551 -0.444009 ... -0.934425 -1.731982\n", + "1 -0.113551 1.325565 ... -0.934425 -1.731845\n", + "2 -0.113551 -0.444009 ... 
1.070177 -1.731707\n", + "3 -0.113551 -0.444009 ... -0.934425 -1.731570\n", + "4 -0.113551 -0.444009 ... -0.934425 -1.731432\n", + "5 -0.113551 -0.444009 ... 1.070177 -1.731295\n", + "6 -0.113551 -0.444009 ... 1.070177 -1.731157\n", + "7 -0.113551 -0.444009 ... 1.070177 -1.731019\n", + "8 -0.113551 -0.444009 ... 1.070177 -1.730882\n", + "9 -0.113551 -0.444009 ... 1.070177 -1.730744\n", + "\n", + "[10 rows x 40 columns]" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 383 + }, + "id": "LKK6fIznTzpy", + "outputId": "09aac2ea-3311-4583-ca47-bb6a2e85be99" + }, + "source": [ + "def encode_units(x):\n", + " if x <= 0 :\n", + " return 0\n", + " if x > 0 :\n", + " return 1\n", + "\n", + "train_df = sc_df.applymap(encode_units)\n", + "\n", + "train_df.head(n=10)\n", + "\n", + "#classification을 위해 scaling 시킨 data들을 음수면 0, 양수면 1로 encoding" + ], + "execution_count": 6, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>duration</th>\n", + " <th>protocol_type</th>\n", + " <th>service</th>\n", + " <th>flag</th>\n", + " <th>src_bytes</th>\n", + " <th>dst_bytes</th>\n", + " <th>land</th>\n", + " <th>wrong_fragment</th>\n", + " <th>hot</th>\n", + " <th>num_failed_logins</th>\n", + " <th>logged_in</th>\n", + " <th>num_compromised</th>\n", + " <th>root_shell</th>\n", + " <th>su_attempted</th>\n", + " <th>num_root</th>\n", + " <th>num_file_creations</th>\n", + " <th>num_shells</th>\n", + " 
<th>num_access_files</th>\n", + " <th>is_guest_login</th>\n", + " <th>count</th>\n", + " <th>srv_count</th>\n", + " <th>serror_rate</th>\n", + " <th>srv_serror_rate</th>\n", + " <th>rerror_rate</th>\n", + " <th>srv_rerror_rate</th>\n", + " <th>same_srv_rate</th>\n", + " <th>diff_srv_rate</th>\n", + " <th>srv_diff_host_rate</th>\n", + " <th>dst_host_count</th>\n", + " <th>dst_host_srv_count</th>\n", + " <th>dst_host_same_srv_rate</th>\n", + " <th>dst_host_diff_srv_rate</th>\n", + " <th>dst_host_same_src_port_rate</th>\n", + " <th>dst_host_srv_diff_host_rate</th>\n", + " <th>dst_host_serror_rate</th>\n", + " <th>dst_host_srv_serror_rate</th>\n", + " <th>dst_host_rerror_rate</th>\n", + " <th>dst_host_srv_rerror_rate</th>\n", + " <th>class</th>\n", + " <th>index_num</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " 
<td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " 
<td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " 
<td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " 
<td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " duration protocol_type service ... dst_host_srv_rerror_rate class index_num\n", + "0 0 0 0 ... 0 0 0\n", + "1 0 1 1 ... 0 0 0\n", + "2 0 0 0 ... 0 1 0\n", + "3 0 0 1 ... 0 0 0\n", + "4 0 0 1 ... 0 0 0\n", + "5 0 0 0 ... 1 1 0\n", + "6 0 0 0 ... 0 1 0\n", + "7 0 0 0 ... 0 1 0\n", + "8 0 0 1 ... 0 1 0\n", + "9 0 0 0 ... 
0 1 0\n", + "\n", + "[10 rows x 40 columns]" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "JmfJO9mn9_Te", + "outputId": "2e19f283-3632-44c6-e9d1-e0b090e41721" + }, + "source": [ + "data_X = train_df.drop(columns = [\"index_num\"])\n", + "\n", + "X_train, X_test = train_test_split(data_X, test_size=0.3, random_state=42)\n", + "print(X_train.shape, X_test.shape)\n", + "\n", + "#train data와 test data를 7:3 의 비율로 split" + ], + "execution_count": 7, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(17634, 39) (7558, 39)\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 226 + }, + "id": "FAqOwB0oVeAK", + "outputId": "d61b213d-ba8d-4aee-d0af-b1f69ec5c903" + }, + "source": [ + "df = pd.DataFrame(X_train, columns=data.drop(columns = [\"index_num\"]).columns)\n", + "\n", + "df.head()" + ], + "execution_count": 8, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>duration</th>\n", + " <th>protocol_type</th>\n", + " <th>service</th>\n", + " <th>flag</th>\n", + " <th>src_bytes</th>\n", + " <th>dst_bytes</th>\n", + " <th>land</th>\n", + " <th>wrong_fragment</th>\n", + " <th>hot</th>\n", + " <th>num_failed_logins</th>\n", + " <th>logged_in</th>\n", + " <th>num_compromised</th>\n", + " <th>root_shell</th>\n", + " <th>su_attempted</th>\n", + " <th>num_root</th>\n", + " 
<th>num_file_creations</th>\n", + " <th>num_shells</th>\n", + " <th>num_access_files</th>\n", + " <th>is_guest_login</th>\n", + " <th>count</th>\n", + " <th>srv_count</th>\n", + " <th>serror_rate</th>\n", + " <th>srv_serror_rate</th>\n", + " <th>rerror_rate</th>\n", + " <th>srv_rerror_rate</th>\n", + " <th>same_srv_rate</th>\n", + " <th>diff_srv_rate</th>\n", + " <th>srv_diff_host_rate</th>\n", + " <th>dst_host_count</th>\n", + " <th>dst_host_srv_count</th>\n", + " <th>dst_host_same_srv_rate</th>\n", + " <th>dst_host_diff_srv_rate</th>\n", + " <th>dst_host_same_src_port_rate</th>\n", + " <th>dst_host_srv_diff_host_rate</th>\n", + " <th>dst_host_serror_rate</th>\n", + " <th>dst_host_srv_serror_rate</th>\n", + " <th>dst_host_rerror_rate</th>\n", + " <th>dst_host_srv_rerror_rate</th>\n", + " <th>class</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>741</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>411</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " 
<td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>17841</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>20962</th>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " 
<td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>17790</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " duration protocol_type ... dst_host_srv_rerror_rate class\n", + "741 0 0 ... 0 1\n", + "411 0 0 ... 1 1\n", + "17841 0 0 ... 1 1\n", + "20962 0 1 ... 0 1\n", + "17790 0 0 ... 
0 1\n", + "\n", + "[5 rows x 39 columns]" + ] + }, + "metadata": {}, + "execution_count": 8 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 424 + }, + "id": "71bCJO3_-Nrz", + "outputId": "036db7fa-6eb4-4faa-a903-4dfa82521931" + }, + "source": [ + "frequent_itemsets = apriori( df, min_support = 0.27, use_colnames=True)\n", + "result_desc = frequent_itemsets.sort_values(['support'],ascending =[False])\n", + "result_desc" + ], + "execution_count": 9, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>support</th>\n", + " <th>itemsets</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>0.642225</td>\n", + " <td>(dst_host_count)</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>0.622547</td>\n", + " <td>(same_srv_rate)</td>\n", + " </tr>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0.618634</td>\n", + " <td>(service)</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>0.611773</td>\n", + " <td>(flag)</td>\n", + " </tr>\n", + " <tr>\n", + " <th>20</th>\n", + " <td>0.567143</td>\n", + " <td>(flag, same_srv_rate)</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>75</th>\n", + " <td>0.273789</td>\n", + " <td>(class, dst_host_srv_serror_rate, dst_host_ser...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>94</th>\n", + " <td>0.273733</td>\n", + " <td>(class, dst_host_srv_serror_rate, serror_rate,...</td>\n", 
+ " </tr>\n", + " <tr>\n", + " <th>95</th>\n", + " <td>0.273676</td>\n", + " <td>(class, dst_host_srv_serror_rate, srv_serror_r...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>102</th>\n", + " <td>0.273676</td>\n", + " <td>(class, srv_serror_rate, serror_rate, dst_host...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>39</th>\n", + " <td>0.270727</td>\n", + " <td>(dst_host_count, dst_host_serror_rate)</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>104 rows × 2 columns</p>\n", + "</div>" + ], + "text/plain": [ + " support itemsets\n", + "7 0.642225 (dst_host_count)\n", + "6 0.622547 (same_srv_rate)\n", + "0 0.618634 (service)\n", + "1 0.611773 (flag)\n", + "20 0.567143 (flag, same_srv_rate)\n", + ".. ... ...\n", + "75 0.273789 (class, dst_host_srv_serror_rate, dst_host_ser...\n", + "94 0.273733 (class, dst_host_srv_serror_rate, serror_rate,...\n", + "95 0.273676 (class, dst_host_srv_serror_rate, srv_serror_r...\n", + "102 0.273676 (class, srv_serror_rate, serror_rate, dst_host...\n", + "39 0.270727 (dst_host_count, dst_host_serror_rate)\n", + "\n", + "[104 rows x 2 columns]" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 632 + }, + "id": "DTT1_SWX-btw", + "outputId": "b837035f-880e-4cd0-8345-ad2da779bb21" + }, + "source": [ + "rules = association_rules(result_desc , metric = \"confidence\" , min_threshold = 0.9)\n", + "rules = rules.sort_values(['confidence','lift'], ascending=[False , False])\n", + "rules" + ], + "execution_count": 10, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" 
class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>antecedents</th>\n", + " <th>consequents</th>\n", + " <th>antecedent support</th>\n", + " <th>consequent support</th>\n", + " <th>support</th>\n", + " <th>confidence</th>\n", + " <th>lift</th>\n", + " <th>leverage</th>\n", + " <th>conviction</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>220</th>\n", + " <td>(srv_serror_rate, dst_host_serror_rate)</td>\n", + " <td>(serror_rate)</td>\n", + " <td>0.275264</td>\n", + " <td>0.286152</td>\n", + " <td>0.275264</td>\n", + " <td>1.000000</td>\n", + " <td>3.494649</td>\n", + " <td>0.196497</td>\n", + " <td>inf</td>\n", + " </tr>\n", + " <tr>\n", + " <th>250</th>\n", + " <td>(class, srv_serror_rate, dst_host_serror_rate)</td>\n", + " <td>(serror_rate)</td>\n", + " <td>0.274413</td>\n", + " <td>0.286152</td>\n", + " <td>0.274413</td>\n", + " <td>1.000000</td>\n", + " <td>3.494649</td>\n", + " <td>0.195889</td>\n", + " <td>inf</td>\n", + " </tr>\n", + " <tr>\n", + " <th>287</th>\n", + " <td>(dst_host_srv_serror_rate, srv_serror_rate, ds...</td>\n", + " <td>(serror_rate)</td>\n", + " <td>0.273959</td>\n", + " <td>0.286152</td>\n", + " <td>0.273959</td>\n", + " <td>1.000000</td>\n", + " <td>3.494649</td>\n", + " <td>0.195565</td>\n", + " <td>inf</td>\n", + " </tr>\n", + " <tr>\n", + " <th>339</th>\n", + " <td>(class, dst_host_srv_serror_rate, srv_serror_r...</td>\n", + " <td>(serror_rate)</td>\n", + " <td>0.273676</td>\n", + " <td>0.286152</td>\n", + " <td>0.273676</td>\n", + " <td>1.000000</td>\n", + " <td>3.494649</td>\n", + " <td>0.195363</td>\n", + " <td>inf</td>\n", + " </tr>\n", + " <tr>\n", + " <th>215</th>\n", + " <td>(dst_host_srv_serror_rate, serror_rate)</td>\n", + " <td>(srv_serror_rate)</td>\n", + " <td>0.275377</td>\n", + " <td>0.283600</td>\n", + " <td>0.275320</td>\n", + " <td>0.999794</td>\n", + " <td>3.525369</td>\n", + " <td>0.197223</td>\n", + " <td>3478.839061</td>\n", + 
" </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>86</th>\n", + " <td>(service, same_srv_rate, logged_in)</td>\n", + " <td>(dst_host_same_srv_rate, flag)</td>\n", + " <td>0.353351</td>\n", + " <td>0.470795</td>\n", + " <td>0.318759</td>\n", + " <td>0.902102</td>\n", + " <td>1.916125</td>\n", + " <td>0.152403</td>\n", + " <td>5.405698</td>\n", + " </tr>\n", + " <tr>\n", + " <th>197</th>\n", + " <td>(dst_host_same_srv_rate, flag, logged_in)</td>\n", + " <td>(service, dst_host_srv_count, same_srv_rate)</td>\n", + " <td>0.331802</td>\n", + " <td>0.344732</td>\n", + " <td>0.299195</td>\n", + " <td>0.901726</td>\n", + " <td>2.615733</td>\n", + " <td>0.184812</td>\n", + " <td>6.667782</td>\n", + " </tr>\n", + " <tr>\n", + " <th>75</th>\n", + " <td>(service, flag, logged_in)</td>\n", + " <td>(dst_host_same_srv_rate)</td>\n", + " <td>0.355450</td>\n", + " <td>0.498809</td>\n", + " <td>0.320347</td>\n", + " <td>0.901244</td>\n", + " <td>1.806792</td>\n", + " <td>0.143046</td>\n", + " <td>5.075064</td>\n", + " </tr>\n", + " <tr>\n", + " <th>152</th>\n", + " <td>(dst_host_same_srv_rate, logged_in)</td>\n", + " <td>(service, dst_host_srv_count, same_srv_rate)</td>\n", + " <td>0.334524</td>\n", + " <td>0.344732</td>\n", + " <td>0.301463</td>\n", + " <td>0.901170</td>\n", + " <td>2.614118</td>\n", + " <td>0.186142</td>\n", + " <td>6.630236</td>\n", + " </tr>\n", + " <tr>\n", + " <th>40</th>\n", + " <td>(logged_in)</td>\n", + " <td>(service, same_srv_rate)</td>\n", + " <td>0.392254</td>\n", + " <td>0.434388</td>\n", + " <td>0.353351</td>\n", + " <td>0.900824</td>\n", + " <td>2.073777</td>\n", + " <td>0.182961</td>\n", + " <td>5.703116</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>367 rows × 9 columns</p>\n", + "</div>" + ], + "text/plain": [ + " 
antecedents ... conviction\n", + "220 (srv_serror_rate, dst_host_serror_rate) ... inf\n", + "250 (class, srv_serror_rate, dst_host_serror_rate) ... inf\n", + "287 (dst_host_srv_serror_rate, srv_serror_rate, ds... ... inf\n", + "339 (class, dst_host_srv_serror_rate, srv_serror_r... ... inf\n", + "215 (dst_host_srv_serror_rate, serror_rate) ... 3478.839061\n", + ".. ... ... ...\n", + "86 (service, same_srv_rate, logged_in) ... 5.405698\n", + "197 (dst_host_same_srv_rate, flag, logged_in) ... 6.667782\n", + "75 (service, flag, logged_in) ... 5.075064\n", + "152 (dst_host_same_srv_rate, logged_in) ... 6.630236\n", + "40 (logged_in) ... 5.703116\n", + "\n", + "[367 rows x 9 columns]" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 659 + }, + "id": "vy-AH96DXMYb", + "outputId": "41be0e5a-fd05-4558-c0ca-f42de66d09a2" + }, + "source": [ + "rules_list = rules[rules['consequents'] == {\"class\"}]\n", + "rules_list" + ], + "execution_count": 11, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>antecedents</th>\n", + " <th>consequents</th>\n", + " <th>antecedent support</th>\n", + " <th>consequent support</th>\n", + " <th>support</th>\n", + " <th>confidence</th>\n", + " <th>lift</th>\n", + " <th>leverage</th>\n", + " <th>conviction</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>314</th>\n", + " <td>(dst_host_srv_serror_rate, serror_rate, dst_ho...</td>\n", + " 
<td>(class)</td>\n", + " <td>0.274016</td>\n", + " <td>0.466542</td>\n", + " <td>0.273733</td>\n", + " <td>0.998965</td>\n", + " <td>2.141212</td>\n", + " <td>0.145893</td>\n", + " <td>515.533900</td>\n", + " </tr>\n", + " <tr>\n", + " <th>327</th>\n", + " <td>(dst_host_srv_serror_rate, srv_serror_rate, ds...</td>\n", + " <td>(class)</td>\n", + " <td>0.273959</td>\n", + " <td>0.466542</td>\n", + " <td>0.273676</td>\n", + " <td>0.998965</td>\n", + " <td>2.141212</td>\n", + " <td>0.145862</td>\n", + " <td>515.427209</td>\n", + " </tr>\n", + " <tr>\n", + " <th>341</th>\n", + " <td>(serror_rate, dst_host_srv_serror_rate, srv_se...</td>\n", + " <td>(class)</td>\n", + " <td>0.273959</td>\n", + " <td>0.466542</td>\n", + " <td>0.273676</td>\n", + " <td>0.998965</td>\n", + " <td>2.141212</td>\n", + " <td>0.145862</td>\n", + " <td>515.427209</td>\n", + " </tr>\n", + " <tr>\n", + " <th>246</th>\n", + " <td>(srv_serror_rate, dst_host_serror_rate)</td>\n", + " <td>(class)</td>\n", + " <td>0.275264</td>\n", + " <td>0.466542</td>\n", + " <td>0.274413</td>\n", + " <td>0.996910</td>\n", + " <td>2.136806</td>\n", + " <td>0.145991</td>\n", + " <td>172.627039</td>\n", + " </tr>\n", + " <tr>\n", + " <th>251</th>\n", + " <td>(serror_rate, srv_serror_rate, dst_host_serror...</td>\n", + " <td>(class)</td>\n", + " <td>0.275264</td>\n", + " <td>0.466542</td>\n", + " <td>0.274413</td>\n", + " <td>0.996910</td>\n", + " <td>2.136806</td>\n", + " <td>0.145991</td>\n", + " <td>172.627039</td>\n", + " </tr>\n", + " <tr>\n", + " <th>236</th>\n", + " <td>(dst_host_srv_serror_rate, srv_serror_rate)</td>\n", + " <td>(class)</td>\n", + " <td>0.275604</td>\n", + " <td>0.466542</td>\n", + " <td>0.274583</td>\n", + " <td>0.996296</td>\n", + " <td>2.135492</td>\n", + " <td>0.146002</td>\n", + " <td>144.033685</td>\n", + " </tr>\n", + " <tr>\n", + " <th>264</th>\n", + " <td>(dst_host_srv_serror_rate, serror_rate)</td>\n", + " <td>(class)</td>\n", + " <td>0.275377</td>\n", + " <td>0.466542</td>\n", + " 
<td>0.274356</td>\n", + " <td>0.996293</td>\n", + " <td>2.135485</td>\n", + " <td>0.145881</td>\n", + " <td>143.915139</td>\n", + " </tr>\n", + " <tr>\n", + " <th>269</th>\n", + " <td>(serror_rate, dst_host_srv_serror_rate, srv_se...</td>\n", + " <td>(class)</td>\n", + " <td>0.275320</td>\n", + " <td>0.466542</td>\n", + " <td>0.274300</td>\n", + " <td>0.996292</td>\n", + " <td>2.135483</td>\n", + " <td>0.145851</td>\n", + " <td>143.885502</td>\n", + " </tr>\n", + " <tr>\n", + " <th>241</th>\n", + " <td>(serror_rate, dst_host_serror_rate)</td>\n", + " <td>(class)</td>\n", + " <td>0.276341</td>\n", + " <td>0.466542</td>\n", + " <td>0.274526</td>\n", + " <td>0.993433</td>\n", + " <td>2.129355</td>\n", + " <td>0.145602</td>\n", + " <td>81.235665</td>\n", + " </tr>\n", + " <tr>\n", + " <th>308</th>\n", + " <td>(dst_host_srv_serror_rate, dst_host_serror_rate)</td>\n", + " <td>(class)</td>\n", + " <td>0.276625</td>\n", + " <td>0.466542</td>\n", + " <td>0.273789</td>\n", + " <td>0.989750</td>\n", + " <td>2.121460</td>\n", + " <td>0.144732</td>\n", + " <td>52.044171</td>\n", + " </tr>\n", + " <tr>\n", + " <th>233</th>\n", + " <td>(dst_host_srv_serror_rate)</td>\n", + " <td>(class)</td>\n", + " <td>0.279233</td>\n", + " <td>0.466542</td>\n", + " <td>0.274697</td>\n", + " <td>0.983753</td>\n", + " <td>2.108606</td>\n", + " <td>0.144423</td>\n", + " <td>32.834346</td>\n", + " </tr>\n", + " <tr>\n", + " <th>228</th>\n", + " <td>(serror_rate, srv_serror_rate)</td>\n", + " <td>(class)</td>\n", + " <td>0.282352</td>\n", + " <td>0.466542</td>\n", + " <td>0.275150</td>\n", + " <td>0.974493</td>\n", + " <td>2.088757</td>\n", + " <td>0.143421</td>\n", + " <td>20.914077</td>\n", + " </tr>\n", + " <tr>\n", + " <th>210</th>\n", + " <td>(srv_serror_rate)</td>\n", + " <td>(class)</td>\n", + " <td>0.283600</td>\n", + " <td>0.466542</td>\n", + " <td>0.275434</td>\n", + " <td>0.971206</td>\n", + " <td>2.081712</td>\n", + " <td>0.143123</td>\n", + " <td>18.526555</td>\n", + " </tr>\n", + " 
<tr>\n", + " <th>207</th>\n", + " <td>(serror_rate)</td>\n", + " <td>(class)</td>\n", + " <td>0.286152</td>\n", + " <td>0.466542</td>\n", + " <td>0.276001</td>\n", + " <td>0.964526</td>\n", + " <td>2.067395</td>\n", + " <td>0.142499</td>\n", + " <td>15.038154</td>\n", + " </tr>\n", + " <tr>\n", + " <th>232</th>\n", + " <td>(dst_host_serror_rate)</td>\n", + " <td>(class)</td>\n", + " <td>0.285358</td>\n", + " <td>0.466542</td>\n", + " <td>0.274980</td>\n", + " <td>0.963633</td>\n", + " <td>2.065480</td>\n", + " <td>0.141849</td>\n", + " <td>14.668640</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " antecedents ... conviction\n", + "314 (dst_host_srv_serror_rate, serror_rate, dst_ho... ... 515.533900\n", + "327 (dst_host_srv_serror_rate, srv_serror_rate, ds... ... 515.427209\n", + "341 (serror_rate, dst_host_srv_serror_rate, srv_se... ... 515.427209\n", + "246 (srv_serror_rate, dst_host_serror_rate) ... 172.627039\n", + "251 (serror_rate, srv_serror_rate, dst_host_serror... ... 172.627039\n", + "236 (dst_host_srv_serror_rate, srv_serror_rate) ... 144.033685\n", + "264 (dst_host_srv_serror_rate, serror_rate) ... 143.915139\n", + "269 (serror_rate, dst_host_srv_serror_rate, srv_se... ... 143.885502\n", + "241 (serror_rate, dst_host_serror_rate) ... 81.235665\n", + "308 (dst_host_srv_serror_rate, dst_host_serror_rate) ... 52.044171\n", + "233 (dst_host_srv_serror_rate) ... 32.834346\n", + "228 (serror_rate, srv_serror_rate) ... 20.914077\n", + "210 (srv_serror_rate) ... 18.526555\n", + "207 (serror_rate) ... 15.038154\n", + "232 (dst_host_serror_rate) ... 
14.668640\n", + "\n", + "[15 rows x 9 columns]" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mmAOlrta8P1p", + "outputId": "87228663-6c09-4a17-ad78-ce1072472707" + }, + "source": [ + "col = rules_list['antecedents']\n", + "col" + ], + "execution_count": 12, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "314 (dst_host_srv_serror_rate, serror_rate, dst_ho...\n", + "327 (dst_host_srv_serror_rate, srv_serror_rate, ds...\n", + "341 (serror_rate, dst_host_srv_serror_rate, srv_se...\n", + "246 (srv_serror_rate, dst_host_serror_rate)\n", + "251 (serror_rate, srv_serror_rate, dst_host_serror...\n", + "236 (dst_host_srv_serror_rate, srv_serror_rate)\n", + "264 (dst_host_srv_serror_rate, serror_rate)\n", + "269 (serror_rate, dst_host_srv_serror_rate, srv_se...\n", + "241 (serror_rate, dst_host_serror_rate)\n", + "308 (dst_host_srv_serror_rate, dst_host_serror_rate)\n", + "233 (dst_host_srv_serror_rate)\n", + "228 (serror_rate, srv_serror_rate)\n", + "210 (srv_serror_rate)\n", + "207 (serror_rate)\n", + "232 (dst_host_serror_rate)\n", + "Name: antecedents, dtype: object" + ] + }, + "metadata": {}, + "execution_count": 12 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "jmbME98E91xf" + }, + "source": [ + "col.to_csv('./col_list.csv')" + ], + "execution_count": 13, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 383 + }, + "id": "9jVN92COXRsJ", + "outputId": "d1dad66e-914a-4386-a477-97ea3e348c55" + }, + "source": [ + "test = pd.DataFrame(X_test, columns=data.drop(columns = [\"index_num\"]).columns)\n", + "test.head(n=10)" + ], + "execution_count": 14, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " 
vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>duration</th>\n", + " <th>protocol_type</th>\n", + " <th>service</th>\n", + " <th>flag</th>\n", + " <th>src_bytes</th>\n", + " <th>dst_bytes</th>\n", + " <th>land</th>\n", + " <th>wrong_fragment</th>\n", + " <th>hot</th>\n", + " <th>num_failed_logins</th>\n", + " <th>logged_in</th>\n", + " <th>num_compromised</th>\n", + " <th>root_shell</th>\n", + " <th>su_attempted</th>\n", + " <th>num_root</th>\n", + " <th>num_file_creations</th>\n", + " <th>num_shells</th>\n", + " <th>num_access_files</th>\n", + " <th>is_guest_login</th>\n", + " <th>count</th>\n", + " <th>srv_count</th>\n", + " <th>serror_rate</th>\n", + " <th>srv_serror_rate</th>\n", + " <th>rerror_rate</th>\n", + " <th>srv_rerror_rate</th>\n", + " <th>same_srv_rate</th>\n", + " <th>diff_srv_rate</th>\n", + " <th>srv_diff_host_rate</th>\n", + " <th>dst_host_count</th>\n", + " <th>dst_host_srv_count</th>\n", + " <th>dst_host_same_srv_rate</th>\n", + " <th>dst_host_diff_srv_rate</th>\n", + " <th>dst_host_same_src_port_rate</th>\n", + " <th>dst_host_srv_diff_host_rate</th>\n", + " <th>dst_host_serror_rate</th>\n", + " <th>dst_host_srv_serror_rate</th>\n", + " <th>dst_host_rerror_rate</th>\n", + " <th>dst_host_srv_rerror_rate</th>\n", + " <th>class</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>19064</th>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " 
<td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>11127</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6517</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " 
<tr>\n", + " <th>2973</th>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>13339</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>19289</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " 
<td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2166</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5548</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10887</th>\n", + " 
<td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2222</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " duration protocol_type ... dst_host_srv_rerror_rate class\n", + "19064 0 1 ... 0 1\n", + "11127 0 0 ... 1 0\n", + "6517 0 0 ... 0 1\n", + "2973 0 1 ... 0 0\n", + "13339 0 0 ... 0 1\n", + "19289 0 0 ... 0 0\n", + "2166 0 0 ... 0 0\n", + "5548 0 0 ... 0 0\n", + "10887 0 0 ... 0 0\n", + "2222 0 0 ... 
0 1\n", + "\n", + "[10 rows x 39 columns]" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "hBLP3xt-ulXS" + }, + "source": [ + "col = ['dst_host_srv_serror_rate', 'srv_serror_rate', 'serror_rate', 'dst_host_serror_rate']" + ], + "execution_count": 15, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vCNJIYoeD1sO", + "outputId": "5042860c-02d6-46fb-fc23-97f8327f70ef" + }, + "source": [ + "idx_b = test[(test['dst_host_srv_serror_rate'] == 0) | (test['srv_serror_rate'] == 0) | (test['serror_rate'] == 0) | (test['dst_host_serror_rate'] == 0)].index\n", + "test_df = test.drop(idx_b)\n", + "\n", + "idx_class = test_df[test_df['class'] == 0 ].index\n", + "test_err = test_df.drop(idx_class)\n", + "\n", + "print(test_df.shape)\n", + "print(test_err.shape)" + ], + "execution_count": 17, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(2121, 39)\n", + "(2120, 39)\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "RWB6lAq0_GWg", + "outputId": "ce73c2e8-22d1-4977-c766-6f074d2945e8" + }, + "source": [ + "idx_a = test[(test['dst_host_srv_serror_rate'] == 1) & (test['srv_serror_rate'] == 1) & (test['serror_rate'] == 1) & (test['dst_host_serror_rate'] == 1)].index\n", + "test_df = test.drop(idx_a)\n", + "\n", + "idx_class = test_df[test_df['class'] == 1 ].index\n", + "test_err = test_df.drop(idx_class)\n", + "\n", + "print(test_df.shape)\n", + "print(test_err.shape)" + ], + "execution_count": 27, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(5437, 39)\n", + "(4041, 39)\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sK2Zm5_5ncb_" + }, + "source": [ + "\n", + "\n", + " | Prediction of Attack | Prediction of Non-Attack\n", + "---\n", + " Attack | True Positive : 
2120 | False Negative : 1396\n", + "\n", + "\n", + "---\n", + " Non-Attack | False Positive : 1 | True Negative : 4041\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VdYEcIpkHm_R" + }, + "source": [ + "**Apriori Test**\n", + "\n", + "\n", + "\n", + "* Accuracy = 81.52%\n", + "* Precision = 99.95%\n", + "* Recall = 60.30%\n", + "* Fallout = 0.02%\n", + "* F-score = 75.22%" + ] + } + ] +} \ No newline at end of file