import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import time

# Load the dataset from 'DM_data.csv' (path is relative to the current working
# directory) and print a per-column summary of non-null counts and dtypes.
data = pd.read_csv('DM_data.csv')
data.info()
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from mlxtend.frequent_patterns import apriori,association_rules

# Scale the data with StandardScaler.
# NOTE(review): the scaler is fit on every column of `data`, which includes the
# `class` label and `index_num`, and it is fit on the full frame before any
# train/test split is made — confirm this is intended.
sc = StandardScaler()
sc_data = sc.fit_transform(data)

# Wrap the scaled values in a DataFrame that reuses the original column labels,
# then preview the first 10 rows.
sc_df = pd.DataFrame(sc_data, columns=data.columns)
sc_df.head(n=10)
<th>srv_rerror_rate</th>\n", - " <th>same_srv_rate</th>\n", - " <th>diff_srv_rate</th>\n", - " <th>srv_diff_host_rate</th>\n", - " <th>dst_host_count</th>\n", - " <th>dst_host_srv_count</th>\n", - " <th>dst_host_same_srv_rate</th>\n", - " <th>dst_host_diff_srv_rate</th>\n", - " <th>dst_host_same_src_port_rate</th>\n", - " <th>dst_host_srv_diff_host_rate</th>\n", - " <th>dst_host_serror_rate</th>\n", - " <th>dst_host_srv_serror_rate</th>\n", - " <th>dst_host_rerror_rate</th>\n", - " <th>dst_host_srv_rerror_rate</th>\n", - " <th>class</th>\n", - " <th>index_num</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>-0.113551</td>\n", - " <td>-0.444009</td>\n", - " <td>-1.399448</td>\n", - " <td>0.744553</td>\n", - " <td>-0.009889</td>\n", - " <td>-0.039310</td>\n", - " <td>-0.00891</td>\n", - " <td>-0.091223</td>\n", - " <td>-0.091933</td>\n", - " <td>-0.02622</td>\n", - " <td>-0.807626</td>\n", - " <td>-0.021873</td>\n", - " <td>-0.039377</td>\n", - " <td>-0.027665</td>\n", - " <td>-0.021724</td>\n", - " <td>-0.027808</td>\n", - " <td>-0.018905</td>\n", - " <td>-0.043917</td>\n", - " <td>-0.09599</td>\n", - " <td>-0.720244</td>\n", - " <td>-0.354628</td>\n", - " <td>-0.640142</td>\n", - " <td>-0.633978</td>\n", - " <td>-0.372186</td>\n", - " <td>-0.373098</td>\n", - " <td>0.772109</td>\n", - " <td>-0.349282</td>\n", - " <td>-0.373886</td>\n", - " <td>-0.328634</td>\n", - " <td>-0.813985</td>\n", - " <td>-0.779157</td>\n", - " <td>-0.280673</td>\n", - " <td>0.073120</td>\n", - " <td>-0.287993</td>\n", - " <td>-0.641804</td>\n", - " <td>-0.627365</td>\n", - " <td>-0.221668</td>\n", - " <td>-0.374281</td>\n", - " <td>-0.934425</td>\n", - " <td>-1.731982</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>-0.113551</td>\n", - " <td>1.325565</td>\n", - " <td>0.780883</td>\n", - " <td>0.744553</td>\n", - " <td>-0.010032</td>\n", - " <td>-0.039310</td>\n", - " <td>-0.00891</td>\n", - " <td>-0.091223</td>\n", - " 
<td>-0.091933</td>\n", - " <td>-0.02622</td>\n", - " <td>-0.807626</td>\n", - " <td>-0.021873</td>\n", - " <td>-0.039377</td>\n", - " <td>-0.027665</td>\n", - " <td>-0.021724</td>\n", - " <td>-0.027808</td>\n", - " <td>-0.018905</td>\n", - " <td>-0.043917</td>\n", - " <td>-0.09599</td>\n", - " <td>-0.624317</td>\n", - " <td>-0.368427</td>\n", - " <td>-0.640142</td>\n", - " <td>-0.633978</td>\n", - " <td>-0.372186</td>\n", - " <td>-0.373098</td>\n", - " <td>-1.320567</td>\n", - " <td>0.490836</td>\n", - " <td>-0.373886</td>\n", - " <td>0.732059</td>\n", - " <td>-1.030895</td>\n", - " <td>-1.157831</td>\n", - " <td>2.764403</td>\n", - " <td>2.375620</td>\n", - " <td>-0.287993</td>\n", - " <td>-0.641804</td>\n", - " <td>-0.627365</td>\n", - " <td>-0.385140</td>\n", - " <td>-0.374281</td>\n", - " <td>-0.934425</td>\n", - " <td>-1.731845</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>-0.113551</td>\n", - " <td>-0.444009</td>\n", - " <td>-1.377199</td>\n", - " <td>-0.917300</td>\n", - " <td>-0.010093</td>\n", - " <td>-0.039310</td>\n", - " <td>-0.00891</td>\n", - " <td>-0.091223</td>\n", - " <td>-0.091933</td>\n", - " <td>-0.02622</td>\n", - " <td>-0.807626</td>\n", - " <td>-0.021873</td>\n", - " <td>-0.039377</td>\n", - " <td>-0.027665</td>\n", - " <td>-0.021724</td>\n", - " <td>-0.027808</td>\n", - " <td>-0.018905</td>\n", - " <td>-0.043917</td>\n", - " <td>-0.09599</td>\n", - " <td>0.334947</td>\n", - " <td>-0.299430</td>\n", - " <td>1.595477</td>\n", - " <td>1.600209</td>\n", - " <td>-0.372186</td>\n", - " <td>-0.373098</td>\n", - " <td>-1.388806</td>\n", - " <td>0.042773</td>\n", - " <td>-0.373886</td>\n", - " <td>0.732059</td>\n", - " <td>-0.804947</td>\n", - " <td>-0.935081</td>\n", - " <td>-0.173828</td>\n", - " <td>-0.478183</td>\n", - " <td>-0.287993</td>\n", - " <td>1.603834</td>\n", - " <td>1.614454</td>\n", - " <td>-0.385140</td>\n", - " <td>-0.374281</td>\n", - " <td>1.070177</td>\n", - " <td>-1.731707</td>\n", - " </tr>\n", - " <tr>\n", - " 
<th>3</th>\n", - " <td>-0.113551</td>\n", - " <td>-0.444009</td>\n", - " <td>0.780883</td>\n", - " <td>0.744553</td>\n", - " <td>-0.009996</td>\n", - " <td>0.052473</td>\n", - " <td>-0.00891</td>\n", - " <td>-0.091223</td>\n", - " <td>-0.091933</td>\n", - " <td>-0.02622</td>\n", - " <td>1.238197</td>\n", - " <td>-0.021873</td>\n", - " <td>-0.039377</td>\n", - " <td>-0.027665</td>\n", - " <td>-0.021724</td>\n", - " <td>-0.027808</td>\n", - " <td>-0.018905</td>\n", - " <td>-0.043917</td>\n", - " <td>-0.09599</td>\n", - " <td>-0.694082</td>\n", - " <td>-0.313230</td>\n", - " <td>-0.193018</td>\n", - " <td>-0.187141</td>\n", - " <td>-0.372186</td>\n", - " <td>-0.373098</td>\n", - " <td>0.772109</td>\n", - " <td>-0.349282</td>\n", - " <td>-0.373886</td>\n", - " <td>-1.540854</td>\n", - " <td>1.264742</td>\n", - " <td>1.069663</td>\n", - " <td>-0.440940</td>\n", - " <td>-0.380894</td>\n", - " <td>0.073759</td>\n", - " <td>-0.574435</td>\n", - " <td>-0.604947</td>\n", - " <td>-0.385140</td>\n", - " <td>-0.342768</td>\n", - " <td>-0.934425</td>\n", - " <td>-1.731570</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>-0.113551</td>\n", - " <td>-0.444009</td>\n", - " <td>0.780883</td>\n", - " <td>0.744553</td>\n", - " <td>-0.010010</td>\n", - " <td>-0.034582</td>\n", - " <td>-0.00891</td>\n", - " <td>-0.091223</td>\n", - " <td>-0.091933</td>\n", - " <td>-0.02622</td>\n", - " <td>1.238197</td>\n", - " <td>-0.021873</td>\n", - " <td>-0.039377</td>\n", - " <td>-0.027665</td>\n", - " <td>-0.021724</td>\n", - " <td>-0.027808</td>\n", - " <td>-0.018905</td>\n", - " <td>-0.043917</td>\n", - " <td>-0.09599</td>\n", - " <td>-0.476067</td>\n", - " <td>0.059355</td>\n", - " <td>-0.640142</td>\n", - " <td>-0.633978</td>\n", - " <td>-0.372186</td>\n", - " <td>-0.373098</td>\n", - " <td>0.772109</td>\n", - " <td>-0.349282</td>\n", - " <td>-0.023115</td>\n", - " <td>0.732059</td>\n", - " <td>1.264742</td>\n", - " <td>1.069663</td>\n", - " <td>-0.440940</td>\n", - " 
<td>-0.478183</td>\n", - " <td>-0.287993</td>\n", - " <td>-0.641804</td>\n", - " <td>-0.627365</td>\n", - " <td>-0.385140</td>\n", - " <td>-0.374281</td>\n", - " <td>-0.934425</td>\n", - " <td>-1.731432</td>\n", - " </tr>\n", - " <tr>\n", - " <th>5</th>\n", - " <td>-0.113551</td>\n", - " <td>-0.444009</td>\n", - " <td>-1.377199</td>\n", - " <td>-2.025203</td>\n", - " <td>-0.010093</td>\n", - " <td>-0.039310</td>\n", - " <td>-0.00891</td>\n", - " <td>-0.091223</td>\n", - " <td>-0.091933</td>\n", - " <td>-0.02622</td>\n", - " <td>-0.807626</td>\n", - " <td>-0.021873</td>\n", - " <td>-0.039377</td>\n", - " <td>-0.027665</td>\n", - " <td>-0.021724</td>\n", - " <td>-0.027808</td>\n", - " <td>-0.018905</td>\n", - " <td>-0.043917</td>\n", - " <td>-0.09599</td>\n", - " <td>0.317506</td>\n", - " <td>-0.120038</td>\n", - " <td>-0.640142</td>\n", - " <td>-0.633978</td>\n", - " <td>2.765176</td>\n", - " <td>2.729322</td>\n", - " <td>-1.138595</td>\n", - " <td>-0.013235</td>\n", - " <td>-0.373886</td>\n", - " <td>0.732059</td>\n", - " <td>-0.868212</td>\n", - " <td>-1.001906</td>\n", - " <td>-0.066984</td>\n", - " <td>-0.478183</td>\n", - " <td>-0.287993</td>\n", - " <td>-0.641804</td>\n", - " <td>-0.627365</td>\n", - " <td>2.884296</td>\n", - " <td>2.777041</td>\n", - " <td>1.070177</td>\n", - " <td>-1.731295</td>\n", - " </tr>\n", - " <tr>\n", - " <th>6</th>\n", - " <td>-0.113551</td>\n", - " <td>-0.444009</td>\n", - " <td>-1.377199</td>\n", - " <td>-0.917300</td>\n", - " <td>-0.010093</td>\n", - " <td>-0.039310</td>\n", - " <td>-0.00891</td>\n", - " <td>-0.091223</td>\n", - " <td>-0.091933</td>\n", - " <td>-0.02622</td>\n", - " <td>-0.807626</td>\n", - " <td>-0.021873</td>\n", - " <td>-0.039377</td>\n", - " <td>-0.027665</td>\n", - " <td>-0.021724</td>\n", - " <td>-0.027808</td>\n", - " <td>-0.018905</td>\n", - " <td>-0.043917</td>\n", - " <td>-0.09599</td>\n", - " <td>0.709933</td>\n", - " <td>-0.258032</td>\n", - " <td>1.595477</td>\n", - " <td>1.600209</td>\n", - " 
<td>-0.372186</td>\n", - " <td>-0.373098</td>\n", - " <td>-1.388806</td>\n", - " <td>-0.013235</td>\n", - " <td>-0.373886</td>\n", - " <td>0.732059</td>\n", - " <td>-0.958592</td>\n", - " <td>-1.068731</td>\n", - " <td>-0.173828</td>\n", - " <td>-0.478183</td>\n", - " <td>-0.287993</td>\n", - " <td>1.603834</td>\n", - " <td>1.614454</td>\n", - " <td>-0.385140</td>\n", - " <td>-0.374281</td>\n", - " <td>1.070177</td>\n", - " <td>-1.731157</td>\n", - " </tr>\n", - " <tr>\n", - " <th>7</th>\n", - " <td>-0.113551</td>\n", - " <td>-0.444009</td>\n", - " <td>-1.377199</td>\n", - " <td>-0.917300</td>\n", - " <td>-0.010093</td>\n", - " <td>-0.039310</td>\n", - " <td>-0.00891</td>\n", - " <td>-0.091223</td>\n", - " <td>-0.091933</td>\n", - " <td>-0.02622</td>\n", - " <td>-0.807626</td>\n", - " <td>-0.021873</td>\n", - " <td>-0.039377</td>\n", - " <td>-0.027665</td>\n", - " <td>-0.021724</td>\n", - " <td>-0.027808</td>\n", - " <td>-0.018905</td>\n", - " <td>-0.043917</td>\n", - " <td>-0.09599</td>\n", - " <td>0.282624</td>\n", - " <td>-0.161436</td>\n", - " <td>1.595477</td>\n", - " <td>1.600209</td>\n", - " <td>-0.372186</td>\n", - " <td>-0.373098</td>\n", - " <td>-1.184088</td>\n", - " <td>-0.013235</td>\n", - " <td>-0.373886</td>\n", - " <td>0.732059</td>\n", - " <td>-0.904364</td>\n", - " <td>-1.024181</td>\n", - " <td>-0.066984</td>\n", - " <td>-0.478183</td>\n", - " <td>-0.287993</td>\n", - " <td>1.603834</td>\n", - " <td>1.614454</td>\n", - " <td>-0.385140</td>\n", - " <td>-0.374281</td>\n", - " <td>1.070177</td>\n", - " <td>-1.731019</td>\n", - " </tr>\n", - " <tr>\n", - " <th>8</th>\n", - " <td>-0.113551</td>\n", - " <td>-0.444009</td>\n", - " <td>0.780883</td>\n", - " <td>-0.917300</td>\n", - " <td>-0.010093</td>\n", - " <td>-0.039310</td>\n", - " <td>-0.00891</td>\n", - " <td>-0.091223</td>\n", - " <td>-0.091933</td>\n", - " <td>-0.02622</td>\n", - " <td>-0.807626</td>\n", - " <td>-0.021873</td>\n", - " <td>-0.039377</td>\n", - " <td>-0.027665</td>\n", - " 
<td>-0.021724</td>\n", - " <td>-0.027808</td>\n", - " <td>-0.018905</td>\n", - " <td>-0.043917</td>\n", - " <td>-0.09599</td>\n", - " <td>1.616874</td>\n", - " <td>-0.064840</td>\n", - " <td>1.595477</td>\n", - " <td>1.600209</td>\n", - " <td>-0.372186</td>\n", - " <td>-0.373098</td>\n", - " <td>-1.297820</td>\n", - " <td>-0.069243</td>\n", - " <td>-0.373886</td>\n", - " <td>0.732059</td>\n", - " <td>-0.832060</td>\n", - " <td>-0.957356</td>\n", - " <td>-0.173828</td>\n", - " <td>-0.478183</td>\n", - " <td>-0.287993</td>\n", - " <td>1.603834</td>\n", - " <td>1.614454</td>\n", - " <td>-0.385140</td>\n", - " <td>-0.374281</td>\n", - " <td>1.070177</td>\n", - " <td>-1.730882</td>\n", - " </tr>\n", - " <tr>\n", - " <th>9</th>\n", - " <td>-0.113551</td>\n", - " <td>-0.444009</td>\n", - " <td>-1.377199</td>\n", - " <td>-0.917300</td>\n", - " <td>-0.010093</td>\n", - " <td>-0.039310</td>\n", - " <td>-0.00891</td>\n", - " <td>-0.091223</td>\n", - " <td>-0.091933</td>\n", - " <td>-0.02622</td>\n", - " <td>-0.807626</td>\n", - " <td>-0.021873</td>\n", - " <td>-0.039377</td>\n", - " <td>-0.027665</td>\n", - " <td>-0.021724</td>\n", - " <td>-0.027808</td>\n", - " <td>-0.018905</td>\n", - " <td>-0.043917</td>\n", - " <td>-0.09599</td>\n", - " <td>0.422153</td>\n", - " <td>-0.271831</td>\n", - " <td>1.595477</td>\n", - " <td>1.600209</td>\n", - " <td>-0.372186</td>\n", - " <td>-0.373098</td>\n", - " <td>-1.366060</td>\n", - " <td>-0.013235</td>\n", - " <td>-0.373886</td>\n", - " <td>0.732059</td>\n", - " <td>-0.922440</td>\n", - " <td>-1.046456</td>\n", - " <td>-0.120406</td>\n", - " <td>-0.478183</td>\n", - " <td>-0.287993</td>\n", - " <td>1.603834</td>\n", - " <td>1.614454</td>\n", - " <td>-0.385140</td>\n", - " <td>-0.374281</td>\n", - " <td>1.070177</td>\n", - " <td>-1.730744</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " duration protocol_type ... class index_num\n", - "0 -0.113551 -0.444009 ... 
def encode_units(x):
    """Binarize a single scaled value for frequent-itemset mining.

    Parameters
    ----------
    x : int or float
        One scalar cell value (the notebook applies this element-wise to the
        standardized frame).

    Returns
    -------
    int
        1 if ``x`` is strictly positive, otherwise 0. Zero and negative
        values map to 0.

    Notes
    -----
    The previous implementation used two independent tests (``x <= 0`` and
    ``x > 0``). Both are False for NaN, so the function fell through and
    returned ``None``, producing a non-binary cell. A single total expression
    guarantees that every input yields 0 or 1; NaN is treated as "not
    positive" and therefore encodes to 0.
    """
    # `x > 0` is False for NaN, so missing values land in the 0 bucket
    # instead of leaking through as None.
    return 1 if x > 0 else 0
<th>num_root</th>\n", - " <th>num_file_creations</th>\n", - " <th>num_shells</th>\n", - " <th>num_access_files</th>\n", - " <th>is_guest_login</th>\n", - " <th>count</th>\n", - " <th>srv_count</th>\n", - " <th>serror_rate</th>\n", - " <th>srv_serror_rate</th>\n", - " <th>rerror_rate</th>\n", - " <th>srv_rerror_rate</th>\n", - " <th>same_srv_rate</th>\n", - " <th>diff_srv_rate</th>\n", - " <th>srv_diff_host_rate</th>\n", - " <th>dst_host_count</th>\n", - " <th>dst_host_srv_count</th>\n", - " <th>dst_host_same_srv_rate</th>\n", - " <th>dst_host_diff_srv_rate</th>\n", - " <th>dst_host_same_src_port_rate</th>\n", - " <th>dst_host_srv_diff_host_rate</th>\n", - " <th>dst_host_serror_rate</th>\n", - " <th>dst_host_srv_serror_rate</th>\n", - " <th>dst_host_rerror_rate</th>\n", - " <th>dst_host_srv_rerror_rate</th>\n", - " <th>class</th>\n", - " <th>index_num</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " 
<td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " 
<td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>5</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>6</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " 
<td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>7</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>8</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " 
<td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>9</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " duration protocol_type service ... dst_host_srv_rerror_rate class index_num\n", - "0 0 0 0 ... 0 0 0\n", - "1 0 1 1 ... 0 0 0\n", - "2 0 0 0 ... 0 1 0\n", - "3 0 0 1 ... 0 0 0\n", - "4 0 0 1 ... 0 0 0\n", - "5 0 0 0 ... 1 1 0\n", - "6 0 0 0 ... 0 1 0\n", - "7 0 0 0 ... 0 1 0\n", - "8 0 0 1 ... 0 1 0\n", - "9 0 0 0 ... 
# Remove the `index_num` column from the binarized frame before splitting.
data_X = train_df.drop(columns = ["index_num"])

# Split into train and test sets at a 7:3 ratio; random_state pins the shuffle
# so the split is the same on every run.
X_train, X_test = train_test_split(data_X, test_size=0.3, random_state=42)
print(X_train.shape, X_test.shape)

# Build `df` from the training split, selecting the column labels of `data`
# with `index_num` removed.
df = pd.DataFrame(X_train, columns=data.drop(columns = ["index_num"]).columns)

df.head()
<th>num_file_creations</th>\n", - " <th>num_shells</th>\n", - " <th>num_access_files</th>\n", - " <th>is_guest_login</th>\n", - " <th>count</th>\n", - " <th>srv_count</th>\n", - " <th>serror_rate</th>\n", - " <th>srv_serror_rate</th>\n", - " <th>rerror_rate</th>\n", - " <th>srv_rerror_rate</th>\n", - " <th>same_srv_rate</th>\n", - " <th>diff_srv_rate</th>\n", - " <th>srv_diff_host_rate</th>\n", - " <th>dst_host_count</th>\n", - " <th>dst_host_srv_count</th>\n", - " <th>dst_host_same_srv_rate</th>\n", - " <th>dst_host_diff_srv_rate</th>\n", - " <th>dst_host_same_src_port_rate</th>\n", - " <th>dst_host_srv_diff_host_rate</th>\n", - " <th>dst_host_serror_rate</th>\n", - " <th>dst_host_srv_serror_rate</th>\n", - " <th>dst_host_rerror_rate</th>\n", - " <th>dst_host_srv_rerror_rate</th>\n", - " <th>class</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>741</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>411</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " 
<td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>17841</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>20962</th>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " 
<td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>17790</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " duration protocol_type ... dst_host_srv_rerror_rate class\n", - "741 0 0 ... 0 1\n", - "411 0 0 ... 1 1\n", - "17841 0 0 ... 1 1\n", - "20962 0 1 ... 0 1\n", - "17790 0 0 ... 
0 1\n", - "\n", - "[5 rows x 39 columns]" - ] - }, - "metadata": {}, - "execution_count": 8 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 424 - }, - "id": "71bCJO3_-Nrz", - "outputId": "036db7fa-6eb4-4faa-a903-4dfa82521931" - }, - "source": [ - "frequent_itemsets = apriori( df, min_support = 0.27, use_colnames=True)\n", - "result_desc = frequent_itemsets.sort_values(['support'],ascending =[False])\n", - "result_desc" - ], - "execution_count": 9, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>support</th>\n", - " <th>itemsets</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>7</th>\n", - " <td>0.642225</td>\n", - " <td>(dst_host_count)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>6</th>\n", - " <td>0.622547</td>\n", - " <td>(same_srv_rate)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>0.618634</td>\n", - " <td>(service)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>0.611773</td>\n", - " <td>(flag)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>20</th>\n", - " <td>0.567143</td>\n", - " <td>(flag, same_srv_rate)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>...</th>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " </tr>\n", - " <tr>\n", - " <th>75</th>\n", - " <td>0.273789</td>\n", - " <td>(class, dst_host_srv_serror_rate, dst_host_ser...</td>\n", - " </tr>\n", - " <tr>\n", - " <th>94</th>\n", - " <td>0.273733</td>\n", - " <td>(class, dst_host_srv_serror_rate, serror_rate,...</td>\n", 
- " </tr>\n", - " <tr>\n", - " <th>95</th>\n", - " <td>0.273676</td>\n", - " <td>(class, dst_host_srv_serror_rate, srv_serror_r...</td>\n", - " </tr>\n", - " <tr>\n", - " <th>102</th>\n", - " <td>0.273676</td>\n", - " <td>(class, srv_serror_rate, serror_rate, dst_host...</td>\n", - " </tr>\n", - " <tr>\n", - " <th>39</th>\n", - " <td>0.270727</td>\n", - " <td>(dst_host_count, dst_host_serror_rate)</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "<p>104 rows × 2 columns</p>\n", - "</div>" - ], - "text/plain": [ - " support itemsets\n", - "7 0.642225 (dst_host_count)\n", - "6 0.622547 (same_srv_rate)\n", - "0 0.618634 (service)\n", - "1 0.611773 (flag)\n", - "20 0.567143 (flag, same_srv_rate)\n", - ".. ... ...\n", - "75 0.273789 (class, dst_host_srv_serror_rate, dst_host_ser...\n", - "94 0.273733 (class, dst_host_srv_serror_rate, serror_rate,...\n", - "95 0.273676 (class, dst_host_srv_serror_rate, srv_serror_r...\n", - "102 0.273676 (class, srv_serror_rate, serror_rate, dst_host...\n", - "39 0.270727 (dst_host_count, dst_host_serror_rate)\n", - "\n", - "[104 rows x 2 columns]" - ] - }, - "metadata": {}, - "execution_count": 9 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 632 - }, - "id": "DTT1_SWX-btw", - "outputId": "b837035f-880e-4cd0-8345-ad2da779bb21" - }, - "source": [ - "rules = association_rules(result_desc , metric = \"confidence\" , min_threshold = 0.9)\n", - "rules = rules.sort_values(['confidence','lift'], ascending=[False , False])\n", - "rules" - ], - "execution_count": 10, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" 
class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>antecedents</th>\n", - " <th>consequents</th>\n", - " <th>antecedent support</th>\n", - " <th>consequent support</th>\n", - " <th>support</th>\n", - " <th>confidence</th>\n", - " <th>lift</th>\n", - " <th>leverage</th>\n", - " <th>conviction</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>220</th>\n", - " <td>(srv_serror_rate, dst_host_serror_rate)</td>\n", - " <td>(serror_rate)</td>\n", - " <td>0.275264</td>\n", - " <td>0.286152</td>\n", - " <td>0.275264</td>\n", - " <td>1.000000</td>\n", - " <td>3.494649</td>\n", - " <td>0.196497</td>\n", - " <td>inf</td>\n", - " </tr>\n", - " <tr>\n", - " <th>250</th>\n", - " <td>(class, srv_serror_rate, dst_host_serror_rate)</td>\n", - " <td>(serror_rate)</td>\n", - " <td>0.274413</td>\n", - " <td>0.286152</td>\n", - " <td>0.274413</td>\n", - " <td>1.000000</td>\n", - " <td>3.494649</td>\n", - " <td>0.195889</td>\n", - " <td>inf</td>\n", - " </tr>\n", - " <tr>\n", - " <th>287</th>\n", - " <td>(dst_host_srv_serror_rate, srv_serror_rate, ds...</td>\n", - " <td>(serror_rate)</td>\n", - " <td>0.273959</td>\n", - " <td>0.286152</td>\n", - " <td>0.273959</td>\n", - " <td>1.000000</td>\n", - " <td>3.494649</td>\n", - " <td>0.195565</td>\n", - " <td>inf</td>\n", - " </tr>\n", - " <tr>\n", - " <th>339</th>\n", - " <td>(class, dst_host_srv_serror_rate, srv_serror_r...</td>\n", - " <td>(serror_rate)</td>\n", - " <td>0.273676</td>\n", - " <td>0.286152</td>\n", - " <td>0.273676</td>\n", - " <td>1.000000</td>\n", - " <td>3.494649</td>\n", - " <td>0.195363</td>\n", - " <td>inf</td>\n", - " </tr>\n", - " <tr>\n", - " <th>215</th>\n", - " <td>(dst_host_srv_serror_rate, serror_rate)</td>\n", - " <td>(srv_serror_rate)</td>\n", - " <td>0.275377</td>\n", - " <td>0.283600</td>\n", - " <td>0.275320</td>\n", - " <td>0.999794</td>\n", - " <td>3.525369</td>\n", - " <td>0.197223</td>\n", - " <td>3478.839061</td>\n", - 
" </tr>\n", - " <tr>\n", - " <th>...</th>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " </tr>\n", - " <tr>\n", - " <th>86</th>\n", - " <td>(service, same_srv_rate, logged_in)</td>\n", - " <td>(dst_host_same_srv_rate, flag)</td>\n", - " <td>0.353351</td>\n", - " <td>0.470795</td>\n", - " <td>0.318759</td>\n", - " <td>0.902102</td>\n", - " <td>1.916125</td>\n", - " <td>0.152403</td>\n", - " <td>5.405698</td>\n", - " </tr>\n", - " <tr>\n", - " <th>197</th>\n", - " <td>(dst_host_same_srv_rate, flag, logged_in)</td>\n", - " <td>(service, dst_host_srv_count, same_srv_rate)</td>\n", - " <td>0.331802</td>\n", - " <td>0.344732</td>\n", - " <td>0.299195</td>\n", - " <td>0.901726</td>\n", - " <td>2.615733</td>\n", - " <td>0.184812</td>\n", - " <td>6.667782</td>\n", - " </tr>\n", - " <tr>\n", - " <th>75</th>\n", - " <td>(service, flag, logged_in)</td>\n", - " <td>(dst_host_same_srv_rate)</td>\n", - " <td>0.355450</td>\n", - " <td>0.498809</td>\n", - " <td>0.320347</td>\n", - " <td>0.901244</td>\n", - " <td>1.806792</td>\n", - " <td>0.143046</td>\n", - " <td>5.075064</td>\n", - " </tr>\n", - " <tr>\n", - " <th>152</th>\n", - " <td>(dst_host_same_srv_rate, logged_in)</td>\n", - " <td>(service, dst_host_srv_count, same_srv_rate)</td>\n", - " <td>0.334524</td>\n", - " <td>0.344732</td>\n", - " <td>0.301463</td>\n", - " <td>0.901170</td>\n", - " <td>2.614118</td>\n", - " <td>0.186142</td>\n", - " <td>6.630236</td>\n", - " </tr>\n", - " <tr>\n", - " <th>40</th>\n", - " <td>(logged_in)</td>\n", - " <td>(service, same_srv_rate)</td>\n", - " <td>0.392254</td>\n", - " <td>0.434388</td>\n", - " <td>0.353351</td>\n", - " <td>0.900824</td>\n", - " <td>2.073777</td>\n", - " <td>0.182961</td>\n", - " <td>5.703116</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "<p>367 rows × 9 columns</p>\n", - "</div>" - ], - "text/plain": [ - " 
antecedents ... conviction\n", - "220 (srv_serror_rate, dst_host_serror_rate) ... inf\n", - "250 (class, srv_serror_rate, dst_host_serror_rate) ... inf\n", - "287 (dst_host_srv_serror_rate, srv_serror_rate, ds... ... inf\n", - "339 (class, dst_host_srv_serror_rate, srv_serror_r... ... inf\n", - "215 (dst_host_srv_serror_rate, serror_rate) ... 3478.839061\n", - ".. ... ... ...\n", - "86 (service, same_srv_rate, logged_in) ... 5.405698\n", - "197 (dst_host_same_srv_rate, flag, logged_in) ... 6.667782\n", - "75 (service, flag, logged_in) ... 5.075064\n", - "152 (dst_host_same_srv_rate, logged_in) ... 6.630236\n", - "40 (logged_in) ... 5.703116\n", - "\n", - "[367 rows x 9 columns]" - ] - }, - "metadata": {}, - "execution_count": 10 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 659 - }, - "id": "vy-AH96DXMYb", - "outputId": "41be0e5a-fd05-4558-c0ca-f42de66d09a2" - }, - "source": [ - "rules_list = rules[rules['consequents'] == {\"class\"}]\n", - "rules_list" - ], - "execution_count": 11, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>antecedents</th>\n", - " <th>consequents</th>\n", - " <th>antecedent support</th>\n", - " <th>consequent support</th>\n", - " <th>support</th>\n", - " <th>confidence</th>\n", - " <th>lift</th>\n", - " <th>leverage</th>\n", - " <th>conviction</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>314</th>\n", - " <td>(dst_host_srv_serror_rate, serror_rate, dst_ho...</td>\n", - " 
<td>(class)</td>\n", - " <td>0.274016</td>\n", - " <td>0.466542</td>\n", - " <td>0.273733</td>\n", - " <td>0.998965</td>\n", - " <td>2.141212</td>\n", - " <td>0.145893</td>\n", - " <td>515.533900</td>\n", - " </tr>\n", - " <tr>\n", - " <th>327</th>\n", - " <td>(dst_host_srv_serror_rate, srv_serror_rate, ds...</td>\n", - " <td>(class)</td>\n", - " <td>0.273959</td>\n", - " <td>0.466542</td>\n", - " <td>0.273676</td>\n", - " <td>0.998965</td>\n", - " <td>2.141212</td>\n", - " <td>0.145862</td>\n", - " <td>515.427209</td>\n", - " </tr>\n", - " <tr>\n", - " <th>341</th>\n", - " <td>(serror_rate, dst_host_srv_serror_rate, srv_se...</td>\n", - " <td>(class)</td>\n", - " <td>0.273959</td>\n", - " <td>0.466542</td>\n", - " <td>0.273676</td>\n", - " <td>0.998965</td>\n", - " <td>2.141212</td>\n", - " <td>0.145862</td>\n", - " <td>515.427209</td>\n", - " </tr>\n", - " <tr>\n", - " <th>246</th>\n", - " <td>(srv_serror_rate, dst_host_serror_rate)</td>\n", - " <td>(class)</td>\n", - " <td>0.275264</td>\n", - " <td>0.466542</td>\n", - " <td>0.274413</td>\n", - " <td>0.996910</td>\n", - " <td>2.136806</td>\n", - " <td>0.145991</td>\n", - " <td>172.627039</td>\n", - " </tr>\n", - " <tr>\n", - " <th>251</th>\n", - " <td>(serror_rate, srv_serror_rate, dst_host_serror...</td>\n", - " <td>(class)</td>\n", - " <td>0.275264</td>\n", - " <td>0.466542</td>\n", - " <td>0.274413</td>\n", - " <td>0.996910</td>\n", - " <td>2.136806</td>\n", - " <td>0.145991</td>\n", - " <td>172.627039</td>\n", - " </tr>\n", - " <tr>\n", - " <th>236</th>\n", - " <td>(dst_host_srv_serror_rate, srv_serror_rate)</td>\n", - " <td>(class)</td>\n", - " <td>0.275604</td>\n", - " <td>0.466542</td>\n", - " <td>0.274583</td>\n", - " <td>0.996296</td>\n", - " <td>2.135492</td>\n", - " <td>0.146002</td>\n", - " <td>144.033685</td>\n", - " </tr>\n", - " <tr>\n", - " <th>264</th>\n", - " <td>(dst_host_srv_serror_rate, serror_rate)</td>\n", - " <td>(class)</td>\n", - " <td>0.275377</td>\n", - " <td>0.466542</td>\n", - " 
<td>0.274356</td>\n", - " <td>0.996293</td>\n", - " <td>2.135485</td>\n", - " <td>0.145881</td>\n", - " <td>143.915139</td>\n", - " </tr>\n", - " <tr>\n", - " <th>269</th>\n", - " <td>(serror_rate, dst_host_srv_serror_rate, srv_se...</td>\n", - " <td>(class)</td>\n", - " <td>0.275320</td>\n", - " <td>0.466542</td>\n", - " <td>0.274300</td>\n", - " <td>0.996292</td>\n", - " <td>2.135483</td>\n", - " <td>0.145851</td>\n", - " <td>143.885502</td>\n", - " </tr>\n", - " <tr>\n", - " <th>241</th>\n", - " <td>(serror_rate, dst_host_serror_rate)</td>\n", - " <td>(class)</td>\n", - " <td>0.276341</td>\n", - " <td>0.466542</td>\n", - " <td>0.274526</td>\n", - " <td>0.993433</td>\n", - " <td>2.129355</td>\n", - " <td>0.145602</td>\n", - " <td>81.235665</td>\n", - " </tr>\n", - " <tr>\n", - " <th>308</th>\n", - " <td>(dst_host_srv_serror_rate, dst_host_serror_rate)</td>\n", - " <td>(class)</td>\n", - " <td>0.276625</td>\n", - " <td>0.466542</td>\n", - " <td>0.273789</td>\n", - " <td>0.989750</td>\n", - " <td>2.121460</td>\n", - " <td>0.144732</td>\n", - " <td>52.044171</td>\n", - " </tr>\n", - " <tr>\n", - " <th>233</th>\n", - " <td>(dst_host_srv_serror_rate)</td>\n", - " <td>(class)</td>\n", - " <td>0.279233</td>\n", - " <td>0.466542</td>\n", - " <td>0.274697</td>\n", - " <td>0.983753</td>\n", - " <td>2.108606</td>\n", - " <td>0.144423</td>\n", - " <td>32.834346</td>\n", - " </tr>\n", - " <tr>\n", - " <th>228</th>\n", - " <td>(serror_rate, srv_serror_rate)</td>\n", - " <td>(class)</td>\n", - " <td>0.282352</td>\n", - " <td>0.466542</td>\n", - " <td>0.275150</td>\n", - " <td>0.974493</td>\n", - " <td>2.088757</td>\n", - " <td>0.143421</td>\n", - " <td>20.914077</td>\n", - " </tr>\n", - " <tr>\n", - " <th>210</th>\n", - " <td>(srv_serror_rate)</td>\n", - " <td>(class)</td>\n", - " <td>0.283600</td>\n", - " <td>0.466542</td>\n", - " <td>0.275434</td>\n", - " <td>0.971206</td>\n", - " <td>2.081712</td>\n", - " <td>0.143123</td>\n", - " <td>18.526555</td>\n", - " </tr>\n", - " 
<tr>\n", - " <th>207</th>\n", - " <td>(serror_rate)</td>\n", - " <td>(class)</td>\n", - " <td>0.286152</td>\n", - " <td>0.466542</td>\n", - " <td>0.276001</td>\n", - " <td>0.964526</td>\n", - " <td>2.067395</td>\n", - " <td>0.142499</td>\n", - " <td>15.038154</td>\n", - " </tr>\n", - " <tr>\n", - " <th>232</th>\n", - " <td>(dst_host_serror_rate)</td>\n", - " <td>(class)</td>\n", - " <td>0.285358</td>\n", - " <td>0.466542</td>\n", - " <td>0.274980</td>\n", - " <td>0.963633</td>\n", - " <td>2.065480</td>\n", - " <td>0.141849</td>\n", - " <td>14.668640</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " antecedents ... conviction\n", - "314 (dst_host_srv_serror_rate, serror_rate, dst_ho... ... 515.533900\n", - "327 (dst_host_srv_serror_rate, srv_serror_rate, ds... ... 515.427209\n", - "341 (serror_rate, dst_host_srv_serror_rate, srv_se... ... 515.427209\n", - "246 (srv_serror_rate, dst_host_serror_rate) ... 172.627039\n", - "251 (serror_rate, srv_serror_rate, dst_host_serror... ... 172.627039\n", - "236 (dst_host_srv_serror_rate, srv_serror_rate) ... 144.033685\n", - "264 (dst_host_srv_serror_rate, serror_rate) ... 143.915139\n", - "269 (serror_rate, dst_host_srv_serror_rate, srv_se... ... 143.885502\n", - "241 (serror_rate, dst_host_serror_rate) ... 81.235665\n", - "308 (dst_host_srv_serror_rate, dst_host_serror_rate) ... 52.044171\n", - "233 (dst_host_srv_serror_rate) ... 32.834346\n", - "228 (serror_rate, srv_serror_rate) ... 20.914077\n", - "210 (srv_serror_rate) ... 18.526555\n", - "207 (serror_rate) ... 15.038154\n", - "232 (dst_host_serror_rate) ... 
14.668640\n", - "\n", - "[15 rows x 9 columns]" - ] - }, - "metadata": {}, - "execution_count": 11 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "mmAOlrta8P1p", - "outputId": "87228663-6c09-4a17-ad78-ce1072472707" - }, - "source": [ - "col = rules_list['antecedents']\n", - "col" - ], - "execution_count": 12, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "314 (dst_host_srv_serror_rate, serror_rate, dst_ho...\n", - "327 (dst_host_srv_serror_rate, srv_serror_rate, ds...\n", - "341 (serror_rate, dst_host_srv_serror_rate, srv_se...\n", - "246 (srv_serror_rate, dst_host_serror_rate)\n", - "251 (serror_rate, srv_serror_rate, dst_host_serror...\n", - "236 (dst_host_srv_serror_rate, srv_serror_rate)\n", - "264 (dst_host_srv_serror_rate, serror_rate)\n", - "269 (serror_rate, dst_host_srv_serror_rate, srv_se...\n", - "241 (serror_rate, dst_host_serror_rate)\n", - "308 (dst_host_srv_serror_rate, dst_host_serror_rate)\n", - "233 (dst_host_srv_serror_rate)\n", - "228 (serror_rate, srv_serror_rate)\n", - "210 (srv_serror_rate)\n", - "207 (serror_rate)\n", - "232 (dst_host_serror_rate)\n", - "Name: antecedents, dtype: object" - ] - }, - "metadata": {}, - "execution_count": 12 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "jmbME98E91xf" - }, - "source": [ - "col.to_csv('./col_list.csv')" - ], - "execution_count": 13, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 383 - }, - "id": "9jVN92COXRsJ", - "outputId": "d1dad66e-914a-4386-a477-97ea3e348c55" - }, - "source": [ - "test = pd.DataFrame(X_test, columns=data.drop(columns = [\"index_num\"]).columns)\n", - "test.head(n=10)" - ], - "execution_count": 14, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " 
vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>duration</th>\n", - " <th>protocol_type</th>\n", - " <th>service</th>\n", - " <th>flag</th>\n", - " <th>src_bytes</th>\n", - " <th>dst_bytes</th>\n", - " <th>land</th>\n", - " <th>wrong_fragment</th>\n", - " <th>hot</th>\n", - " <th>num_failed_logins</th>\n", - " <th>logged_in</th>\n", - " <th>num_compromised</th>\n", - " <th>root_shell</th>\n", - " <th>su_attempted</th>\n", - " <th>num_root</th>\n", - " <th>num_file_creations</th>\n", - " <th>num_shells</th>\n", - " <th>num_access_files</th>\n", - " <th>is_guest_login</th>\n", - " <th>count</th>\n", - " <th>srv_count</th>\n", - " <th>serror_rate</th>\n", - " <th>srv_serror_rate</th>\n", - " <th>rerror_rate</th>\n", - " <th>srv_rerror_rate</th>\n", - " <th>same_srv_rate</th>\n", - " <th>diff_srv_rate</th>\n", - " <th>srv_diff_host_rate</th>\n", - " <th>dst_host_count</th>\n", - " <th>dst_host_srv_count</th>\n", - " <th>dst_host_same_srv_rate</th>\n", - " <th>dst_host_diff_srv_rate</th>\n", - " <th>dst_host_same_src_port_rate</th>\n", - " <th>dst_host_srv_diff_host_rate</th>\n", - " <th>dst_host_serror_rate</th>\n", - " <th>dst_host_srv_serror_rate</th>\n", - " <th>dst_host_rerror_rate</th>\n", - " <th>dst_host_srv_rerror_rate</th>\n", - " <th>class</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>19064</th>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " 
<td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>11127</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>6517</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " </tr>\n", - " 
<tr>\n", - " <th>2973</th>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>13339</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>19289</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " 
<td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2166</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>5548</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>10887</th>\n", - " 
<td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2222</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " duration protocol_type ... dst_host_srv_rerror_rate class\n", - "19064 0 1 ... 0 1\n", - "11127 0 0 ... 1 0\n", - "6517 0 0 ... 0 1\n", - "2973 0 1 ... 0 0\n", - "13339 0 0 ... 0 1\n", - "19289 0 0 ... 0 0\n", - "2166 0 0 ... 0 0\n", - "5548 0 0 ... 0 0\n", - "10887 0 0 ... 0 0\n", - "2222 0 0 ... 
0 1\n", - "\n", - "[10 rows x 39 columns]" - ] - }, - "metadata": {}, - "execution_count": 14 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "hBLP3xt-ulXS" - }, - "source": [ - "col = ['dst_host_srv_serror_rate', 'srv_serror_rate', 'serror_rate', 'dst_host_serror_rate']" - ], - "execution_count": 15, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "vCNJIYoeD1sO", - "outputId": "5042860c-02d6-46fb-fc23-97f8327f70ef" - }, - "source": [ - "idx_b = test[(test['dst_host_srv_serror_rate'] == 0) | (test['srv_serror_rate'] == 0) | (test['serror_rate'] == 0) | (test['dst_host_serror_rate'] == 0)].index\n", - "test_df = test.drop(idx_b)\n", - "\n", - "idx_class = test_df[test_df['class'] == 0 ].index\n", - "test_err = test_df.drop(idx_class)\n", - "\n", - "print(test_df.shape)\n", - "print(test_err.shape)" - ], - "execution_count": 17, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "(2121, 39)\n", - "(2120, 39)\n" - ] - } - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "RWB6lAq0_GWg", - "outputId": "ce73c2e8-22d1-4977-c766-6f074d2945e8" - }, - "source": [ - "idx_a = test[(test['dst_host_srv_serror_rate'] == 1) & (test['srv_serror_rate'] == 1) & (test['serror_rate'] == 1) & (test['dst_host_serror_rate'] == 1)].index\n", - "test_df = test.drop(idx_a)\n", - "\n", - "idx_class = test_df[test_df['class'] == 1 ].index\n", - "test_err = test_df.drop(idx_class)\n", - "\n", - "print(test_df.shape)\n", - "print(test_err.shape)" - ], - "execution_count": 27, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "(5437, 39)\n", - "(4041, 39)\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "sK2Zm5_5ncb_" - }, - "source": [ - "\n", - "\n", - " | Prediction of Attack | Prediction of Non-Attack\n", - "---\n", - " Attack | True Positive : 
2120 | False Negative : 1396\n", - "---\n", - " Non-Attack | False Positive : 1 | True Negative : 4041\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "VdYEcIpkHm_R" - }, - "source": [ - "**Apriori Test**\n", - "\n", - "\n", - "\n", - "* Accuracy = 81.52%\n", - "* Precision = 99.95%\n", - "* Recall = 60.30%\n", - "* Fallout = 0.02%\n", - "* F-score = 75.2" - ] - } - ] -}