diff --git a/[DM]Apriori.ipynb b/[DM]Apriori.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..1b49bbcbc1ef2ac92f6e005a89dea5c7d5c4827c --- /dev/null +++ b/[DM]Apriori.ipynb @@ -0,0 +1,2754 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "[DM]Apriori.ipynb", + "provenance": [], + "collapsed_sections": [], + "authorship_tag": "ABX9TyMXLQwD/DRSd8r6ijM5QcXc", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "<a href=\"https://colab.research.google.com/github/lani009/IDS-DataMining/blob/main/%5BDM%5DApriori.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "HAY_lKeo6NUE" + }, + "source": [ + "import os\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "import time" + ], + "execution_count": 1, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "J02wdPhK76Yc", + "outputId": "09b1579c-618b-4489-fc8a-5a4fa25e7c48" + }, + "source": [ + "data = pd.read_csv('DM_data.csv')\n", + "data.info()" + ], + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "<class 'pandas.core.frame.DataFrame'>\n", + "RangeIndex: 25192 entries, 0 to 25191\n", + "Data columns (total 40 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 duration 25192 non-null int64 \n", + " 1 protocol_type 25192 non-null int64 \n", + " 2 service 25192 non-null int64 \n", + " 3 flag 25192 non-null int64 \n", + " 4 src_bytes 25192 non-null 
int64 \n", + " 5 dst_bytes 25192 non-null int64 \n", + " 6 land 25192 non-null int64 \n", + " 7 wrong_fragment 25192 non-null int64 \n", + " 8 hot 25192 non-null int64 \n", + " 9 num_failed_logins 25192 non-null int64 \n", + " 10 logged_in 25192 non-null int64 \n", + " 11 num_compromised 25192 non-null int64 \n", + " 12 root_shell 25192 non-null int64 \n", + " 13 su_attempted 25192 non-null int64 \n", + " 14 num_root 25192 non-null int64 \n", + " 15 num_file_creations 25192 non-null int64 \n", + " 16 num_shells 25192 non-null int64 \n", + " 17 num_access_files 25192 non-null int64 \n", + " 18 is_guest_login 25192 non-null int64 \n", + " 19 count 25192 non-null int64 \n", + " 20 srv_count 25192 non-null int64 \n", + " 21 serror_rate 25192 non-null float64\n", + " 22 srv_serror_rate 25192 non-null float64\n", + " 23 rerror_rate 25192 non-null float64\n", + " 24 srv_rerror_rate 25192 non-null float64\n", + " 25 same_srv_rate 25192 non-null float64\n", + " 26 diff_srv_rate 25192 non-null float64\n", + " 27 srv_diff_host_rate 25192 non-null float64\n", + " 28 dst_host_count 25192 non-null int64 \n", + " 29 dst_host_srv_count 25192 non-null int64 \n", + " 30 dst_host_same_srv_rate 25192 non-null float64\n", + " 31 dst_host_diff_srv_rate 25192 non-null float64\n", + " 32 dst_host_same_src_port_rate 25192 non-null float64\n", + " 33 dst_host_srv_diff_host_rate 25192 non-null float64\n", + " 34 dst_host_serror_rate 25192 non-null float64\n", + " 35 dst_host_srv_serror_rate 25192 non-null float64\n", + " 36 dst_host_rerror_rate 25192 non-null float64\n", + " 37 dst_host_srv_rerror_rate 25192 non-null float64\n", + " 38 class 25192 non-null int64 \n", + " 39 index_num 25192 non-null int64 \n", + "dtypes: float64(15), int64(25)\n", + "memory usage: 7.7 MB\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "phplztW08CAV" + }, + "source": [ + "from sklearn.model_selection import train_test_split\n", + "from sklearn.preprocessing import MinMaxScaler, 
StandardScaler" + ], + "execution_count": 3, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "rWLrmiHs86KH" + }, + "source": [ + "from mlxtend.frequent_patterns import apriori,association_rules" + ], + "execution_count": 4, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 383 + }, + "id": "KpeVfpxYTAHF", + "outputId": "36c95e16-0050-40ee-f4a0-551c9a793eef" + }, + "source": [ + "sc = StandardScaler()\n", + "sc_data = sc.fit_transform(data)\n", + "\n", + "sc_df = pd.DataFrame(sc_data, columns=data.columns)\n", + "sc_df.head(n=10)" + ], + "execution_count": 5, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>duration</th>\n", + " <th>protocol_type</th>\n", + " <th>service</th>\n", + " <th>flag</th>\n", + " <th>src_bytes</th>\n", + " <th>dst_bytes</th>\n", + " <th>land</th>\n", + " <th>wrong_fragment</th>\n", + " <th>hot</th>\n", + " <th>num_failed_logins</th>\n", + " <th>logged_in</th>\n", + " <th>num_compromised</th>\n", + " <th>root_shell</th>\n", + " <th>su_attempted</th>\n", + " <th>num_root</th>\n", + " <th>num_file_creations</th>\n", + " <th>num_shells</th>\n", + " <th>num_access_files</th>\n", + " <th>is_guest_login</th>\n", + " <th>count</th>\n", + " <th>srv_count</th>\n", + " <th>serror_rate</th>\n", + " <th>srv_serror_rate</th>\n", + " <th>rerror_rate</th>\n", + " <th>srv_rerror_rate</th>\n", + " <th>same_srv_rate</th>\n", + " <th>diff_srv_rate</th>\n", + " <th>srv_diff_host_rate</th>\n", + " 
<th>dst_host_count</th>\n", + " <th>dst_host_srv_count</th>\n", + " <th>dst_host_same_srv_rate</th>\n", + " <th>dst_host_diff_srv_rate</th>\n", + " <th>dst_host_same_src_port_rate</th>\n", + " <th>dst_host_srv_diff_host_rate</th>\n", + " <th>dst_host_serror_rate</th>\n", + " <th>dst_host_srv_serror_rate</th>\n", + " <th>dst_host_rerror_rate</th>\n", + " <th>dst_host_srv_rerror_rate</th>\n", + " <th>class</th>\n", + " <th>index_num</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>-0.113551</td>\n", + " <td>-0.444009</td>\n", + " <td>-1.399448</td>\n", + " <td>0.744553</td>\n", + " <td>-0.009889</td>\n", + " <td>-0.039310</td>\n", + " <td>-0.00891</td>\n", + " <td>-0.091223</td>\n", + " <td>-0.091933</td>\n", + " <td>-0.02622</td>\n", + " <td>-0.807626</td>\n", + " <td>-0.021873</td>\n", + " <td>-0.039377</td>\n", + " <td>-0.027665</td>\n", + " <td>-0.021724</td>\n", + " <td>-0.027808</td>\n", + " <td>-0.018905</td>\n", + " <td>-0.043917</td>\n", + " <td>-0.09599</td>\n", + " <td>-0.720244</td>\n", + " <td>-0.354628</td>\n", + " <td>-0.640142</td>\n", + " <td>-0.633978</td>\n", + " <td>-0.372186</td>\n", + " <td>-0.373098</td>\n", + " <td>0.772109</td>\n", + " <td>-0.349282</td>\n", + " <td>-0.373886</td>\n", + " <td>-0.328634</td>\n", + " <td>-0.813985</td>\n", + " <td>-0.779157</td>\n", + " <td>-0.280673</td>\n", + " <td>0.073120</td>\n", + " <td>-0.287993</td>\n", + " <td>-0.641804</td>\n", + " <td>-0.627365</td>\n", + " <td>-0.221668</td>\n", + " <td>-0.374281</td>\n", + " <td>-0.934425</td>\n", + " <td>-1.731982</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>-0.113551</td>\n", + " <td>1.325565</td>\n", + " <td>0.780883</td>\n", + " <td>0.744553</td>\n", + " <td>-0.010032</td>\n", + " <td>-0.039310</td>\n", + " <td>-0.00891</td>\n", + " <td>-0.091223</td>\n", + " <td>-0.091933</td>\n", + " <td>-0.02622</td>\n", + " <td>-0.807626</td>\n", + " <td>-0.021873</td>\n", + " <td>-0.039377</td>\n", + " 
<td>-0.027665</td>\n", + " <td>-0.021724</td>\n", + " <td>-0.027808</td>\n", + " <td>-0.018905</td>\n", + " <td>-0.043917</td>\n", + " <td>-0.09599</td>\n", + " <td>-0.624317</td>\n", + " <td>-0.368427</td>\n", + " <td>-0.640142</td>\n", + " <td>-0.633978</td>\n", + " <td>-0.372186</td>\n", + " <td>-0.373098</td>\n", + " <td>-1.320567</td>\n", + " <td>0.490836</td>\n", + " <td>-0.373886</td>\n", + " <td>0.732059</td>\n", + " <td>-1.030895</td>\n", + " <td>-1.157831</td>\n", + " <td>2.764403</td>\n", + " <td>2.375620</td>\n", + " <td>-0.287993</td>\n", + " <td>-0.641804</td>\n", + " <td>-0.627365</td>\n", + " <td>-0.385140</td>\n", + " <td>-0.374281</td>\n", + " <td>-0.934425</td>\n", + " <td>-1.731845</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>-0.113551</td>\n", + " <td>-0.444009</td>\n", + " <td>-1.377199</td>\n", + " <td>-0.917300</td>\n", + " <td>-0.010093</td>\n", + " <td>-0.039310</td>\n", + " <td>-0.00891</td>\n", + " <td>-0.091223</td>\n", + " <td>-0.091933</td>\n", + " <td>-0.02622</td>\n", + " <td>-0.807626</td>\n", + " <td>-0.021873</td>\n", + " <td>-0.039377</td>\n", + " <td>-0.027665</td>\n", + " <td>-0.021724</td>\n", + " <td>-0.027808</td>\n", + " <td>-0.018905</td>\n", + " <td>-0.043917</td>\n", + " <td>-0.09599</td>\n", + " <td>0.334947</td>\n", + " <td>-0.299430</td>\n", + " <td>1.595477</td>\n", + " <td>1.600209</td>\n", + " <td>-0.372186</td>\n", + " <td>-0.373098</td>\n", + " <td>-1.388806</td>\n", + " <td>0.042773</td>\n", + " <td>-0.373886</td>\n", + " <td>0.732059</td>\n", + " <td>-0.804947</td>\n", + " <td>-0.935081</td>\n", + " <td>-0.173828</td>\n", + " <td>-0.478183</td>\n", + " <td>-0.287993</td>\n", + " <td>1.603834</td>\n", + " <td>1.614454</td>\n", + " <td>-0.385140</td>\n", + " <td>-0.374281</td>\n", + " <td>1.070177</td>\n", + " <td>-1.731707</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>-0.113551</td>\n", + " <td>-0.444009</td>\n", + " <td>0.780883</td>\n", + " <td>0.744553</td>\n", + " 
<td>-0.009996</td>\n", + " <td>0.052473</td>\n", + " <td>-0.00891</td>\n", + " <td>-0.091223</td>\n", + " <td>-0.091933</td>\n", + " <td>-0.02622</td>\n", + " <td>1.238197</td>\n", + " <td>-0.021873</td>\n", + " <td>-0.039377</td>\n", + " <td>-0.027665</td>\n", + " <td>-0.021724</td>\n", + " <td>-0.027808</td>\n", + " <td>-0.018905</td>\n", + " <td>-0.043917</td>\n", + " <td>-0.09599</td>\n", + " <td>-0.694082</td>\n", + " <td>-0.313230</td>\n", + " <td>-0.193018</td>\n", + " <td>-0.187141</td>\n", + " <td>-0.372186</td>\n", + " <td>-0.373098</td>\n", + " <td>0.772109</td>\n", + " <td>-0.349282</td>\n", + " <td>-0.373886</td>\n", + " <td>-1.540854</td>\n", + " <td>1.264742</td>\n", + " <td>1.069663</td>\n", + " <td>-0.440940</td>\n", + " <td>-0.380894</td>\n", + " <td>0.073759</td>\n", + " <td>-0.574435</td>\n", + " <td>-0.604947</td>\n", + " <td>-0.385140</td>\n", + " <td>-0.342768</td>\n", + " <td>-0.934425</td>\n", + " <td>-1.731570</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>-0.113551</td>\n", + " <td>-0.444009</td>\n", + " <td>0.780883</td>\n", + " <td>0.744553</td>\n", + " <td>-0.010010</td>\n", + " <td>-0.034582</td>\n", + " <td>-0.00891</td>\n", + " <td>-0.091223</td>\n", + " <td>-0.091933</td>\n", + " <td>-0.02622</td>\n", + " <td>1.238197</td>\n", + " <td>-0.021873</td>\n", + " <td>-0.039377</td>\n", + " <td>-0.027665</td>\n", + " <td>-0.021724</td>\n", + " <td>-0.027808</td>\n", + " <td>-0.018905</td>\n", + " <td>-0.043917</td>\n", + " <td>-0.09599</td>\n", + " <td>-0.476067</td>\n", + " <td>0.059355</td>\n", + " <td>-0.640142</td>\n", + " <td>-0.633978</td>\n", + " <td>-0.372186</td>\n", + " <td>-0.373098</td>\n", + " <td>0.772109</td>\n", + " <td>-0.349282</td>\n", + " <td>-0.023115</td>\n", + " <td>0.732059</td>\n", + " <td>1.264742</td>\n", + " <td>1.069663</td>\n", + " <td>-0.440940</td>\n", + " <td>-0.478183</td>\n", + " <td>-0.287993</td>\n", + " <td>-0.641804</td>\n", + " <td>-0.627365</td>\n", + " <td>-0.385140</td>\n", + " 
<td>-0.374281</td>\n", + " <td>-0.934425</td>\n", + " <td>-1.731432</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>-0.113551</td>\n", + " <td>-0.444009</td>\n", + " <td>-1.377199</td>\n", + " <td>-2.025203</td>\n", + " <td>-0.010093</td>\n", + " <td>-0.039310</td>\n", + " <td>-0.00891</td>\n", + " <td>-0.091223</td>\n", + " <td>-0.091933</td>\n", + " <td>-0.02622</td>\n", + " <td>-0.807626</td>\n", + " <td>-0.021873</td>\n", + " <td>-0.039377</td>\n", + " <td>-0.027665</td>\n", + " <td>-0.021724</td>\n", + " <td>-0.027808</td>\n", + " <td>-0.018905</td>\n", + " <td>-0.043917</td>\n", + " <td>-0.09599</td>\n", + " <td>0.317506</td>\n", + " <td>-0.120038</td>\n", + " <td>-0.640142</td>\n", + " <td>-0.633978</td>\n", + " <td>2.765176</td>\n", + " <td>2.729322</td>\n", + " <td>-1.138595</td>\n", + " <td>-0.013235</td>\n", + " <td>-0.373886</td>\n", + " <td>0.732059</td>\n", + " <td>-0.868212</td>\n", + " <td>-1.001906</td>\n", + " <td>-0.066984</td>\n", + " <td>-0.478183</td>\n", + " <td>-0.287993</td>\n", + " <td>-0.641804</td>\n", + " <td>-0.627365</td>\n", + " <td>2.884296</td>\n", + " <td>2.777041</td>\n", + " <td>1.070177</td>\n", + " <td>-1.731295</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>-0.113551</td>\n", + " <td>-0.444009</td>\n", + " <td>-1.377199</td>\n", + " <td>-0.917300</td>\n", + " <td>-0.010093</td>\n", + " <td>-0.039310</td>\n", + " <td>-0.00891</td>\n", + " <td>-0.091223</td>\n", + " <td>-0.091933</td>\n", + " <td>-0.02622</td>\n", + " <td>-0.807626</td>\n", + " <td>-0.021873</td>\n", + " <td>-0.039377</td>\n", + " <td>-0.027665</td>\n", + " <td>-0.021724</td>\n", + " <td>-0.027808</td>\n", + " <td>-0.018905</td>\n", + " <td>-0.043917</td>\n", + " <td>-0.09599</td>\n", + " <td>0.709933</td>\n", + " <td>-0.258032</td>\n", + " <td>1.595477</td>\n", + " <td>1.600209</td>\n", + " <td>-0.372186</td>\n", + " <td>-0.373098</td>\n", + " <td>-1.388806</td>\n", + " <td>-0.013235</td>\n", + " <td>-0.373886</td>\n", + " 
<td>0.732059</td>\n", + " <td>-0.958592</td>\n", + " <td>-1.068731</td>\n", + " <td>-0.173828</td>\n", + " <td>-0.478183</td>\n", + " <td>-0.287993</td>\n", + " <td>1.603834</td>\n", + " <td>1.614454</td>\n", + " <td>-0.385140</td>\n", + " <td>-0.374281</td>\n", + " <td>1.070177</td>\n", + " <td>-1.731157</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>-0.113551</td>\n", + " <td>-0.444009</td>\n", + " <td>-1.377199</td>\n", + " <td>-0.917300</td>\n", + " <td>-0.010093</td>\n", + " <td>-0.039310</td>\n", + " <td>-0.00891</td>\n", + " <td>-0.091223</td>\n", + " <td>-0.091933</td>\n", + " <td>-0.02622</td>\n", + " <td>-0.807626</td>\n", + " <td>-0.021873</td>\n", + " <td>-0.039377</td>\n", + " <td>-0.027665</td>\n", + " <td>-0.021724</td>\n", + " <td>-0.027808</td>\n", + " <td>-0.018905</td>\n", + " <td>-0.043917</td>\n", + " <td>-0.09599</td>\n", + " <td>0.282624</td>\n", + " <td>-0.161436</td>\n", + " <td>1.595477</td>\n", + " <td>1.600209</td>\n", + " <td>-0.372186</td>\n", + " <td>-0.373098</td>\n", + " <td>-1.184088</td>\n", + " <td>-0.013235</td>\n", + " <td>-0.373886</td>\n", + " <td>0.732059</td>\n", + " <td>-0.904364</td>\n", + " <td>-1.024181</td>\n", + " <td>-0.066984</td>\n", + " <td>-0.478183</td>\n", + " <td>-0.287993</td>\n", + " <td>1.603834</td>\n", + " <td>1.614454</td>\n", + " <td>-0.385140</td>\n", + " <td>-0.374281</td>\n", + " <td>1.070177</td>\n", + " <td>-1.731019</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>-0.113551</td>\n", + " <td>-0.444009</td>\n", + " <td>0.780883</td>\n", + " <td>-0.917300</td>\n", + " <td>-0.010093</td>\n", + " <td>-0.039310</td>\n", + " <td>-0.00891</td>\n", + " <td>-0.091223</td>\n", + " <td>-0.091933</td>\n", + " <td>-0.02622</td>\n", + " <td>-0.807626</td>\n", + " <td>-0.021873</td>\n", + " <td>-0.039377</td>\n", + " <td>-0.027665</td>\n", + " <td>-0.021724</td>\n", + " <td>-0.027808</td>\n", + " <td>-0.018905</td>\n", + " <td>-0.043917</td>\n", + " <td>-0.09599</td>\n", + " 
<td>1.616874</td>\n", + " <td>-0.064840</td>\n", + " <td>1.595477</td>\n", + " <td>1.600209</td>\n", + " <td>-0.372186</td>\n", + " <td>-0.373098</td>\n", + " <td>-1.297820</td>\n", + " <td>-0.069243</td>\n", + " <td>-0.373886</td>\n", + " <td>0.732059</td>\n", + " <td>-0.832060</td>\n", + " <td>-0.957356</td>\n", + " <td>-0.173828</td>\n", + " <td>-0.478183</td>\n", + " <td>-0.287993</td>\n", + " <td>1.603834</td>\n", + " <td>1.614454</td>\n", + " <td>-0.385140</td>\n", + " <td>-0.374281</td>\n", + " <td>1.070177</td>\n", + " <td>-1.730882</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>-0.113551</td>\n", + " <td>-0.444009</td>\n", + " <td>-1.377199</td>\n", + " <td>-0.917300</td>\n", + " <td>-0.010093</td>\n", + " <td>-0.039310</td>\n", + " <td>-0.00891</td>\n", + " <td>-0.091223</td>\n", + " <td>-0.091933</td>\n", + " <td>-0.02622</td>\n", + " <td>-0.807626</td>\n", + " <td>-0.021873</td>\n", + " <td>-0.039377</td>\n", + " <td>-0.027665</td>\n", + " <td>-0.021724</td>\n", + " <td>-0.027808</td>\n", + " <td>-0.018905</td>\n", + " <td>-0.043917</td>\n", + " <td>-0.09599</td>\n", + " <td>0.422153</td>\n", + " <td>-0.271831</td>\n", + " <td>1.595477</td>\n", + " <td>1.600209</td>\n", + " <td>-0.372186</td>\n", + " <td>-0.373098</td>\n", + " <td>-1.366060</td>\n", + " <td>-0.013235</td>\n", + " <td>-0.373886</td>\n", + " <td>0.732059</td>\n", + " <td>-0.922440</td>\n", + " <td>-1.046456</td>\n", + " <td>-0.120406</td>\n", + " <td>-0.478183</td>\n", + " <td>-0.287993</td>\n", + " <td>1.603834</td>\n", + " <td>1.614454</td>\n", + " <td>-0.385140</td>\n", + " <td>-0.374281</td>\n", + " <td>1.070177</td>\n", + " <td>-1.730744</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " duration protocol_type ... class index_num\n", + "0 -0.113551 -0.444009 ... -0.934425 -1.731982\n", + "1 -0.113551 1.325565 ... -0.934425 -1.731845\n", + "2 -0.113551 -0.444009 ... 1.070177 -1.731707\n", + "3 -0.113551 -0.444009 ... 
-0.934425 -1.731570\n", + "4 -0.113551 -0.444009 ... -0.934425 -1.731432\n", + "5 -0.113551 -0.444009 ... 1.070177 -1.731295\n", + "6 -0.113551 -0.444009 ... 1.070177 -1.731157\n", + "7 -0.113551 -0.444009 ... 1.070177 -1.731019\n", + "8 -0.113551 -0.444009 ... 1.070177 -1.730882\n", + "9 -0.113551 -0.444009 ... 1.070177 -1.730744\n", + "\n", + "[10 rows x 40 columns]" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 383 + }, + "id": "LKK6fIznTzpy", + "outputId": "76b1e212-a88e-4397-da4c-27bae9acbff8" + }, + "source": [ + "def encode_units(x):\n", + " if x <= 0 :\n", + " return 0\n", + " if x >= 0 :\n", + " return 1\n", + "\n", + "train_df = sc_df.applymap(encode_units)\n", + "\n", + "train_df.head(n=10)" + ], + "execution_count": 6, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>duration</th>\n", + " <th>protocol_type</th>\n", + " <th>service</th>\n", + " <th>flag</th>\n", + " <th>src_bytes</th>\n", + " <th>dst_bytes</th>\n", + " <th>land</th>\n", + " <th>wrong_fragment</th>\n", + " <th>hot</th>\n", + " <th>num_failed_logins</th>\n", + " <th>logged_in</th>\n", + " <th>num_compromised</th>\n", + " <th>root_shell</th>\n", + " <th>su_attempted</th>\n", + " <th>num_root</th>\n", + " <th>num_file_creations</th>\n", + " <th>num_shells</th>\n", + " <th>num_access_files</th>\n", + " <th>is_guest_login</th>\n", + " <th>count</th>\n", + " <th>srv_count</th>\n", + " <th>serror_rate</th>\n", + " 
<th>srv_serror_rate</th>\n", + " <th>rerror_rate</th>\n", + " <th>srv_rerror_rate</th>\n", + " <th>same_srv_rate</th>\n", + " <th>diff_srv_rate</th>\n", + " <th>srv_diff_host_rate</th>\n", + " <th>dst_host_count</th>\n", + " <th>dst_host_srv_count</th>\n", + " <th>dst_host_same_srv_rate</th>\n", + " <th>dst_host_diff_srv_rate</th>\n", + " <th>dst_host_same_src_port_rate</th>\n", + " <th>dst_host_srv_diff_host_rate</th>\n", + " <th>dst_host_serror_rate</th>\n", + " <th>dst_host_srv_serror_rate</th>\n", + " <th>dst_host_rerror_rate</th>\n", + " <th>dst_host_srv_rerror_rate</th>\n", + " <th>class</th>\n", + " <th>index_num</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + 
" <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + 
" <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " 
<td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>0</td>\n", + " 
<td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " duration protocol_type service ... dst_host_srv_rerror_rate class index_num\n", + "0 0 0 0 ... 0 0 0\n", + "1 0 1 1 ... 0 0 0\n", + "2 0 0 0 ... 0 1 0\n", + "3 0 0 1 ... 0 0 0\n", + "4 0 0 1 ... 0 0 0\n", + "5 0 0 0 ... 1 1 0\n", + "6 0 0 0 ... 0 1 0\n", + "7 0 0 0 ... 0 1 0\n", + "8 0 0 1 ... 0 1 0\n", + "9 0 0 0 ... 
0 1 0\n", + "\n", + "[10 rows x 40 columns]" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "JmfJO9mn9_Te", + "outputId": "9c34b569-33d6-470a-f9fe-9d35b40f05ac" + }, + "source": [ + "data_X = train_df.drop(columns = [\"index_num\"])\n", + "\n", + "X_train, X_test = train_test_split(data_X, test_size=0.33, random_state=42)\n", + "print(X_train.shape, X_test.shape)" + ], + "execution_count": 7, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(16878, 39) (8314, 39)\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 226 + }, + "id": "FAqOwB0oVeAK", + "outputId": "77753fc9-2e55-4810-97e7-4be2d0491ede" + }, + "source": [ + "df = pd.DataFrame(X_train, columns=data.drop(columns = [\"index_num\"]).columns)\n", + "\n", + "df.head()" + ], + "execution_count": 8, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>duration</th>\n", + " <th>protocol_type</th>\n", + " <th>service</th>\n", + " <th>flag</th>\n", + " <th>src_bytes</th>\n", + " <th>dst_bytes</th>\n", + " <th>land</th>\n", + " <th>wrong_fragment</th>\n", + " <th>hot</th>\n", + " <th>num_failed_logins</th>\n", + " <th>logged_in</th>\n", + " <th>num_compromised</th>\n", + " <th>root_shell</th>\n", + " <th>su_attempted</th>\n", + " <th>num_root</th>\n", + " <th>num_file_creations</th>\n", + " <th>num_shells</th>\n", + " 
<th>num_access_files</th>\n", + " <th>is_guest_login</th>\n", + " <th>count</th>\n", + " <th>srv_count</th>\n", + " <th>serror_rate</th>\n", + " <th>srv_serror_rate</th>\n", + " <th>rerror_rate</th>\n", + " <th>srv_rerror_rate</th>\n", + " <th>same_srv_rate</th>\n", + " <th>diff_srv_rate</th>\n", + " <th>srv_diff_host_rate</th>\n", + " <th>dst_host_count</th>\n", + " <th>dst_host_srv_count</th>\n", + " <th>dst_host_same_srv_rate</th>\n", + " <th>dst_host_diff_srv_rate</th>\n", + " <th>dst_host_same_src_port_rate</th>\n", + " <th>dst_host_srv_diff_host_rate</th>\n", + " <th>dst_host_serror_rate</th>\n", + " <th>dst_host_srv_serror_rate</th>\n", + " <th>dst_host_rerror_rate</th>\n", + " <th>dst_host_srv_rerror_rate</th>\n", + " <th>class</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>14666</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10743</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " 
<td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2487</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>21251</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " 
<th>7387</th>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " duration protocol_type ... dst_host_srv_rerror_rate class\n", + "14666 0 0 ... 0 1\n", + "10743 0 0 ... 0 0\n", + "2487 0 0 ... 0 1\n", + "21251 0 0 ... 0 1\n", + "7387 0 1 ... 
0 0\n", + "\n", + "[5 rows x 39 columns]" + ] + }, + "metadata": {}, + "execution_count": 8 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 424 + }, + "id": "71bCJO3_-Nrz", + "outputId": "d7f6b79a-7144-473e-f576-aeeb523d856e" + }, + "source": [ + "frequent_itemsets = apriori( df, min_support = 0.01, use_colnames=True, max_len =2)\n", + "result_desc = frequent_itemsets.sort_values(['support'],ascending =[False])\n", + "result_desc" + ], + "execution_count": 9, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>support</th>\n", + " <th>itemsets</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>18</th>\n", + " <td>0.642612</td>\n", + " <td>(dst_host_count)</td>\n", + " </tr>\n", + " <tr>\n", + " <th>15</th>\n", + " <td>0.621519</td>\n", + " <td>(same_srv_rate)</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>0.619327</td>\n", + " <td>(service)</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>0.610973</td>\n", + " <td>(flag)</td>\n", + " </tr>\n", + " <tr>\n", + " <th>90</th>\n", + " <td>0.566833</td>\n", + " <td>(flag, same_srv_rate)</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>112</th>\n", + " <td>0.010724</td>\n", + " <td>(dst_bytes, dst_host_same_src_port_rate)</td>\n", + " </tr>\n", + " <tr>\n", + " <th>129</th>\n", + " <td>0.010606</td>\n", + " <td>(num_compromised, same_srv_rate)</td>\n", + " 
</tr>\n", + " <tr>\n", + " <th>38</th>\n", + " <td>0.010487</td>\n", + " <td>(dst_host_rerror_rate, duration)</td>\n", + " </tr>\n", + " <tr>\n", + " <th>85</th>\n", + " <td>0.010191</td>\n", + " <td>(num_compromised, flag)</td>\n", + " </tr>\n", + " <tr>\n", + " <th>40</th>\n", + " <td>0.010013</td>\n", + " <td>(class, duration)</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>259 rows × 2 columns</p>\n", + "</div>" + ], + "text/plain": [ + " support itemsets\n", + "18 0.642612 (dst_host_count)\n", + "15 0.621519 (same_srv_rate)\n", + "2 0.619327 (service)\n", + "3 0.610973 (flag)\n", + "90 0.566833 (flag, same_srv_rate)\n", + ".. ... ...\n", + "112 0.010724 (dst_bytes, dst_host_same_src_port_rate)\n", + "129 0.010606 (num_compromised, same_srv_rate)\n", + "38 0.010487 (dst_host_rerror_rate, duration)\n", + "85 0.010191 (num_compromised, flag)\n", + "40 0.010013 (class, duration)\n", + "\n", + "[259 rows x 2 columns]" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 424 + }, + "id": "DTT1_SWX-btw", + "outputId": "3c61a887-5192-497b-aaf2-df0bf26e3f56" + }, + "source": [ + "rules = association_rules(result_desc , metric = \"confidence\" , min_threshold = 0.90)\n", + "rules = rules.sort_values(['confidence','lift'], ascending=[False , False])\n", + "rules" + ], + "execution_count": 10, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>antecedents</th>\n", + " <th>consequents</th>\n", 
+ " <th>antecedent support</th>\n", + " <th>consequent support</th>\n", + " <th>support</th>\n", + " <th>confidence</th>\n", + " <th>lift</th>\n", + " <th>leverage</th>\n", + " <th>conviction</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>61</th>\n", + " <td>(num_compromised)</td>\n", + " <td>(logged_in)</td>\n", + " <td>0.010724</td>\n", + " <td>0.392464</td>\n", + " <td>0.010724</td>\n", + " <td>1.000000</td>\n", + " <td>2.548007</td>\n", + " <td>0.006515</td>\n", + " <td>inf</td>\n", + " </tr>\n", + " <tr>\n", + " <th>31</th>\n", + " <td>(protocol_type)</td>\n", + " <td>(flag)</td>\n", + " <td>0.186515</td>\n", + " <td>0.610973</td>\n", + " <td>0.186515</td>\n", + " <td>1.000000</td>\n", + " <td>1.636734</td>\n", + " <td>0.072559</td>\n", + " <td>inf</td>\n", + " </tr>\n", + " <tr>\n", + " <th>60</th>\n", + " <td>(num_compromised)</td>\n", + " <td>(service)</td>\n", + " <td>0.010724</td>\n", + " <td>0.619327</td>\n", + " <td>0.010724</td>\n", + " <td>1.000000</td>\n", + " <td>1.614656</td>\n", + " <td>0.004082</td>\n", + " <td>inf</td>\n", + " </tr>\n", + " <tr>\n", + " <th>37</th>\n", + " <td>(srv_count)</td>\n", + " <td>(flag)</td>\n", + " <td>0.129814</td>\n", + " <td>0.610973</td>\n", + " <td>0.129281</td>\n", + " <td>0.995892</td>\n", + " <td>1.630011</td>\n", + " <td>0.049968</td>\n", + " <td>94.706495</td>\n", + " </tr>\n", + " <tr>\n", + " <th>11</th>\n", + " <td>(srv_serror_rate)</td>\n", + " <td>(serror_rate)</td>\n", + " <td>0.285164</td>\n", + " <td>0.287771</td>\n", + " <td>0.283920</td>\n", + " <td>0.995637</td>\n", + " <td>3.459823</td>\n", + " <td>0.201858</td>\n", + " <td>163.236089</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>(logged_in)</td>\n", + " 
<td>(service)</td>\n", + " <td>0.392464</td>\n", + " <td>0.619327</td>\n", + " <td>0.359403</td>\n", + " <td>0.915761</td>\n", + " <td>1.478639</td>\n", + " <td>0.116340</td>\n", + " <td>4.518958</td>\n", + " </tr>\n", + " <tr>\n", + " <th>53</th>\n", + " <td>(dst_bytes)</td>\n", + " <td>(dst_host_same_srv_rate)</td>\n", + " <td>0.097583</td>\n", + " <td>0.498163</td>\n", + " <td>0.089288</td>\n", + " <td>0.914997</td>\n", + " <td>1.836741</td>\n", + " <td>0.040676</td>\n", + " <td>5.903750</td>\n", + " </tr>\n", + " <tr>\n", + " <th>59</th>\n", + " <td>(src_bytes)</td>\n", + " <td>(flag)</td>\n", + " <td>0.012798</td>\n", + " <td>0.610973</td>\n", + " <td>0.011672</td>\n", + " <td>0.912037</td>\n", + " <td>1.492762</td>\n", + " <td>0.003853</td>\n", + " <td>4.422624</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>(same_srv_rate)</td>\n", + " <td>(flag)</td>\n", + " <td>0.621519</td>\n", + " <td>0.610973</td>\n", + " <td>0.566833</td>\n", + " <td>0.912011</td>\n", + " <td>1.492720</td>\n", + " <td>0.187101</td>\n", + " <td>4.421338</td>\n", + " </tr>\n", + " <tr>\n", + " <th>47</th>\n", + " <td>(srv_count)</td>\n", + " <td>(dst_host_same_srv_rate)</td>\n", + " <td>0.129814</td>\n", + " <td>0.498163</td>\n", + " <td>0.117194</td>\n", + " <td>0.902784</td>\n", + " <td>1.812225</td>\n", + " <td>0.052525</td>\n", + " <td>5.162086</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>64 rows × 9 columns</p>\n", + "</div>" + ], + "text/plain": [ + " antecedents consequents ... leverage conviction\n", + "61 (num_compromised) (logged_in) ... 0.006515 inf\n", + "31 (protocol_type) (flag) ... 0.072559 inf\n", + "60 (num_compromised) (service) ... 0.004082 inf\n", + "37 (srv_count) (flag) ... 0.049968 94.706495\n", + "11 (srv_serror_rate) (serror_rate) ... 0.201858 163.236089\n", + ".. ... ... ... ... ...\n", + "9 (logged_in) (service) ... 0.116340 4.518958\n", + "53 (dst_bytes) (dst_host_same_srv_rate) ... 
0.040676 5.903750\n", + "59 (src_bytes) (flag) ... 0.003853 4.422624\n", + "1 (same_srv_rate) (flag) ... 0.187101 4.421338\n", + "47 (srv_count) (dst_host_same_srv_rate) ... 0.052525 5.162086\n", + "\n", + "[64 rows x 9 columns]" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 175 + }, + "id": "vy-AH96DXMYb", + "outputId": "990db3eb-ab88-4504-8c79-873f50c9a521" + }, + "source": [ + "rules[rules['consequents'] == {\"class\"}]" + ], + "execution_count": 11, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>antecedents</th>\n", + " <th>consequents</th>\n", + " <th>antecedent support</th>\n", + " <th>consequent support</th>\n", + " <th>support</th>\n", + " <th>confidence</th>\n", + " <th>lift</th>\n", + " <th>leverage</th>\n", + " <th>conviction</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>26</th>\n", + " <td>(dst_host_srv_serror_rate)</td>\n", + " <td>(class)</td>\n", + " <td>0.281017</td>\n", + " <td>0.467295</td>\n", + " <td>0.276277</td>\n", + " <td>0.983133</td>\n", + " <td>2.103882</td>\n", + " <td>0.144959</td>\n", + " <td>31.582765</td>\n", + " </tr>\n", + " <tr>\n", + " <th>20</th>\n", + " <td>(srv_serror_rate)</td>\n", + " <td>(class)</td>\n", + " <td>0.285164</td>\n", + " <td>0.467295</td>\n", + " <td>0.277047</td>\n", + " <td>0.971535</td>\n", + " <td>2.079064</td>\n", + " <td>0.143791</td>\n", + " <td>18.714676</td>\n", + " </tr>\n", + " <tr>\n", + " 
<th>17</th>\n", + " <td>(serror_rate)</td>\n", + " <td>(class)</td>\n", + " <td>0.287771</td>\n", + " <td>0.467295</td>\n", + " <td>0.277580</td>\n", + " <td>0.964587</td>\n", + " <td>2.064195</td>\n", + " <td>0.143106</td>\n", + " <td>15.042730</td>\n", + " </tr>\n", + " <tr>\n", + " <th>25</th>\n", + " <td>(dst_host_serror_rate)</td>\n", + " <td>(class)</td>\n", + " <td>0.287060</td>\n", + " <td>0.467295</td>\n", + " <td>0.276514</td>\n", + " <td>0.963261</td>\n", + " <td>2.061357</td>\n", + " <td>0.142372</td>\n", + " <td>14.499759</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " antecedents consequents ... leverage conviction\n", + "26 (dst_host_srv_serror_rate) (class) ... 0.144959 31.582765\n", + "20 (srv_serror_rate) (class) ... 0.143791 18.714676\n", + "17 (serror_rate) (class) ... 0.143106 15.042730\n", + "25 (dst_host_serror_rate) (class) ... 0.142372 14.499759\n", + "\n", + "[4 rows x 9 columns]" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 383 + }, + "id": "9jVN92COXRsJ", + "outputId": "b1a430b7-2403-409f-d7ff-50e55fa0b815" + }, + "source": [ + "test = pd.DataFrame(X_test, columns=data.drop(columns = [\"index_num\"]).columns)\n", + "test.head(n=10)" + ], + "execution_count": 12, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>duration</th>\n", + " <th>protocol_type</th>\n", + " <th>service</th>\n", + " <th>flag</th>\n", + " 
<th>src_bytes</th>\n", + " <th>dst_bytes</th>\n", + " <th>land</th>\n", + " <th>wrong_fragment</th>\n", + " <th>hot</th>\n", + " <th>num_failed_logins</th>\n", + " <th>logged_in</th>\n", + " <th>num_compromised</th>\n", + " <th>root_shell</th>\n", + " <th>su_attempted</th>\n", + " <th>num_root</th>\n", + " <th>num_file_creations</th>\n", + " <th>num_shells</th>\n", + " <th>num_access_files</th>\n", + " <th>is_guest_login</th>\n", + " <th>count</th>\n", + " <th>srv_count</th>\n", + " <th>serror_rate</th>\n", + " <th>srv_serror_rate</th>\n", + " <th>rerror_rate</th>\n", + " <th>srv_rerror_rate</th>\n", + " <th>same_srv_rate</th>\n", + " <th>diff_srv_rate</th>\n", + " <th>srv_diff_host_rate</th>\n", + " <th>dst_host_count</th>\n", + " <th>dst_host_srv_count</th>\n", + " <th>dst_host_same_srv_rate</th>\n", + " <th>dst_host_diff_srv_rate</th>\n", + " <th>dst_host_same_src_port_rate</th>\n", + " <th>dst_host_srv_diff_host_rate</th>\n", + " <th>dst_host_serror_rate</th>\n", + " <th>dst_host_srv_serror_rate</th>\n", + " <th>dst_host_rerror_rate</th>\n", + " <th>dst_host_srv_rerror_rate</th>\n", + " <th>class</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>19064</th>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " 
<th>11127</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6517</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2973</th>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + 
" <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>13339</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>19289</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2166</th>\n", + " <td>0</td>\n", + " 
<td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5548</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10887</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " 
<td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2222</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " duration protocol_type ... dst_host_srv_rerror_rate class\n", + "19064 0 1 ... 0 1\n", + "11127 0 0 ... 1 0\n", + "6517 0 0 ... 0 1\n", + "2973 0 1 ... 0 0\n", + "13339 0 0 ... 0 1\n", + "19289 0 0 ... 0 0\n", + "2166 0 0 ... 0 0\n", + "5548 0 0 ... 0 0\n", + "10887 0 0 ... 0 0\n", + "2222 0 0 ... 
0 1\n", + "\n", + "[10 rows x 39 columns]" + ] + }, + "metadata": {}, + "execution_count": 12 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "hBLP3xt-ulXS" + }, + "source": [ + "col = ['dst_host_srv_serror_rate', 'srv_serror_rate', 'serror_rate', 'dst_host_serror_rate']" + ], + "execution_count": 13, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "fEZdTRhXtTG4", + "outputId": "5fca2953-7360-4cf8-82c1-5baab9a49c21" + }, + "source": [ + "idx_1 = test[test['dst_host_srv_serror_rate'] == 0 ].index\n", + "test_df = test.drop(idx_1)\n", + "\n", + "idx_class = test_df[test_df['class'] == 0 ].index\n", + "test_err = test_df.drop(idx_class)\n", + "\n", + "print(test_df.shape)\n", + "print(test_err.shape)\n" + ], + "execution_count": 14, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(2348, 39)\n", + "(2317, 39)\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "IoeXwrG28q42", + "outputId": "e4f64606-53ec-428a-9e9f-772ac40fe21b" + }, + "source": [ + "idx_2 = test[test['srv_serror_rate'] == 0 ].index\n", + "test_df = test.drop(idx_2)\n", + "\n", + "idx_class = test_df[test_df['class'] == 0 ].index\n", + "test_err = test_df.drop(idx_class)\n", + "\n", + "print(test_df.shape)\n", + "print(test_err.shape)" + ], + "execution_count": 15, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(2403, 39)\n", + "(2320, 39)\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "NcRtGdgi-oFU", + "outputId": "7735a532-0de5-4f37-90b8-a83ef15d7b46" + }, + "source": [ + "idx_3 = test[test['serror_rate'] == 0 ].index\n", + "test_df = test.drop(idx_3)\n", + "\n", + "idx_class = test_df[test_df['class'] == 0 ].index\n", + "test_err = test_df.drop(idx_class)\n", + "\n", + 
"print(test_df.shape)\n", + "print(test_err.shape)" + ], + "execution_count": 16, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(2412, 39)\n", + "(2323, 39)\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "BJh3SoYy-1bm", + "outputId": "7520531c-4624-4125-86ff-41bc4a119313" + }, + "source": [ + "idx_4 = test[test['dst_host_serror_rate'] == 0 ].index\n", + "test_df = test.drop(idx_4)\n", + "\n", + "idx_class = test_df[test_df['class'] == 0 ].index\n", + "test_err = test_df.drop(idx_class)\n", + "\n", + "print(test_df.shape)\n", + "print(test_err.shape)" + ], + "execution_count": 17, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(2398, 39)\n", + "(2316, 39)\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "RWB6lAq0_GWg", + "outputId": "7b2bf9cb-1e23-4180-bf21-70dd20ea2986" + }, + "source": [ + "idx_a = test[(test['dst_host_srv_serror_rate'] == 0) & (test['srv_serror_rate'] == 0) & (test['serror_rate'] == 0) & (test['dst_host_serror_rate'] == 0)].index\n", + "test_df = test.drop(idx_a)\n", + "\n", + "idx_class = test_df[test_df['class'] == 0 ].index\n", + "test_err = test_df.drop(idx_class)\n", + "\n", + "print(test_df.shape)\n", + "print(test_err.shape)\n" + ], + "execution_count": 18, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(2499, 39)\n", + "(2338, 39)\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vCNJIYoeD1sO", + "outputId": "7f323bf9-e7c9-4c8b-9ba9-44110c1ebd2f" + }, + "source": [ + "idx_b = test[(test['dst_host_srv_serror_rate'] == 0) | (test['srv_serror_rate'] == 0) | (test['serror_rate'] == 0) | (test['dst_host_serror_rate'] == 0)].index\n", + "test_df = test.drop(idx_b)\n", + "\n", + "idx_class = 
test_df[test_df['class'] == 0 ].index\n", + "test_err = test_df.drop(idx_class)\n", + "\n", + "print(test_df.shape)\n", + "print(test_err.shape)" + ], + "execution_count": 19, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(2301, 39)\n", + "(2300, 39)\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VdYEcIpkHm_R" + }, + "source": [ + "*Apriori test accuracy = 97.03% (mean, over the four single-antecedent serror-rate rules, of the share of matching test rows with class = 1: 2317/2348, 2320/2403, 2323/2412, 2316/2398)*" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "3IJIJs3YHyrO" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file