diff --git a/experiment/a-priori/[DM]Apriori.ipynb b/experiment/a-priori/[DM]Apriori.ipynb deleted file mode 100644 index 799d177003e0562309a916f671f350c5769fb949..0000000000000000000000000000000000000000 --- a/experiment/a-priori/[DM]Apriori.ipynb +++ /dev/null @@ -1,2754 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "[DM]Apriori.ipynb", - "provenance": [], - "collapsed_sections": [], - "authorship_tag": "ABX9TyMXLQwD/DRSd8r6ijM5QcXc", - "include_colab_link": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "view-in-github", - "colab_type": "text" - }, - "source": [ - "<a href=\"https://colab.research.google.com/github/lani009/IDS-DataMining/blob/main/%5BDM%5DApriori.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "HAY_lKeo6NUE" - }, - "source": [ - "import os\n", - "import pandas as pd\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "import time" - ], - "execution_count": 1, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "J02wdPhK76Yc", - "outputId": "09b1579c-618b-4489-fc8a-5a4fa25e7c48" - }, - "source": [ - "data = pd.read_csv('DM_data.csv')\n", - "data.info()" - ], - "execution_count": 2, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "<class 'pandas.core.frame.DataFrame'>\n", - "RangeIndex: 25192 entries, 0 to 25191\n", - "Data columns (total 40 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 duration 25192 non-null int64 \n", - " 1 protocol_type 25192 non-null int64 \n", - " 2 service 25192 non-null int64 \n", - " 3 
flag 25192 non-null int64 \n", - " 4 src_bytes 25192 non-null int64 \n", - " 5 dst_bytes 25192 non-null int64 \n", - " 6 land 25192 non-null int64 \n", - " 7 wrong_fragment 25192 non-null int64 \n", - " 8 hot 25192 non-null int64 \n", - " 9 num_failed_logins 25192 non-null int64 \n", - " 10 logged_in 25192 non-null int64 \n", - " 11 num_compromised 25192 non-null int64 \n", - " 12 root_shell 25192 non-null int64 \n", - " 13 su_attempted 25192 non-null int64 \n", - " 14 num_root 25192 non-null int64 \n", - " 15 num_file_creations 25192 non-null int64 \n", - " 16 num_shells 25192 non-null int64 \n", - " 17 num_access_files 25192 non-null int64 \n", - " 18 is_guest_login 25192 non-null int64 \n", - " 19 count 25192 non-null int64 \n", - " 20 srv_count 25192 non-null int64 \n", - " 21 serror_rate 25192 non-null float64\n", - " 22 srv_serror_rate 25192 non-null float64\n", - " 23 rerror_rate 25192 non-null float64\n", - " 24 srv_rerror_rate 25192 non-null float64\n", - " 25 same_srv_rate 25192 non-null float64\n", - " 26 diff_srv_rate 25192 non-null float64\n", - " 27 srv_diff_host_rate 25192 non-null float64\n", - " 28 dst_host_count 25192 non-null int64 \n", - " 29 dst_host_srv_count 25192 non-null int64 \n", - " 30 dst_host_same_srv_rate 25192 non-null float64\n", - " 31 dst_host_diff_srv_rate 25192 non-null float64\n", - " 32 dst_host_same_src_port_rate 25192 non-null float64\n", - " 33 dst_host_srv_diff_host_rate 25192 non-null float64\n", - " 34 dst_host_serror_rate 25192 non-null float64\n", - " 35 dst_host_srv_serror_rate 25192 non-null float64\n", - " 36 dst_host_rerror_rate 25192 non-null float64\n", - " 37 dst_host_srv_rerror_rate 25192 non-null float64\n", - " 38 class 25192 non-null int64 \n", - " 39 index_num 25192 non-null int64 \n", - "dtypes: float64(15), int64(25)\n", - "memory usage: 7.7 MB\n" - ] - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "phplztW08CAV" - }, - "source": [ - "from sklearn.model_selection import 
train_test_split\n", - "from sklearn.preprocessing import MinMaxScaler, StandardScaler" - ], - "execution_count": 3, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "rWLrmiHs86KH" - }, - "source": [ - "from mlxtend.frequent_patterns import apriori,association_rules" - ], - "execution_count": 4, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 383 - }, - "id": "KpeVfpxYTAHF", - "outputId": "36c95e16-0050-40ee-f4a0-551c9a793eef" - }, - "source": [ - "sc = StandardScaler()\n", - "sc_data = sc.fit_transform(data)\n", - "\n", - "sc_df = pd.DataFrame(sc_data, columns=data.columns)\n", - "sc_df.head(n=10)" - ], - "execution_count": 5, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>duration</th>\n", - " <th>protocol_type</th>\n", - " <th>service</th>\n", - " <th>flag</th>\n", - " <th>src_bytes</th>\n", - " <th>dst_bytes</th>\n", - " <th>land</th>\n", - " <th>wrong_fragment</th>\n", - " <th>hot</th>\n", - " <th>num_failed_logins</th>\n", - " <th>logged_in</th>\n", - " <th>num_compromised</th>\n", - " <th>root_shell</th>\n", - " <th>su_attempted</th>\n", - " <th>num_root</th>\n", - " <th>num_file_creations</th>\n", - " <th>num_shells</th>\n", - " <th>num_access_files</th>\n", - " <th>is_guest_login</th>\n", - " <th>count</th>\n", - " <th>srv_count</th>\n", - " <th>serror_rate</th>\n", - " <th>srv_serror_rate</th>\n", - " <th>rerror_rate</th>\n", - " <th>srv_rerror_rate</th>\n", - " <th>same_srv_rate</th>\n", - " 
<th>diff_srv_rate</th>\n", - " <th>srv_diff_host_rate</th>\n", - " <th>dst_host_count</th>\n", - " <th>dst_host_srv_count</th>\n", - " <th>dst_host_same_srv_rate</th>\n", - " <th>dst_host_diff_srv_rate</th>\n", - " <th>dst_host_same_src_port_rate</th>\n", - " <th>dst_host_srv_diff_host_rate</th>\n", - " <th>dst_host_serror_rate</th>\n", - " <th>dst_host_srv_serror_rate</th>\n", - " <th>dst_host_rerror_rate</th>\n", - " <th>dst_host_srv_rerror_rate</th>\n", - " <th>class</th>\n", - " <th>index_num</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>-0.113551</td>\n", - " <td>-0.444009</td>\n", - " <td>-1.399448</td>\n", - " <td>0.744553</td>\n", - " <td>-0.009889</td>\n", - " <td>-0.039310</td>\n", - " <td>-0.00891</td>\n", - " <td>-0.091223</td>\n", - " <td>-0.091933</td>\n", - " <td>-0.02622</td>\n", - " <td>-0.807626</td>\n", - " <td>-0.021873</td>\n", - " <td>-0.039377</td>\n", - " <td>-0.027665</td>\n", - " <td>-0.021724</td>\n", - " <td>-0.027808</td>\n", - " <td>-0.018905</td>\n", - " <td>-0.043917</td>\n", - " <td>-0.09599</td>\n", - " <td>-0.720244</td>\n", - " <td>-0.354628</td>\n", - " <td>-0.640142</td>\n", - " <td>-0.633978</td>\n", - " <td>-0.372186</td>\n", - " <td>-0.373098</td>\n", - " <td>0.772109</td>\n", - " <td>-0.349282</td>\n", - " <td>-0.373886</td>\n", - " <td>-0.328634</td>\n", - " <td>-0.813985</td>\n", - " <td>-0.779157</td>\n", - " <td>-0.280673</td>\n", - " <td>0.073120</td>\n", - " <td>-0.287993</td>\n", - " <td>-0.641804</td>\n", - " <td>-0.627365</td>\n", - " <td>-0.221668</td>\n", - " <td>-0.374281</td>\n", - " <td>-0.934425</td>\n", - " <td>-1.731982</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>-0.113551</td>\n", - " <td>1.325565</td>\n", - " <td>0.780883</td>\n", - " <td>0.744553</td>\n", - " <td>-0.010032</td>\n", - " <td>-0.039310</td>\n", - " <td>-0.00891</td>\n", - " <td>-0.091223</td>\n", - " <td>-0.091933</td>\n", - " <td>-0.02622</td>\n", - " 
<td>-0.807626</td>\n", - " <td>-0.021873</td>\n", - " <td>-0.039377</td>\n", - " <td>-0.027665</td>\n", - " <td>-0.021724</td>\n", - " <td>-0.027808</td>\n", - " <td>-0.018905</td>\n", - " <td>-0.043917</td>\n", - " <td>-0.09599</td>\n", - " <td>-0.624317</td>\n", - " <td>-0.368427</td>\n", - " <td>-0.640142</td>\n", - " <td>-0.633978</td>\n", - " <td>-0.372186</td>\n", - " <td>-0.373098</td>\n", - " <td>-1.320567</td>\n", - " <td>0.490836</td>\n", - " <td>-0.373886</td>\n", - " <td>0.732059</td>\n", - " <td>-1.030895</td>\n", - " <td>-1.157831</td>\n", - " <td>2.764403</td>\n", - " <td>2.375620</td>\n", - " <td>-0.287993</td>\n", - " <td>-0.641804</td>\n", - " <td>-0.627365</td>\n", - " <td>-0.385140</td>\n", - " <td>-0.374281</td>\n", - " <td>-0.934425</td>\n", - " <td>-1.731845</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>-0.113551</td>\n", - " <td>-0.444009</td>\n", - " <td>-1.377199</td>\n", - " <td>-0.917300</td>\n", - " <td>-0.010093</td>\n", - " <td>-0.039310</td>\n", - " <td>-0.00891</td>\n", - " <td>-0.091223</td>\n", - " <td>-0.091933</td>\n", - " <td>-0.02622</td>\n", - " <td>-0.807626</td>\n", - " <td>-0.021873</td>\n", - " <td>-0.039377</td>\n", - " <td>-0.027665</td>\n", - " <td>-0.021724</td>\n", - " <td>-0.027808</td>\n", - " <td>-0.018905</td>\n", - " <td>-0.043917</td>\n", - " <td>-0.09599</td>\n", - " <td>0.334947</td>\n", - " <td>-0.299430</td>\n", - " <td>1.595477</td>\n", - " <td>1.600209</td>\n", - " <td>-0.372186</td>\n", - " <td>-0.373098</td>\n", - " <td>-1.388806</td>\n", - " <td>0.042773</td>\n", - " <td>-0.373886</td>\n", - " <td>0.732059</td>\n", - " <td>-0.804947</td>\n", - " <td>-0.935081</td>\n", - " <td>-0.173828</td>\n", - " <td>-0.478183</td>\n", - " <td>-0.287993</td>\n", - " <td>1.603834</td>\n", - " <td>1.614454</td>\n", - " <td>-0.385140</td>\n", - " <td>-0.374281</td>\n", - " <td>1.070177</td>\n", - " <td>-1.731707</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>-0.113551</td>\n", - " 
<td>-0.444009</td>\n", - " <td>0.780883</td>\n", - " <td>0.744553</td>\n", - " <td>-0.009996</td>\n", - " <td>0.052473</td>\n", - " <td>-0.00891</td>\n", - " <td>-0.091223</td>\n", - " <td>-0.091933</td>\n", - " <td>-0.02622</td>\n", - " <td>1.238197</td>\n", - " <td>-0.021873</td>\n", - " <td>-0.039377</td>\n", - " <td>-0.027665</td>\n", - " <td>-0.021724</td>\n", - " <td>-0.027808</td>\n", - " <td>-0.018905</td>\n", - " <td>-0.043917</td>\n", - " <td>-0.09599</td>\n", - " <td>-0.694082</td>\n", - " <td>-0.313230</td>\n", - " <td>-0.193018</td>\n", - " <td>-0.187141</td>\n", - " <td>-0.372186</td>\n", - " <td>-0.373098</td>\n", - " <td>0.772109</td>\n", - " <td>-0.349282</td>\n", - " <td>-0.373886</td>\n", - " <td>-1.540854</td>\n", - " <td>1.264742</td>\n", - " <td>1.069663</td>\n", - " <td>-0.440940</td>\n", - " <td>-0.380894</td>\n", - " <td>0.073759</td>\n", - " <td>-0.574435</td>\n", - " <td>-0.604947</td>\n", - " <td>-0.385140</td>\n", - " <td>-0.342768</td>\n", - " <td>-0.934425</td>\n", - " <td>-1.731570</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>-0.113551</td>\n", - " <td>-0.444009</td>\n", - " <td>0.780883</td>\n", - " <td>0.744553</td>\n", - " <td>-0.010010</td>\n", - " <td>-0.034582</td>\n", - " <td>-0.00891</td>\n", - " <td>-0.091223</td>\n", - " <td>-0.091933</td>\n", - " <td>-0.02622</td>\n", - " <td>1.238197</td>\n", - " <td>-0.021873</td>\n", - " <td>-0.039377</td>\n", - " <td>-0.027665</td>\n", - " <td>-0.021724</td>\n", - " <td>-0.027808</td>\n", - " <td>-0.018905</td>\n", - " <td>-0.043917</td>\n", - " <td>-0.09599</td>\n", - " <td>-0.476067</td>\n", - " <td>0.059355</td>\n", - " <td>-0.640142</td>\n", - " <td>-0.633978</td>\n", - " <td>-0.372186</td>\n", - " <td>-0.373098</td>\n", - " <td>0.772109</td>\n", - " <td>-0.349282</td>\n", - " <td>-0.023115</td>\n", - " <td>0.732059</td>\n", - " <td>1.264742</td>\n", - " <td>1.069663</td>\n", - " <td>-0.440940</td>\n", - " <td>-0.478183</td>\n", - " <td>-0.287993</td>\n", - " 
<td>-0.641804</td>\n", - " <td>-0.627365</td>\n", - " <td>-0.385140</td>\n", - " <td>-0.374281</td>\n", - " <td>-0.934425</td>\n", - " <td>-1.731432</td>\n", - " </tr>\n", - " <tr>\n", - " <th>5</th>\n", - " <td>-0.113551</td>\n", - " <td>-0.444009</td>\n", - " <td>-1.377199</td>\n", - " <td>-2.025203</td>\n", - " <td>-0.010093</td>\n", - " <td>-0.039310</td>\n", - " <td>-0.00891</td>\n", - " <td>-0.091223</td>\n", - " <td>-0.091933</td>\n", - " <td>-0.02622</td>\n", - " <td>-0.807626</td>\n", - " <td>-0.021873</td>\n", - " <td>-0.039377</td>\n", - " <td>-0.027665</td>\n", - " <td>-0.021724</td>\n", - " <td>-0.027808</td>\n", - " <td>-0.018905</td>\n", - " <td>-0.043917</td>\n", - " <td>-0.09599</td>\n", - " <td>0.317506</td>\n", - " <td>-0.120038</td>\n", - " <td>-0.640142</td>\n", - " <td>-0.633978</td>\n", - " <td>2.765176</td>\n", - " <td>2.729322</td>\n", - " <td>-1.138595</td>\n", - " <td>-0.013235</td>\n", - " <td>-0.373886</td>\n", - " <td>0.732059</td>\n", - " <td>-0.868212</td>\n", - " <td>-1.001906</td>\n", - " <td>-0.066984</td>\n", - " <td>-0.478183</td>\n", - " <td>-0.287993</td>\n", - " <td>-0.641804</td>\n", - " <td>-0.627365</td>\n", - " <td>2.884296</td>\n", - " <td>2.777041</td>\n", - " <td>1.070177</td>\n", - " <td>-1.731295</td>\n", - " </tr>\n", - " <tr>\n", - " <th>6</th>\n", - " <td>-0.113551</td>\n", - " <td>-0.444009</td>\n", - " <td>-1.377199</td>\n", - " <td>-0.917300</td>\n", - " <td>-0.010093</td>\n", - " <td>-0.039310</td>\n", - " <td>-0.00891</td>\n", - " <td>-0.091223</td>\n", - " <td>-0.091933</td>\n", - " <td>-0.02622</td>\n", - " <td>-0.807626</td>\n", - " <td>-0.021873</td>\n", - " <td>-0.039377</td>\n", - " <td>-0.027665</td>\n", - " <td>-0.021724</td>\n", - " <td>-0.027808</td>\n", - " <td>-0.018905</td>\n", - " <td>-0.043917</td>\n", - " <td>-0.09599</td>\n", - " <td>0.709933</td>\n", - " <td>-0.258032</td>\n", - " <td>1.595477</td>\n", - " <td>1.600209</td>\n", - " <td>-0.372186</td>\n", - " <td>-0.373098</td>\n", - " 
<td>-1.388806</td>\n", - " <td>-0.013235</td>\n", - " <td>-0.373886</td>\n", - " <td>0.732059</td>\n", - " <td>-0.958592</td>\n", - " <td>-1.068731</td>\n", - " <td>-0.173828</td>\n", - " <td>-0.478183</td>\n", - " <td>-0.287993</td>\n", - " <td>1.603834</td>\n", - " <td>1.614454</td>\n", - " <td>-0.385140</td>\n", - " <td>-0.374281</td>\n", - " <td>1.070177</td>\n", - " <td>-1.731157</td>\n", - " </tr>\n", - " <tr>\n", - " <th>7</th>\n", - " <td>-0.113551</td>\n", - " <td>-0.444009</td>\n", - " <td>-1.377199</td>\n", - " <td>-0.917300</td>\n", - " <td>-0.010093</td>\n", - " <td>-0.039310</td>\n", - " <td>-0.00891</td>\n", - " <td>-0.091223</td>\n", - " <td>-0.091933</td>\n", - " <td>-0.02622</td>\n", - " <td>-0.807626</td>\n", - " <td>-0.021873</td>\n", - " <td>-0.039377</td>\n", - " <td>-0.027665</td>\n", - " <td>-0.021724</td>\n", - " <td>-0.027808</td>\n", - " <td>-0.018905</td>\n", - " <td>-0.043917</td>\n", - " <td>-0.09599</td>\n", - " <td>0.282624</td>\n", - " <td>-0.161436</td>\n", - " <td>1.595477</td>\n", - " <td>1.600209</td>\n", - " <td>-0.372186</td>\n", - " <td>-0.373098</td>\n", - " <td>-1.184088</td>\n", - " <td>-0.013235</td>\n", - " <td>-0.373886</td>\n", - " <td>0.732059</td>\n", - " <td>-0.904364</td>\n", - " <td>-1.024181</td>\n", - " <td>-0.066984</td>\n", - " <td>-0.478183</td>\n", - " <td>-0.287993</td>\n", - " <td>1.603834</td>\n", - " <td>1.614454</td>\n", - " <td>-0.385140</td>\n", - " <td>-0.374281</td>\n", - " <td>1.070177</td>\n", - " <td>-1.731019</td>\n", - " </tr>\n", - " <tr>\n", - " <th>8</th>\n", - " <td>-0.113551</td>\n", - " <td>-0.444009</td>\n", - " <td>0.780883</td>\n", - " <td>-0.917300</td>\n", - " <td>-0.010093</td>\n", - " <td>-0.039310</td>\n", - " <td>-0.00891</td>\n", - " <td>-0.091223</td>\n", - " <td>-0.091933</td>\n", - " <td>-0.02622</td>\n", - " <td>-0.807626</td>\n", - " <td>-0.021873</td>\n", - " <td>-0.039377</td>\n", - " <td>-0.027665</td>\n", - " <td>-0.021724</td>\n", - " <td>-0.027808</td>\n", - " 
<td>-0.018905</td>\n", - " <td>-0.043917</td>\n", - " <td>-0.09599</td>\n", - " <td>1.616874</td>\n", - " <td>-0.064840</td>\n", - " <td>1.595477</td>\n", - " <td>1.600209</td>\n", - " <td>-0.372186</td>\n", - " <td>-0.373098</td>\n", - " <td>-1.297820</td>\n", - " <td>-0.069243</td>\n", - " <td>-0.373886</td>\n", - " <td>0.732059</td>\n", - " <td>-0.832060</td>\n", - " <td>-0.957356</td>\n", - " <td>-0.173828</td>\n", - " <td>-0.478183</td>\n", - " <td>-0.287993</td>\n", - " <td>1.603834</td>\n", - " <td>1.614454</td>\n", - " <td>-0.385140</td>\n", - " <td>-0.374281</td>\n", - " <td>1.070177</td>\n", - " <td>-1.730882</td>\n", - " </tr>\n", - " <tr>\n", - " <th>9</th>\n", - " <td>-0.113551</td>\n", - " <td>-0.444009</td>\n", - " <td>-1.377199</td>\n", - " <td>-0.917300</td>\n", - " <td>-0.010093</td>\n", - " <td>-0.039310</td>\n", - " <td>-0.00891</td>\n", - " <td>-0.091223</td>\n", - " <td>-0.091933</td>\n", - " <td>-0.02622</td>\n", - " <td>-0.807626</td>\n", - " <td>-0.021873</td>\n", - " <td>-0.039377</td>\n", - " <td>-0.027665</td>\n", - " <td>-0.021724</td>\n", - " <td>-0.027808</td>\n", - " <td>-0.018905</td>\n", - " <td>-0.043917</td>\n", - " <td>-0.09599</td>\n", - " <td>0.422153</td>\n", - " <td>-0.271831</td>\n", - " <td>1.595477</td>\n", - " <td>1.600209</td>\n", - " <td>-0.372186</td>\n", - " <td>-0.373098</td>\n", - " <td>-1.366060</td>\n", - " <td>-0.013235</td>\n", - " <td>-0.373886</td>\n", - " <td>0.732059</td>\n", - " <td>-0.922440</td>\n", - " <td>-1.046456</td>\n", - " <td>-0.120406</td>\n", - " <td>-0.478183</td>\n", - " <td>-0.287993</td>\n", - " <td>1.603834</td>\n", - " <td>1.614454</td>\n", - " <td>-0.385140</td>\n", - " <td>-0.374281</td>\n", - " <td>1.070177</td>\n", - " <td>-1.730744</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " duration protocol_type ... class index_num\n", - "0 -0.113551 -0.444009 ... -0.934425 -1.731982\n", - "1 -0.113551 1.325565 ... 
-0.934425 -1.731845\n", - "2 -0.113551 -0.444009 ... 1.070177 -1.731707\n", - "3 -0.113551 -0.444009 ... -0.934425 -1.731570\n", - "4 -0.113551 -0.444009 ... -0.934425 -1.731432\n", - "5 -0.113551 -0.444009 ... 1.070177 -1.731295\n", - "6 -0.113551 -0.444009 ... 1.070177 -1.731157\n", - "7 -0.113551 -0.444009 ... 1.070177 -1.731019\n", - "8 -0.113551 -0.444009 ... 1.070177 -1.730882\n", - "9 -0.113551 -0.444009 ... 1.070177 -1.730744\n", - "\n", - "[10 rows x 40 columns]" - ] - }, - "metadata": {}, - "execution_count": 5 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 383 - }, - "id": "LKK6fIznTzpy", - "outputId": "76b1e212-a88e-4397-da4c-27bae9acbff8" - }, - "source": [ - "def encode_units(x):\n", - " if x <= 0 :\n", - " return 0\n", - " if x >= 0 :\n", - " return 1\n", - "\n", - "train_df = sc_df.applymap(encode_units)\n", - "\n", - "train_df.head(n=10)" - ], - "execution_count": 6, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>duration</th>\n", - " <th>protocol_type</th>\n", - " <th>service</th>\n", - " <th>flag</th>\n", - " <th>src_bytes</th>\n", - " <th>dst_bytes</th>\n", - " <th>land</th>\n", - " <th>wrong_fragment</th>\n", - " <th>hot</th>\n", - " <th>num_failed_logins</th>\n", - " <th>logged_in</th>\n", - " <th>num_compromised</th>\n", - " <th>root_shell</th>\n", - " <th>su_attempted</th>\n", - " <th>num_root</th>\n", - " <th>num_file_creations</th>\n", - " <th>num_shells</th>\n", - " <th>num_access_files</th>\n", - " 
<th>is_guest_login</th>\n", - " <th>count</th>\n", - " <th>srv_count</th>\n", - " <th>serror_rate</th>\n", - " <th>srv_serror_rate</th>\n", - " <th>rerror_rate</th>\n", - " <th>srv_rerror_rate</th>\n", - " <th>same_srv_rate</th>\n", - " <th>diff_srv_rate</th>\n", - " <th>srv_diff_host_rate</th>\n", - " <th>dst_host_count</th>\n", - " <th>dst_host_srv_count</th>\n", - " <th>dst_host_same_srv_rate</th>\n", - " <th>dst_host_diff_srv_rate</th>\n", - " <th>dst_host_same_src_port_rate</th>\n", - " <th>dst_host_srv_diff_host_rate</th>\n", - " <th>dst_host_serror_rate</th>\n", - " <th>dst_host_srv_serror_rate</th>\n", - " <th>dst_host_rerror_rate</th>\n", - " <th>dst_host_srv_rerror_rate</th>\n", - " <th>class</th>\n", - " <th>index_num</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " 
<td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " 
<td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>5</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>6</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " 
<td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>7</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>8</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " 
<td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>9</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " duration protocol_type service ... dst_host_srv_rerror_rate class index_num\n", - "0 0 0 0 ... 0 0 0\n", - "1 0 1 1 ... 0 0 0\n", - "2 0 0 0 ... 0 1 0\n", - "3 0 0 1 ... 0 0 0\n", - "4 0 0 1 ... 0 0 0\n", - "5 0 0 0 ... 1 1 0\n", - "6 0 0 0 ... 0 1 0\n", - "7 0 0 0 ... 0 1 0\n", - "8 0 0 1 ... 0 1 0\n", - "9 0 0 0 ... 
0 1 0\n", - "\n", - "[10 rows x 40 columns]" - ] - }, - "metadata": {}, - "execution_count": 6 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "JmfJO9mn9_Te", - "outputId": "9c34b569-33d6-470a-f9fe-9d35b40f05ac" - }, - "source": [ - "data_X = train_df.drop(columns = [\"index_num\"])\n", - "\n", - "X_train, X_test = train_test_split(data_X, test_size=0.33, random_state=42)\n", - "print(X_train.shape, X_test.shape)" - ], - "execution_count": 7, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "(16878, 39) (8314, 39)\n" - ] - } - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 226 - }, - "id": "FAqOwB0oVeAK", - "outputId": "77753fc9-2e55-4810-97e7-4be2d0491ede" - }, - "source": [ - "df = pd.DataFrame(X_train, columns=data.drop(columns = [\"index_num\"]).columns)\n", - "\n", - "df.head()" - ], - "execution_count": 8, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>duration</th>\n", - " <th>protocol_type</th>\n", - " <th>service</th>\n", - " <th>flag</th>\n", - " <th>src_bytes</th>\n", - " <th>dst_bytes</th>\n", - " <th>land</th>\n", - " <th>wrong_fragment</th>\n", - " <th>hot</th>\n", - " <th>num_failed_logins</th>\n", - " <th>logged_in</th>\n", - " <th>num_compromised</th>\n", - " <th>root_shell</th>\n", - " <th>su_attempted</th>\n", - " <th>num_root</th>\n", - " <th>num_file_creations</th>\n", - " <th>num_shells</th>\n", - " 
<th>num_access_files</th>\n", - " <th>is_guest_login</th>\n", - " <th>count</th>\n", - " <th>srv_count</th>\n", - " <th>serror_rate</th>\n", - " <th>srv_serror_rate</th>\n", - " <th>rerror_rate</th>\n", - " <th>srv_rerror_rate</th>\n", - " <th>same_srv_rate</th>\n", - " <th>diff_srv_rate</th>\n", - " <th>srv_diff_host_rate</th>\n", - " <th>dst_host_count</th>\n", - " <th>dst_host_srv_count</th>\n", - " <th>dst_host_same_srv_rate</th>\n", - " <th>dst_host_diff_srv_rate</th>\n", - " <th>dst_host_same_src_port_rate</th>\n", - " <th>dst_host_srv_diff_host_rate</th>\n", - " <th>dst_host_serror_rate</th>\n", - " <th>dst_host_srv_serror_rate</th>\n", - " <th>dst_host_rerror_rate</th>\n", - " <th>dst_host_srv_rerror_rate</th>\n", - " <th>class</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>14666</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>10743</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " 
<td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2487</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>21251</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " 
<th>7387</th>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " duration protocol_type ... dst_host_srv_rerror_rate class\n", - "14666 0 0 ... 0 1\n", - "10743 0 0 ... 0 0\n", - "2487 0 0 ... 0 1\n", - "21251 0 0 ... 0 1\n", - "7387 0 1 ... 
0 0\n", - "\n", - "[5 rows x 39 columns]" - ] - }, - "metadata": {}, - "execution_count": 8 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 424 - }, - "id": "71bCJO3_-Nrz", - "outputId": "d7f6b79a-7144-473e-f576-aeeb523d856e" - }, - "source": [ - "frequent_itemsets = apriori( df, min_support = 0.01, use_colnames=True, max_len =2)\n", - "result_desc = frequent_itemsets.sort_values(['support'],ascending =[False])\n", - "result_desc" - ], - "execution_count": 9, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>support</th>\n", - " <th>itemsets</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>18</th>\n", - " <td>0.642612</td>\n", - " <td>(dst_host_count)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>15</th>\n", - " <td>0.621519</td>\n", - " <td>(same_srv_rate)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>0.619327</td>\n", - " <td>(service)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>0.610973</td>\n", - " <td>(flag)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>90</th>\n", - " <td>0.566833</td>\n", - " <td>(flag, same_srv_rate)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>...</th>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " </tr>\n", - " <tr>\n", - " <th>112</th>\n", - " <td>0.010724</td>\n", - " <td>(dst_bytes, dst_host_same_src_port_rate)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>129</th>\n", - " <td>0.010606</td>\n", - " <td>(num_compromised, same_srv_rate)</td>\n", - " 
</tr>\n", - " <tr>\n", - " <th>38</th>\n", - " <td>0.010487</td>\n", - " <td>(dst_host_rerror_rate, duration)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>85</th>\n", - " <td>0.010191</td>\n", - " <td>(num_compromised, flag)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>40</th>\n", - " <td>0.010013</td>\n", - " <td>(class, duration)</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "<p>259 rows × 2 columns</p>\n", - "</div>" - ], - "text/plain": [ - " support itemsets\n", - "18 0.642612 (dst_host_count)\n", - "15 0.621519 (same_srv_rate)\n", - "2 0.619327 (service)\n", - "3 0.610973 (flag)\n", - "90 0.566833 (flag, same_srv_rate)\n", - ".. ... ...\n", - "112 0.010724 (dst_bytes, dst_host_same_src_port_rate)\n", - "129 0.010606 (num_compromised, same_srv_rate)\n", - "38 0.010487 (dst_host_rerror_rate, duration)\n", - "85 0.010191 (num_compromised, flag)\n", - "40 0.010013 (class, duration)\n", - "\n", - "[259 rows x 2 columns]" - ] - }, - "metadata": {}, - "execution_count": 9 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 424 - }, - "id": "DTT1_SWX-btw", - "outputId": "3c61a887-5192-497b-aaf2-df0bf26e3f56" - }, - "source": [ - "rules = association_rules(result_desc , metric = \"confidence\" , min_threshold = 0.90)\n", - "rules = rules.sort_values(['confidence','lift'], ascending=[False , False])\n", - "rules" - ], - "execution_count": 10, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>antecedents</th>\n", - " <th>consequents</th>\n", 
- " <th>antecedent support</th>\n", - " <th>consequent support</th>\n", - " <th>support</th>\n", - " <th>confidence</th>\n", - " <th>lift</th>\n", - " <th>leverage</th>\n", - " <th>conviction</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>61</th>\n", - " <td>(num_compromised)</td>\n", - " <td>(logged_in)</td>\n", - " <td>0.010724</td>\n", - " <td>0.392464</td>\n", - " <td>0.010724</td>\n", - " <td>1.000000</td>\n", - " <td>2.548007</td>\n", - " <td>0.006515</td>\n", - " <td>inf</td>\n", - " </tr>\n", - " <tr>\n", - " <th>31</th>\n", - " <td>(protocol_type)</td>\n", - " <td>(flag)</td>\n", - " <td>0.186515</td>\n", - " <td>0.610973</td>\n", - " <td>0.186515</td>\n", - " <td>1.000000</td>\n", - " <td>1.636734</td>\n", - " <td>0.072559</td>\n", - " <td>inf</td>\n", - " </tr>\n", - " <tr>\n", - " <th>60</th>\n", - " <td>(num_compromised)</td>\n", - " <td>(service)</td>\n", - " <td>0.010724</td>\n", - " <td>0.619327</td>\n", - " <td>0.010724</td>\n", - " <td>1.000000</td>\n", - " <td>1.614656</td>\n", - " <td>0.004082</td>\n", - " <td>inf</td>\n", - " </tr>\n", - " <tr>\n", - " <th>37</th>\n", - " <td>(srv_count)</td>\n", - " <td>(flag)</td>\n", - " <td>0.129814</td>\n", - " <td>0.610973</td>\n", - " <td>0.129281</td>\n", - " <td>0.995892</td>\n", - " <td>1.630011</td>\n", - " <td>0.049968</td>\n", - " <td>94.706495</td>\n", - " </tr>\n", - " <tr>\n", - " <th>11</th>\n", - " <td>(srv_serror_rate)</td>\n", - " <td>(serror_rate)</td>\n", - " <td>0.285164</td>\n", - " <td>0.287771</td>\n", - " <td>0.283920</td>\n", - " <td>0.995637</td>\n", - " <td>3.459823</td>\n", - " <td>0.201858</td>\n", - " <td>163.236089</td>\n", - " </tr>\n", - " <tr>\n", - " <th>...</th>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " </tr>\n", - " <tr>\n", - " <th>9</th>\n", - " <td>(logged_in)</td>\n", - " 
<td>(service)</td>\n", - " <td>0.392464</td>\n", - " <td>0.619327</td>\n", - " <td>0.359403</td>\n", - " <td>0.915761</td>\n", - " <td>1.478639</td>\n", - " <td>0.116340</td>\n", - " <td>4.518958</td>\n", - " </tr>\n", - " <tr>\n", - " <th>53</th>\n", - " <td>(dst_bytes)</td>\n", - " <td>(dst_host_same_srv_rate)</td>\n", - " <td>0.097583</td>\n", - " <td>0.498163</td>\n", - " <td>0.089288</td>\n", - " <td>0.914997</td>\n", - " <td>1.836741</td>\n", - " <td>0.040676</td>\n", - " <td>5.903750</td>\n", - " </tr>\n", - " <tr>\n", - " <th>59</th>\n", - " <td>(src_bytes)</td>\n", - " <td>(flag)</td>\n", - " <td>0.012798</td>\n", - " <td>0.610973</td>\n", - " <td>0.011672</td>\n", - " <td>0.912037</td>\n", - " <td>1.492762</td>\n", - " <td>0.003853</td>\n", - " <td>4.422624</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>(same_srv_rate)</td>\n", - " <td>(flag)</td>\n", - " <td>0.621519</td>\n", - " <td>0.610973</td>\n", - " <td>0.566833</td>\n", - " <td>0.912011</td>\n", - " <td>1.492720</td>\n", - " <td>0.187101</td>\n", - " <td>4.421338</td>\n", - " </tr>\n", - " <tr>\n", - " <th>47</th>\n", - " <td>(srv_count)</td>\n", - " <td>(dst_host_same_srv_rate)</td>\n", - " <td>0.129814</td>\n", - " <td>0.498163</td>\n", - " <td>0.117194</td>\n", - " <td>0.902784</td>\n", - " <td>1.812225</td>\n", - " <td>0.052525</td>\n", - " <td>5.162086</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "<p>64 rows × 9 columns</p>\n", - "</div>" - ], - "text/plain": [ - " antecedents consequents ... leverage conviction\n", - "61 (num_compromised) (logged_in) ... 0.006515 inf\n", - "31 (protocol_type) (flag) ... 0.072559 inf\n", - "60 (num_compromised) (service) ... 0.004082 inf\n", - "37 (srv_count) (flag) ... 0.049968 94.706495\n", - "11 (srv_serror_rate) (serror_rate) ... 0.201858 163.236089\n", - ".. ... ... ... ... ...\n", - "9 (logged_in) (service) ... 0.116340 4.518958\n", - "53 (dst_bytes) (dst_host_same_srv_rate) ... 
0.040676 5.903750\n", - "59 (src_bytes) (flag) ... 0.003853 4.422624\n", - "1 (same_srv_rate) (flag) ... 0.187101 4.421338\n", - "47 (srv_count) (dst_host_same_srv_rate) ... 0.052525 5.162086\n", - "\n", - "[64 rows x 9 columns]" - ] - }, - "metadata": {}, - "execution_count": 10 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 175 - }, - "id": "vy-AH96DXMYb", - "outputId": "990db3eb-ab88-4504-8c79-873f50c9a521" - }, - "source": [ - "rules[rules['consequents'] == {\"class\"}]" - ], - "execution_count": 11, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>antecedents</th>\n", - " <th>consequents</th>\n", - " <th>antecedent support</th>\n", - " <th>consequent support</th>\n", - " <th>support</th>\n", - " <th>confidence</th>\n", - " <th>lift</th>\n", - " <th>leverage</th>\n", - " <th>conviction</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>26</th>\n", - " <td>(dst_host_srv_serror_rate)</td>\n", - " <td>(class)</td>\n", - " <td>0.281017</td>\n", - " <td>0.467295</td>\n", - " <td>0.276277</td>\n", - " <td>0.983133</td>\n", - " <td>2.103882</td>\n", - " <td>0.144959</td>\n", - " <td>31.582765</td>\n", - " </tr>\n", - " <tr>\n", - " <th>20</th>\n", - " <td>(srv_serror_rate)</td>\n", - " <td>(class)</td>\n", - " <td>0.285164</td>\n", - " <td>0.467295</td>\n", - " <td>0.277047</td>\n", - " <td>0.971535</td>\n", - " <td>2.079064</td>\n", - " <td>0.143791</td>\n", - " <td>18.714676</td>\n", - " </tr>\n", - " <tr>\n", - " 
<th>17</th>\n", - " <td>(serror_rate)</td>\n", - " <td>(class)</td>\n", - " <td>0.287771</td>\n", - " <td>0.467295</td>\n", - " <td>0.277580</td>\n", - " <td>0.964587</td>\n", - " <td>2.064195</td>\n", - " <td>0.143106</td>\n", - " <td>15.042730</td>\n", - " </tr>\n", - " <tr>\n", - " <th>25</th>\n", - " <td>(dst_host_serror_rate)</td>\n", - " <td>(class)</td>\n", - " <td>0.287060</td>\n", - " <td>0.467295</td>\n", - " <td>0.276514</td>\n", - " <td>0.963261</td>\n", - " <td>2.061357</td>\n", - " <td>0.142372</td>\n", - " <td>14.499759</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " antecedents consequents ... leverage conviction\n", - "26 (dst_host_srv_serror_rate) (class) ... 0.144959 31.582765\n", - "20 (srv_serror_rate) (class) ... 0.143791 18.714676\n", - "17 (serror_rate) (class) ... 0.143106 15.042730\n", - "25 (dst_host_serror_rate) (class) ... 0.142372 14.499759\n", - "\n", - "[4 rows x 9 columns]" - ] - }, - "metadata": {}, - "execution_count": 11 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 383 - }, - "id": "9jVN92COXRsJ", - "outputId": "b1a430b7-2403-409f-d7ff-50e55fa0b815" - }, - "source": [ - "test = pd.DataFrame(X_test, columns=data.drop(columns = [\"index_num\"]).columns)\n", - "test.head(n=10)" - ], - "execution_count": 12, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>duration</th>\n", - " <th>protocol_type</th>\n", - " <th>service</th>\n", - " <th>flag</th>\n", - " 
<th>src_bytes</th>\n", - " <th>dst_bytes</th>\n", - " <th>land</th>\n", - " <th>wrong_fragment</th>\n", - " <th>hot</th>\n", - " <th>num_failed_logins</th>\n", - " <th>logged_in</th>\n", - " <th>num_compromised</th>\n", - " <th>root_shell</th>\n", - " <th>su_attempted</th>\n", - " <th>num_root</th>\n", - " <th>num_file_creations</th>\n", - " <th>num_shells</th>\n", - " <th>num_access_files</th>\n", - " <th>is_guest_login</th>\n", - " <th>count</th>\n", - " <th>srv_count</th>\n", - " <th>serror_rate</th>\n", - " <th>srv_serror_rate</th>\n", - " <th>rerror_rate</th>\n", - " <th>srv_rerror_rate</th>\n", - " <th>same_srv_rate</th>\n", - " <th>diff_srv_rate</th>\n", - " <th>srv_diff_host_rate</th>\n", - " <th>dst_host_count</th>\n", - " <th>dst_host_srv_count</th>\n", - " <th>dst_host_same_srv_rate</th>\n", - " <th>dst_host_diff_srv_rate</th>\n", - " <th>dst_host_same_src_port_rate</th>\n", - " <th>dst_host_srv_diff_host_rate</th>\n", - " <th>dst_host_serror_rate</th>\n", - " <th>dst_host_srv_serror_rate</th>\n", - " <th>dst_host_rerror_rate</th>\n", - " <th>dst_host_srv_rerror_rate</th>\n", - " <th>class</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>19064</th>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " 
<th>11127</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>6517</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2973</th>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - 
" <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>13339</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>19289</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2166</th>\n", - " <td>0</td>\n", - " 
<td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>5548</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>10887</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " 
<td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2222</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " duration protocol_type ... dst_host_srv_rerror_rate class\n", - "19064 0 1 ... 0 1\n", - "11127 0 0 ... 1 0\n", - "6517 0 0 ... 0 1\n", - "2973 0 1 ... 0 0\n", - "13339 0 0 ... 0 1\n", - "19289 0 0 ... 0 0\n", - "2166 0 0 ... 0 0\n", - "5548 0 0 ... 0 0\n", - "10887 0 0 ... 0 0\n", - "2222 0 0 ... 
0 1\n", - "\n", - "[10 rows x 39 columns]" - ] - }, - "metadata": {}, - "execution_count": 12 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "hBLP3xt-ulXS" - }, - "source": [ - "col = ['dst_host_srv_serror_rate', 'srv_serror_rate', 'serror_rate', 'dst_host_serror_rate']" - ], - "execution_count": 13, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "fEZdTRhXtTG4", - "outputId": "5fca2953-7360-4cf8-82c1-5baab9a49c21" - }, - "source": [ - "idx_1 = test[test['dst_host_srv_serror_rate'] == 0 ].index\n", - "test_df = test.drop(idx_1)\n", - "\n", - "idx_class = test_df[test_df['class'] == 0 ].index\n", - "test_err = test_df.drop(idx_class)\n", - "\n", - "print(test_df.shape)\n", - "print(test_err.shape)\n" - ], - "execution_count": 14, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "(2348, 39)\n", - "(2317, 39)\n" - ] - } - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "IoeXwrG28q42", - "outputId": "e4f64606-53ec-428a-9e9f-772ac40fe21b" - }, - "source": [ - "idx_2 = test[test['srv_serror_rate'] == 0 ].index\n", - "test_df = test.drop(idx_2)\n", - "\n", - "idx_class = test_df[test_df['class'] == 0 ].index\n", - "test_err = test_df.drop(idx_class)\n", - "\n", - "print(test_df.shape)\n", - "print(test_err.shape)" - ], - "execution_count": 15, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "(2403, 39)\n", - "(2320, 39)\n" - ] - } - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "NcRtGdgi-oFU", - "outputId": "7735a532-0de5-4f37-90b8-a83ef15d7b46" - }, - "source": [ - "idx_3 = test[test['serror_rate'] == 0 ].index\n", - "test_df = test.drop(idx_3)\n", - "\n", - "idx_class = test_df[test_df['class'] == 0 ].index\n", - "test_err = test_df.drop(idx_class)\n", - "\n", - 
"print(test_df.shape)\n", - "print(test_err.shape)" - ], - "execution_count": 16, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "(2412, 39)\n", - "(2323, 39)\n" - ] - } - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "BJh3SoYy-1bm", - "outputId": "7520531c-4624-4125-86ff-41bc4a119313" - }, - "source": [ - "idx_4 = test[test['dst_host_serror_rate'] == 0 ].index\n", - "test_df = test.drop(idx_4)\n", - "\n", - "idx_class = test_df[test_df['class'] == 0 ].index\n", - "test_err = test_df.drop(idx_class)\n", - "\n", - "print(test_df.shape)\n", - "print(test_err.shape)" - ], - "execution_count": 17, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "(2398, 39)\n", - "(2316, 39)\n" - ] - } - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "RWB6lAq0_GWg", - "outputId": "7b2bf9cb-1e23-4180-bf21-70dd20ea2986" - }, - "source": [ - "idx_a = test[(test['dst_host_srv_serror_rate'] == 0) & (test['srv_serror_rate'] == 0) & (test['serror_rate'] == 0) & (test['dst_host_serror_rate'] == 0)].index\n", - "test_df = test.drop(idx_a)\n", - "\n", - "idx_class = test_df[test_df['class'] == 0 ].index\n", - "test_err = test_df.drop(idx_class)\n", - "\n", - "print(test_df.shape)\n", - "print(test_err.shape)\n" - ], - "execution_count": 18, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "(2499, 39)\n", - "(2338, 39)\n" - ] - } - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "vCNJIYoeD1sO", - "outputId": "7f323bf9-e7c9-4c8b-9ba9-44110c1ebd2f" - }, - "source": [ - "idx_b = test[(test['dst_host_srv_serror_rate'] == 0) | (test['srv_serror_rate'] == 0) | (test['serror_rate'] == 0) | (test['dst_host_serror_rate'] == 0)].index\n", - "test_df = test.drop(idx_b)\n", - "\n", - "idx_class = 
test_df[test_df['class'] == 0 ].index\n", - "test_err = test_df.drop(idx_class)\n", - "\n", - "print(test_df.shape)\n", - "print(test_err.shape)" - ], - "execution_count": 19, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "(2301, 39)\n", - "(2300, 39)\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "VdYEcIpkHm_R" - }, - "source": [ - "*Apriori test accuracy = 97.03%* (mean test-set confidence of the four single-antecedent rules predicting `class`: 2317/2348, 2320/2403, 2323/2412 and 2316/2398; this is the precision of the rules on the hold-out split, not overall classification accuracy)" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "3IJIJs3YHyrO" - }, - "source": [ - "" - ], - "execution_count": null, - "outputs": [] - } - ] -}