From b692c7a24c9050c38db2be432d4c8b37c6d8b5e2 Mon Sep 17 00:00:00 2001 From: Lani Jung <lani009@naver.com> Date: Tue, 7 Dec 2021 19:06:07 +0900 Subject: [PATCH] delete A_priori from kaggle deom code --- experiment/a-priori/A_priori.ipynb | 3100 ---------------------------- 1 file changed, 3100 deletions(-) delete mode 100644 experiment/a-priori/A_priori.ipynb diff --git a/experiment/a-priori/A_priori.ipynb b/experiment/a-priori/A_priori.ipynb deleted file mode 100644 index 8c24cb4..0000000 --- a/experiment/a-priori/A_priori.ipynb +++ /dev/null @@ -1,3100 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "<class 'pandas.core.frame.DataFrame'>\n", - "RangeIndex: 25192 entries, 0 to 25191\n", - "Data columns (total 42 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 duration 25192 non-null int64 \n", - " 1 protocol_type 25192 non-null object \n", - " 2 service 25192 non-null object \n", - " 3 flag 25192 non-null object \n", - " 4 src_bytes 25192 non-null int64 \n", - " 5 dst_bytes 25192 non-null int64 \n", - " 6 land 25192 non-null int64 \n", - " 7 wrong_fragment 25192 non-null int64 \n", - " 8 urgent 25192 non-null int64 \n", - " 9 hot 25192 non-null int64 \n", - " 10 num_failed_logins 25192 non-null int64 \n", - " 11 logged_in 25192 non-null int64 \n", - " 12 num_compromised 25192 non-null int64 \n", - " 13 root_shell 25192 non-null int64 \n", - " 14 su_attempted 25192 non-null int64 \n", - " 15 num_root 25192 non-null int64 \n", - " 16 num_file_creations 25192 non-null int64 \n", - " 17 num_shells 25192 non-null int64 \n", - " 18 num_access_files 25192 non-null int64 \n", - " 19 num_outbound_cmds 25192 non-null int64 \n", - " 20 is_host_login 25192 non-null int64 \n", - " 21 is_guest_login 25192 non-null int64 \n", - " 22 count 25192 non-null int64 \n", - " 23 srv_count 25192 non-null int64 \n", - " 24 serror_rate 25192 non-null float64\n", - " 25 srv_serror_rate 25192 non-null float64\n", - " 26 rerror_rate 25192 non-null float64\n", - " 27 srv_rerror_rate 25192 non-null float64\n", - " 28 same_srv_rate 25192 non-null float64\n", - " 29 diff_srv_rate 25192 non-null float64\n", - " 30 srv_diff_host_rate 25192 non-null float64\n", - " 31 dst_host_count 25192 non-null int64 \n", - " 32 dst_host_srv_count 25192 non-null int64 \n", - " 33 dst_host_same_srv_rate 25192 non-null float64\n", - " 34 dst_host_diff_srv_rate 25192 non-null float64\n", - " 35 dst_host_same_src_port_rate 25192 non-null float64\n", - " 36 dst_host_srv_diff_host_rate 25192 non-null float64\n", - " 37 dst_host_serror_rate 25192 non-null float64\n", - " 38 dst_host_srv_serror_rate 25192 non-null float64\n", - " 39 dst_host_rerror_rate 25192 non-null float64\n", - " 40 dst_host_srv_rerror_rate 25192 non-null float64\n", - " 41 class 25192 non-null object \n", - "dtypes: float64(15), int64(23), object(4)\n", - "memory usage: 8.1+ MB\n" - ] - } - ], - "source": [ - "import numpy as np \n", - "import pandas as pd\n", - "from mlxtend.frequent_patterns import apriori,association_rules\n", - "\n", - "df_train = pd.read_csv(\"../../dataset/Train_data.csv\")\n", - "\n", - "df_train.info()" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>duration</th>\n", - " <th>protocol_type</th>\n", - " <th>service</th>\n", - " <th>flag</th>\n", - " <th>src_bytes</th>\n", - " <th>dst_bytes</th>\n", - " <th>land</th>\n", - " <th>wrong_fragment</th>\n", - " <th>urgent</th>\n", - " <th>hot</th>\n", - " <th>...</th>\n", - " <th>dst_host_srv_count</th>\n", - " <th>dst_host_same_srv_rate</th>\n", - " <th>dst_host_diff_srv_rate</th>\n", - " <th>dst_host_same_src_port_rate</th>\n", - " <th>dst_host_srv_diff_host_rate</th>\n", - " <th>dst_host_serror_rate</th>\n", - " <th>dst_host_srv_serror_rate</th>\n", - " <th>dst_host_rerror_rate</th>\n", - " <th>dst_host_srv_rerror_rate</th>\n", - " <th>class</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>0</td>\n", - " <td>tcp</td>\n", - " <td>ftp_data</td>\n", - " <td>SF</td>\n", - " <td>491</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>...</td>\n", - " <td>25</td>\n", - " <td>0.17</td>\n", - " <td>0.03</td>\n", - " <td>0.17</td>\n", - " <td>0.00</td>\n", - " <td>0.00</td>\n", - " <td>0.00</td>\n", - " <td>0.05</td>\n", - " <td>0.00</td>\n", - " <td>normal</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>0</td>\n", - " <td>udp</td>\n", - " <td>other</td>\n", - " <td>SF</td>\n", - " <td>146</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>...</td>\n", - " <td>1</td>\n", - " <td>0.00</td>\n", - " <td>0.60</td>\n", - " <td>0.88</td>\n", - " <td>0.00</td>\n", - " <td>0.00</td>\n", - " <td>0.00</td>\n", - " <td>0.00</td>\n", - " <td>0.00</td>\n", - " <td>normal</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>0</td>\n", - " <td>tcp</td>\n", - " <td>private</td>\n", - " <td>S0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>...</td>\n", - " <td>26</td>\n", - " <td>0.10</td>\n", - " <td>0.05</td>\n", - " <td>0.00</td>\n", - " <td>0.00</td>\n", - " <td>1.00</td>\n", - " <td>1.00</td>\n", - " <td>0.00</td>\n", - " <td>0.00</td>\n", - " <td>anomaly</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>0</td>\n", - " <td>tcp</td>\n", - " <td>http</td>\n", - " <td>SF</td>\n", - " <td>232</td>\n", - " <td>8153</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>...</td>\n", - " <td>255</td>\n", - " <td>1.00</td>\n", - " <td>0.00</td>\n", - " <td>0.03</td>\n", - " <td>0.04</td>\n", - " <td>0.03</td>\n", - " <td>0.01</td>\n", - " <td>0.00</td>\n", - " <td>0.01</td>\n", - " <td>normal</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>0</td>\n", - " <td>tcp</td>\n", - " <td>http</td>\n", - " <td>SF</td>\n", - " <td>199</td>\n", - " <td>420</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>...</td>\n", - " <td>255</td>\n", - " <td>1.00</td>\n", - " <td>0.00</td>\n", - " <td>0.00</td>\n", - " <td>0.00</td>\n", - " <td>0.00</td>\n", - " <td>0.00</td>\n", - " <td>0.00</td>\n", - " <td>0.00</td>\n", - " <td>normal</td>\n", - " </tr>\n", - " <tr>\n", - " <th>5</th>\n", - " <td>0</td>\n", - " <td>tcp</td>\n", - " <td>private</td>\n", - " <td>REJ</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>...</td>\n", - " <td>19</td>\n", - " <td>0.07</td>\n", - " <td>0.07</td>\n", - " <td>0.00</td>\n", - " <td>0.00</td>\n", - " <td>0.00</td>\n", - " <td>0.00</td>\n", - " <td>1.00</td>\n", - " <td>1.00</td>\n", - " <td>anomaly</td>\n", - " </tr>\n", - " <tr>\n", - " <th>6</th>\n", - " <td>0</td>\n", - " <td>tcp</td>\n", - " <td>private</td>\n", - " <td>S0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>...</td>\n", - " <td>9</td>\n", - " <td>0.04</td>\n", - " <td>0.05</td>\n", - " <td>0.00</td>\n", - " <td>0.00</td>\n", - " <td>1.00</td>\n", - " <td>1.00</td>\n", - " <td>0.00</td>\n", - " <td>0.00</td>\n", - " <td>anomaly</td>\n", - " </tr>\n", - " <tr>\n", - " <th>7</th>\n", - " <td>0</td>\n", - " <td>tcp</td>\n", - " <td>private</td>\n", - " <td>S0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>...</td>\n", - " <td>15</td>\n", - " <td>0.06</td>\n", - " <td>0.07</td>\n", - " <td>0.00</td>\n", - " <td>0.00</td>\n", - " <td>1.00</td>\n", - " <td>1.00</td>\n", - " <td>0.00</td>\n", - " <td>0.00</td>\n", - " <td>anomaly</td>\n", - " </tr>\n", - " <tr>\n", - " <th>8</th>\n", - " <td>0</td>\n", - " <td>tcp</td>\n", - " <td>remote_job</td>\n", - " <td>S0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>...</td>\n", - " <td>23</td>\n", - " <td>0.09</td>\n", - " <td>0.05</td>\n", - " <td>0.00</td>\n", - " <td>0.00</td>\n", - " <td>1.00</td>\n", - " <td>1.00</td>\n", - " <td>0.00</td>\n", - " <td>0.00</td>\n", - " <td>anomaly</td>\n", - " </tr>\n", - " <tr>\n", - " <th>9</th>\n", - " <td>0</td>\n", - " <td>tcp</td>\n", - " <td>private</td>\n", - " <td>S0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>...</td>\n", - " <td>13</td>\n", - " <td>0.05</td>\n", - " <td>0.06</td>\n", - " <td>0.00</td>\n", - " <td>0.00</td>\n", - " <td>1.00</td>\n", - " <td>1.00</td>\n", - " <td>0.00</td>\n", - " <td>0.00</td>\n", - " <td>anomaly</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "<p>10 rows × 42 columns</p>\n", - "</div>" - ], - "text/plain": [ - " duration protocol_type service flag src_bytes dst_bytes land \\\n", - "0 0 tcp ftp_data SF 491 0 0 \n", - "1 0 udp other SF 146 0 0 \n", - "2 0 tcp private S0 0 0 0 \n", - "3 0 tcp http SF 232 8153 0 \n", - "4 0 tcp http SF 199 420 0 \n", - "5 0 tcp private REJ 0 0 0 \n", - "6 0 tcp private S0 0 0 0 \n", - "7 0 tcp private S0 0 0 0 \n", - "8 0 tcp remote_job S0 0 0 0 \n", - "9 0 tcp private S0 0 0 0 \n", - "\n", - " wrong_fragment urgent hot ... dst_host_srv_count \\\n", - "0 0 0 0 ... 25 \n", - "1 0 0 0 ... 1 \n", - "2 0 0 0 ... 26 \n", - "3 0 0 0 ... 255 \n", - "4 0 0 0 ... 255 \n", - "5 0 0 0 ... 19 \n", - "6 0 0 0 ... 9 \n", - "7 0 0 0 ... 15 \n", - "8 0 0 0 ... 23 \n", - "9 0 0 0 ... 13 \n", - "\n", - " dst_host_same_srv_rate dst_host_diff_srv_rate \\\n", - "0 0.17 0.03 \n", - "1 0.00 0.60 \n", - "2 0.10 0.05 \n", - "3 1.00 0.00 \n", - "4 1.00 0.00 \n", - "5 0.07 0.07 \n", - "6 0.04 0.05 \n", - "7 0.06 0.07 \n", - "8 0.09 0.05 \n", - "9 0.05 0.06 \n", - "\n", - " dst_host_same_src_port_rate dst_host_srv_diff_host_rate \\\n", - "0 0.17 0.00 \n", - "1 0.88 0.00 \n", - "2 0.00 0.00 \n", - "3 0.03 0.04 \n", - "4 0.00 0.00 \n", - "5 0.00 0.00 \n", - "6 0.00 0.00 \n", - "7 0.00 0.00 \n", - "8 0.00 0.00 \n", - "9 0.00 0.00 \n", - "\n", - " dst_host_serror_rate dst_host_srv_serror_rate dst_host_rerror_rate \\\n", - "0 0.00 0.00 0.05 \n", - "1 0.00 0.00 0.00 \n", - "2 1.00 1.00 0.00 \n", - "3 0.03 0.01 0.00 \n", - "4 0.00 0.00 0.00 \n", - "5 0.00 0.00 1.00 \n", - "6 1.00 1.00 0.00 \n", - "7 1.00 1.00 0.00 \n", - "8 1.00 1.00 0.00 \n", - "9 1.00 1.00 0.00 \n", - "\n", - " dst_host_srv_rerror_rate class \n", - "0 0.00 normal \n", - "1 0.00 normal \n", - "2 0.00 anomaly \n", - "3 0.01 normal \n", - "4 0.00 normal \n", - "5 1.00 anomaly \n", - "6 0.00 anomaly \n", - "7 0.00 anomaly \n", - "8 0.00 anomaly \n", - "9 0.00 anomaly \n", - "\n", - "[10 rows x 42 columns]" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_train.head(n=10)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>duration</th>\n", - " <th>src_bytes</th>\n", - " <th>dst_bytes</th>\n", - " <th>land</th>\n", - " <th>wrong_fragment</th>\n", - " <th>urgent</th>\n", - " <th>hot</th>\n", - " <th>num_failed_logins</th>\n", - " <th>logged_in</th>\n", - " <th>num_compromised</th>\n", - " <th>...</th>\n", - " <th>service_tim_i</th>\n", - " <th>service_time</th>\n", - " <th>service_urh_i</th>\n", - " <th>service_urp_i</th>\n", - " <th>service_uucp</th>\n", - " <th>service_uucp_path</th>\n", - " <th>service_vmnet</th>\n", - " <th>service_whois</th>\n", - " <th>class_anomaly</th>\n", - " <th>class_normal</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>0</td>\n", - " <td>491</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>...</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>0</td>\n", - " <td>146</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>...</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>...</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>0</td>\n", - " <td>232</td>\n", - " <td>8153</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>...</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>0</td>\n", - " <td>199</td>\n", - " <td>420</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>...</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "<p>5 rows × 120 columns</p>\n", - "</div>" - ], - "text/plain": [ - " duration src_bytes dst_bytes land wrong_fragment urgent hot \\\n", - "0 0 491 0 0 0 0 0 \n", - "1 0 146 0 0 0 0 0 \n", - "2 0 0 0 0 0 0 0 \n", - "3 0 232 8153 0 0 0 0 \n", - "4 0 199 420 0 0 0 0 \n", - "\n", - " num_failed_logins logged_in num_compromised ... service_tim_i \\\n", - "0 0 0 0 ... 0 \n", - "1 0 0 0 ... 0 \n", - "2 0 0 0 ... 0 \n", - "3 0 1 0 ... 0 \n", - "4 0 1 0 ... 0 \n", - "\n", - " service_time service_urh_i service_urp_i service_uucp \\\n", - "0 0 0 0 0 \n", - "1 0 0 0 0 \n", - "2 0 0 0 0 \n", - "3 0 0 0 0 \n", - "4 0 0 0 0 \n", - "\n", - " service_uucp_path service_vmnet service_whois class_anomaly \\\n", - "0 0 0 0 0 \n", - "1 0 0 0 0 \n", - "2 0 0 0 1 \n", - "3 0 0 0 0 \n", - "4 0 0 0 0 \n", - "\n", - " class_normal \n", - "0 1 \n", - "1 1 \n", - "2 0 \n", - "3 1 \n", - "4 1 \n", - "\n", - "[5 rows x 120 columns]" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data_train = pd.get_dummies(df_train,columns = [\"protocol_type\", \"flag\", \"service\", \"class\"],)\n", - "\n", - "data_train.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>duration</th>\n", - " <th>src_bytes</th>\n", - " <th>dst_bytes</th>\n", - " <th>land</th>\n", - " <th>wrong_fragment</th>\n", - " <th>urgent</th>\n", - " <th>hot</th>\n", - " <th>num_failed_logins</th>\n", - " <th>logged_in</th>\n", - " <th>num_compromised</th>\n", - " <th>...</th>\n", - " <th>dst_host_count</th>\n", - " <th>dst_host_srv_count</th>\n", - " <th>dst_host_same_srv_rate</th>\n", - " <th>dst_host_diff_srv_rate</th>\n", - " <th>dst_host_same_src_port_rate</th>\n", - " <th>dst_host_srv_diff_host_rate</th>\n", - " <th>dst_host_serror_rate</th>\n", - " <th>dst_host_srv_serror_rate</th>\n", - " <th>dst_host_rerror_rate</th>\n", - " <th>dst_host_srv_rerror_rate</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>-0.113551</td>\n", - " <td>-0.009889</td>\n", - " <td>-0.039310</td>\n", - " <td>-0.00891</td>\n", - " <td>-0.091223</td>\n", - " <td>-0.006301</td>\n", - " <td>-0.091933</td>\n", - " <td>-0.02622</td>\n", - " <td>-0.807626</td>\n", - " <td>-0.021873</td>\n", - " <td>...</td>\n", - " <td>-0.328634</td>\n", - " <td>-0.813985</td>\n", - " <td>-0.779157</td>\n", - " <td>-0.280673</td>\n", - " <td>0.073120</td>\n", - " <td>-0.287993</td>\n", - " <td>-0.641804</td>\n", - " <td>-0.627365</td>\n", - " <td>-0.221668</td>\n", - " <td>-0.374281</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>-0.113551</td>\n", - " <td>-0.010032</td>\n", - " <td>-0.039310</td>\n", - " <td>-0.00891</td>\n", - " <td>-0.091223</td>\n", - " <td>-0.006301</td>\n", - " <td>-0.091933</td>\n", - " <td>-0.02622</td>\n", - " <td>-0.807626</td>\n", - " <td>-0.021873</td>\n", - " <td>...</td>\n", - " <td>0.732059</td>\n", - " <td>-1.030895</td>\n", - " <td>-1.157831</td>\n", - " <td>2.764403</td>\n", - " <td>2.375620</td>\n", - " <td>-0.287993</td>\n", - " <td>-0.641804</td>\n", - " <td>-0.627365</td>\n", - " <td>-0.385140</td>\n", - " <td>-0.374281</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>-0.113551</td>\n", - " <td>-0.010093</td>\n", - " <td>-0.039310</td>\n", - " <td>-0.00891</td>\n", - " <td>-0.091223</td>\n", - " <td>-0.006301</td>\n", - " <td>-0.091933</td>\n", - " <td>-0.02622</td>\n", - " <td>-0.807626</td>\n", - " <td>-0.021873</td>\n", - " <td>...</td>\n", - " <td>0.732059</td>\n", - " <td>-0.804947</td>\n", - " <td>-0.935081</td>\n", - " <td>-0.173828</td>\n", - " <td>-0.478183</td>\n", - " <td>-0.287993</td>\n", - " <td>1.603834</td>\n", - " <td>1.614454</td>\n", - " <td>-0.385140</td>\n", - " <td>-0.374281</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>-0.113551</td>\n", - " <td>-0.009996</td>\n", - " <td>0.052473</td>\n", - " <td>-0.00891</td>\n", - " <td>-0.091223</td>\n", - " <td>-0.006301</td>\n", - " <td>-0.091933</td>\n", - " <td>-0.02622</td>\n", - " <td>1.238197</td>\n", - " <td>-0.021873</td>\n", - " <td>...</td>\n", - " <td>-1.540854</td>\n", - " <td>1.264742</td>\n", - " <td>1.069663</td>\n", - " <td>-0.440940</td>\n", - " <td>-0.380894</td>\n", - " <td>0.073759</td>\n", - " <td>-0.574435</td>\n", - " <td>-0.604947</td>\n", - " <td>-0.385140</td>\n", - " <td>-0.342768</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>-0.113551</td>\n", - " <td>-0.010010</td>\n", - " <td>-0.034582</td>\n", - " <td>-0.00891</td>\n", - " <td>-0.091223</td>\n", - " <td>-0.006301</td>\n", - " <td>-0.091933</td>\n", - " <td>-0.02622</td>\n", - " <td>1.238197</td>\n", - " <td>-0.021873</td>\n", - " <td>...</td>\n", - " <td>0.732059</td>\n", - " <td>1.264742</td>\n", - " <td>1.069663</td>\n", - " <td>-0.440940</td>\n", - " <td>-0.478183</td>\n", - " <td>-0.287993</td>\n", - " <td>-0.641804</td>\n", - " <td>-0.627365</td>\n", - " <td>-0.385140</td>\n", - " <td>-0.374281</td>\n", - " </tr>\n", - " <tr>\n", - " <th>5</th>\n", - " <td>-0.113551</td>\n", - " <td>-0.010093</td>\n", - " <td>-0.039310</td>\n", - " <td>-0.00891</td>\n", - " <td>-0.091223</td>\n", - " <td>-0.006301</td>\n", - " <td>-0.091933</td>\n", - " <td>-0.02622</td>\n", - " <td>-0.807626</td>\n", - " <td>-0.021873</td>\n", - " <td>...</td>\n", - " <td>0.732059</td>\n", - " <td>-0.868212</td>\n", - " <td>-1.001906</td>\n", - " <td>-0.066984</td>\n", - " <td>-0.478183</td>\n", - " <td>-0.287993</td>\n", - " <td>-0.641804</td>\n", - " <td>-0.627365</td>\n", - " <td>2.884296</td>\n", - " <td>2.777041</td>\n", - " </tr>\n", - " <tr>\n", - " <th>6</th>\n", - " <td>-0.113551</td>\n", - " <td>-0.010093</td>\n", - " <td>-0.039310</td>\n", - " <td>-0.00891</td>\n", - " <td>-0.091223</td>\n", - " <td>-0.006301</td>\n", - " <td>-0.091933</td>\n", - " <td>-0.02622</td>\n", - " <td>-0.807626</td>\n", - " <td>-0.021873</td>\n", - " <td>...</td>\n", - " <td>0.732059</td>\n", - " <td>-0.958592</td>\n", - " <td>-1.068731</td>\n", - " <td>-0.173828</td>\n", - " <td>-0.478183</td>\n", - " <td>-0.287993</td>\n", - " <td>1.603834</td>\n", - " <td>1.614454</td>\n", - " <td>-0.385140</td>\n", - " <td>-0.374281</td>\n", - " </tr>\n", - " <tr>\n", - " <th>7</th>\n", - " <td>-0.113551</td>\n", - " <td>-0.010093</td>\n", - " <td>-0.039310</td>\n", - " <td>-0.00891</td>\n", - " <td>-0.091223</td>\n", - " <td>-0.006301</td>\n", - " <td>-0.091933</td>\n", - " <td>-0.02622</td>\n", - " <td>-0.807626</td>\n", - " <td>-0.021873</td>\n", - " <td>...</td>\n", - " <td>0.732059</td>\n", - " <td>-0.904364</td>\n", - " <td>-1.024181</td>\n", - " <td>-0.066984</td>\n", - " <td>-0.478183</td>\n", - " <td>-0.287993</td>\n", - " <td>1.603834</td>\n", - " <td>1.614454</td>\n", - " <td>-0.385140</td>\n", - " <td>-0.374281</td>\n", - " </tr>\n", - " <tr>\n", - " <th>8</th>\n", - " <td>-0.113551</td>\n", - " <td>-0.010093</td>\n", - " <td>-0.039310</td>\n", - " <td>-0.00891</td>\n", - " <td>-0.091223</td>\n", - " <td>-0.006301</td>\n", - " <td>-0.091933</td>\n", - " <td>-0.02622</td>\n", - " <td>-0.807626</td>\n", - " <td>-0.021873</td>\n", - " <td>...</td>\n", - " <td>0.732059</td>\n", - " <td>-0.832060</td>\n", - " <td>-0.957356</td>\n", - " <td>-0.173828</td>\n", - " <td>-0.478183</td>\n", - " <td>-0.287993</td>\n", - " <td>1.603834</td>\n", - " <td>1.614454</td>\n", - " <td>-0.385140</td>\n", - " <td>-0.374281</td>\n", - " </tr>\n", - " <tr>\n", - " <th>9</th>\n", - " <td>-0.113551</td>\n", - " <td>-0.010093</td>\n", - " <td>-0.039310</td>\n", - " <td>-0.00891</td>\n", - " <td>-0.091223</td>\n", - " <td>-0.006301</td>\n", - " <td>-0.091933</td>\n", - " <td>-0.02622</td>\n", - " <td>-0.807626</td>\n", - " <td>-0.021873</td>\n", - " <td>...</td>\n", - " <td>0.732059</td>\n", - " <td>-0.922440</td>\n", - " <td>-1.046456</td>\n", - " <td>-0.120406</td>\n", - " <td>-0.478183</td>\n", - " <td>-0.287993</td>\n", - " <td>1.603834</td>\n", - " <td>1.614454</td>\n", - " <td>-0.385140</td>\n", - " <td>-0.374281</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "<p>10 rows × 38 columns</p>\n", - "</div>" - ], - "text/plain": [ - " duration src_bytes dst_bytes land wrong_fragment urgent \\\n", - "0 -0.113551 -0.009889 -0.039310 -0.00891 -0.091223 -0.006301 \n", - "1 -0.113551 -0.010032 -0.039310 -0.00891 -0.091223 -0.006301 \n", - "2 -0.113551 -0.010093 -0.039310 -0.00891 -0.091223 -0.006301 \n", - "3 -0.113551 -0.009996 0.052473 -0.00891 -0.091223 -0.006301 \n", - "4 -0.113551 -0.010010 -0.034582 -0.00891 -0.091223 -0.006301 \n", - "5 -0.113551 -0.010093 -0.039310 -0.00891 -0.091223 -0.006301 \n", - "6 -0.113551 -0.010093 -0.039310 -0.00891 -0.091223 -0.006301 \n", - "7 -0.113551 -0.010093 -0.039310 -0.00891 -0.091223 -0.006301 \n", - "8 -0.113551 -0.010093 -0.039310 -0.00891 -0.091223 -0.006301 \n", - "9 -0.113551 -0.010093 -0.039310 -0.00891 -0.091223 -0.006301 \n", - "\n", - " hot num_failed_logins logged_in num_compromised ... \\\n", - "0 -0.091933 -0.02622 -0.807626 -0.021873 ... \n", - "1 -0.091933 -0.02622 -0.807626 -0.021873 ... \n", - "2 -0.091933 -0.02622 -0.807626 -0.021873 ... \n", - "3 -0.091933 -0.02622 1.238197 -0.021873 ... \n", - "4 -0.091933 -0.02622 1.238197 -0.021873 ... \n", - "5 -0.091933 -0.02622 -0.807626 -0.021873 ... \n", - "6 -0.091933 -0.02622 -0.807626 -0.021873 ... \n", - "7 -0.091933 -0.02622 -0.807626 -0.021873 ... \n", - "8 -0.091933 -0.02622 -0.807626 -0.021873 ... \n", - "9 -0.091933 -0.02622 -0.807626 -0.021873 ... \n", - "\n", - " dst_host_count dst_host_srv_count dst_host_same_srv_rate \\\n", - "0 -0.328634 -0.813985 -0.779157 \n", - "1 0.732059 -1.030895 -1.157831 \n", - "2 0.732059 -0.804947 -0.935081 \n", - "3 -1.540854 1.264742 1.069663 \n", - "4 0.732059 1.264742 1.069663 \n", - "5 0.732059 -0.868212 -1.001906 \n", - "6 0.732059 -0.958592 -1.068731 \n", - "7 0.732059 -0.904364 -1.024181 \n", - "8 0.732059 -0.832060 -0.957356 \n", - "9 0.732059 -0.922440 -1.046456 \n", - "\n", - " dst_host_diff_srv_rate dst_host_same_src_port_rate \\\n", - "0 -0.280673 0.073120 \n", - "1 2.764403 2.375620 \n", - "2 -0.173828 -0.478183 \n", - "3 -0.440940 -0.380894 \n", - "4 -0.440940 -0.478183 \n", - "5 -0.066984 -0.478183 \n", - "6 -0.173828 -0.478183 \n", - "7 -0.066984 -0.478183 \n", - "8 -0.173828 -0.478183 \n", - "9 -0.120406 -0.478183 \n", - "\n", - " dst_host_srv_diff_host_rate dst_host_serror_rate \\\n", - "0 -0.287993 -0.641804 \n", - "1 -0.287993 -0.641804 \n", - "2 -0.287993 1.603834 \n", - "3 0.073759 -0.574435 \n", - "4 -0.287993 -0.641804 \n", - "5 -0.287993 -0.641804 \n", - "6 -0.287993 1.603834 \n", - "7 -0.287993 1.603834 \n", - "8 -0.287993 1.603834 \n", - "9 -0.287993 1.603834 \n", - "\n", - " dst_host_srv_serror_rate dst_host_rerror_rate dst_host_srv_rerror_rate \n", - "0 -0.627365 -0.221668 -0.374281 \n", - "1 -0.627365 -0.385140 -0.374281 \n", - "2 1.614454 -0.385140 -0.374281 \n", - "3 -0.604947 -0.385140 -0.342768 \n", - "4 -0.627365 -0.385140 -0.374281 \n", - "5 -0.627365 2.884296 2.777041 \n", - "6 1.614454 -0.385140 -0.374281 \n", - "7 1.614454 -0.385140 -0.374281 \n", - "8 1.614454 -0.385140 -0.374281 \n", - "9 1.614454 -0.385140 -0.374281 \n", - "\n", - "[10 rows x 38 columns]" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from sklearn.preprocessing import StandardScaler\n", - "scaler = StandardScaler()\n", - "\n", - "# extract numerical attributes and scale it to have zero mean and unit variance \n", - "cols = data_train.select_dtypes(include=['float64','int64']).columns\n", - "sc_train = scaler.fit_transform(data_train.select_dtypes(include=['float64','int64']))\n", - "\n", - "\n", - "# turn the result back to a dataframe\n", - "sc_traindf = pd.DataFrame(sc_train, columns = cols)\n", - "sc_traindf.head(n=10)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>duration</th>\n", - " <th>src_bytes</th>\n", - " <th>dst_bytes</th>\n", - " <th>land</th>\n", - " <th>wrong_fragment</th>\n", - " <th>urgent</th>\n", - " <th>hot</th>\n", - " <th>num_failed_logins</th>\n", - " <th>logged_in</th>\n", - " <th>num_compromised</th>\n", - " <th>...</th>\n", - " <th>dst_host_count</th>\n", - " <th>dst_host_srv_count</th>\n", - " <th>dst_host_same_srv_rate</th>\n", - " <th>dst_host_diff_srv_rate</th>\n", - " <th>dst_host_same_src_port_rate</th>\n", - " <th>dst_host_srv_diff_host_rate</th>\n", - " <th>dst_host_serror_rate</th>\n", - " <th>dst_host_srv_serror_rate</th>\n", - " <th>dst_host_rerror_rate</th>\n", - " <th>dst_host_srv_rerror_rate</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>...</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>...</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>...</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>...</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>...</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>5</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>...</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>6</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>...</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>7</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>...</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>8</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>...</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>9</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>...</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "<p>10 rows × 38 columns</p>\n", - "</div>" - ], - "text/plain": [ - " duration src_bytes dst_bytes land wrong_fragment urgent hot \\\n", - "0 0 0 0 0 0 0 0 \n", - "1 0 0 0 0 0 0 0 \n", - "2 0 0 0 0 0 0 0 \n", - "3 0 0 1 0 0 0 0 \n", - "4 0 0 0 0 0 0 0 \n", - "5 0 0 0 0 0 0 0 \n", - "6 0 0 0 0 0 0 0 \n", - "7 0 0 0 0 0 0 0 \n", - "8 0 0 0 0 0 0 0 \n", - "9 0 0 0 0 0 0 0 \n", - "\n", - " num_failed_logins logged_in num_compromised ... dst_host_count \\\n", - "0 0 0 0 ... 0 \n", - "1 0 0 0 ... 1 \n", - "2 0 0 0 ... 1 \n", - "3 0 1 0 ... 0 \n", - "4 0 1 0 ... 1 \n", - "5 0 0 0 ... 1 \n", - "6 0 0 0 ... 1 \n", - "7 0 0 0 ... 1 \n", - "8 0 0 0 ... 1 \n", - "9 0 0 0 ... 1 \n", - "\n", - " dst_host_srv_count dst_host_same_srv_rate dst_host_diff_srv_rate \\\n", - "0 0 0 0 \n", - "1 0 0 1 \n", - "2 0 0 0 \n", - "3 1 1 0 \n", - "4 1 1 0 \n", - "5 0 0 0 \n", - "6 0 0 0 \n", - "7 0 0 0 \n", - "8 0 0 0 \n", - "9 0 0 0 \n", - "\n", - " dst_host_same_src_port_rate dst_host_srv_diff_host_rate \\\n", - "0 1 0 \n", - "1 1 0 \n", - "2 0 0 \n", - "3 0 1 \n", - "4 0 0 \n", - "5 0 0 \n", - "6 0 0 \n", - "7 0 0 \n", - "8 0 0 \n", - "9 0 0 \n", - "\n", - " dst_host_serror_rate dst_host_srv_serror_rate dst_host_rerror_rate \\\n", - "0 0 0 0 \n", - "1 0 0 0 \n", - "2 1 1 0 \n", - "3 0 0 0 \n", - "4 0 0 0 \n", - "5 0 0 1 \n", - "6 1 1 0 \n", - "7 1 1 0 \n", - "8 1 1 0 \n", - "9 1 1 0 \n", - "\n", - " dst_host_srv_rerror_rate \n", - "0 0 \n", - "1 0 \n", - "2 0 \n", - "3 0 \n", - "4 0 \n", - "5 1 \n", - "6 0 \n", - "7 0 \n", - "8 0 \n", - "9 0 \n", - "\n", - "[10 rows x 38 columns]" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "def encode_units(x):\n", - " if x<=0:\n", - " return 0\n", - " if x>=0 :\n", - " return 1\n", - " \n", - "train_df = sc_traindf.applymap(encode_units)\n", - "\n", - "train_df.head(n=10)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>duration</th>\n", - " <th>src_bytes</th>\n", - " <th>dst_bytes</th>\n", - " <th>land</th>\n", - " <th>wrong_fragment</th>\n", - " <th>urgent</th>\n", - " <th>hot</th>\n", - " <th>num_failed_logins</th>\n", - " <th>logged_in</th>\n", - " <th>num_compromised</th>\n", - " <th>...</th>\n", - " <th>service_tim_i</th>\n", - " <th>service_time</th>\n", - " <th>service_urh_i</th>\n", - " <th>service_urp_i</th>\n", - " <th>service_uucp</th>\n", - " <th>service_uucp_path</th>\n", - " <th>service_vmnet</th>\n", - " <th>service_whois</th>\n", - " <th>class_anomaly</th>\n", - " <th>class_normal</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>...</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>...</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>...</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>...</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>...</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "<p>5 rows × 120 columns</p>\n", - "</div>" - ], - "text/plain": [ - " duration src_bytes dst_bytes land wrong_fragment urgent hot \\\n", - "0 0 0 0 0 0 0 0 \n", - "1 0 0 0 0 0 0 0 \n", - "2 0 0 0 0 0 0 0 \n", - "3 0 0 1 0 0 0 0 \n", - "4 0 0 0 0 0 0 0 \n", - "\n", - " num_failed_logins logged_in num_compromised ... service_tim_i \\\n", - "0 0 0 0 ... 0 \n", - "1 0 0 0 ... 0 \n", - "2 0 0 0 ... 0 \n", - "3 0 1 0 ... 0 \n", - "4 0 1 0 ... 0 \n", - "\n", - " service_time service_urh_i service_urp_i service_uucp \\\n", - "0 0 0 0 0 \n", - "1 0 0 0 0 \n", - "2 0 0 0 0 \n", - "3 0 0 0 0 \n", - "4 0 0 0 0 \n", - "\n", - " service_uucp_path service_vmnet service_whois class_anomaly \\\n", - "0 0 0 0 0 \n", - "1 0 0 0 0 \n", - "2 0 0 0 1 \n", - "3 0 0 0 0 \n", - "4 0 0 0 0 \n", - "\n", - " class_normal \n", - "0 1 \n", - "1 1 \n", - "2 0 \n", - "3 1 \n", - "4 1 \n", - "\n", - "[5 rows x 120 columns]" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "columns = data_train.columns\n", - "colname = ['duration', 'src_bytes', 'dst_bytes', 'land', 'wrong_fragment',\n", - " 'urgent', 'hot', 'num_failed_logins', 'logged_in', 'num_compromised',\n", - " 'root_shell', 'su_attempted', 'num_root', 'num_file_creations',\n", - " 'num_shells', 'num_access_files', 'num_outbound_cmds', 'is_host_login',\n", - " 'is_guest_login', 'count', 'srv_count', 'serror_rate',\n", - " 'srv_serror_rate', 'rerror_rate', 'srv_rerror_rate', 'same_srv_rate',\n", - " 'diff_srv_rate', 'srv_diff_host_rate', 'dst_host_count',\n", - " 'dst_host_srv_count', 'dst_host_same_srv_rate',\n", - " 'dst_host_diff_srv_rate', 'dst_host_same_src_port_rate',\n", - " 'dst_host_srv_diff_host_rate', 'dst_host_serror_rate',\n", - " 'dst_host_srv_serror_rate', 'dst_host_rerror_rate',\n", - " 'dst_host_srv_rerror_rate']\n", - "for col in columns :\n", - " for j in colname :\n", - " if col == j :\n", - " data_train[col] = train_df[col]\n", - "data_train.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "num_outbound_cmds\n", - "is_host_login\n" - ] - } - ], - "source": [ - "data_train[['num_outbound_cmds','is_host_login']].head()\n", - "for col in data_train.columns :\n", - " if data_train[col].mean() == 0 :\n", - " print(col)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>support</th>\n", - " <th>itemsets</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>26</th>\n", - " <td>0.814782</td>\n", - " <td>(protocol_type_tcp)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>15</th>\n", - " <td>0.643022</td>\n", - " <td>(dst_host_count)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>12</th>\n", - " <td>0.622102</td>\n", - " <td>(same_srv_rate)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>32</th>\n", - " <td>0.594355</td>\n", - " <td>(flag_SF)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>227</th>\n", - " <td>0.562837</td>\n", - " <td>(same_srv_rate, flag_SF)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>...</th>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " </tr>\n", - " <tr>\n", - " <th>281</th>\n", - " <td>0.010241</td>\n", - " <td>(dst_host_count, flag_RSTO)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>100</th>\n", - " <td>0.010241</td>\n", - " <td>(num_compromised, flag_SF)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>287</th>\n", - " <td>0.010162</td>\n", - " <td>(service_finger, dst_host_count)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>47</th>\n", - " <td>0.010043</td>\n", - " <td>(duration, srv_rerror_rate)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>46</th>\n", - " <td>0.010043</td>\n", - " <td>(rerror_rate, duration)</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "<p>467 rows × 2 columns</p>\n", - "</div>" - ], - "text/plain": [ - " support itemsets\n", - "26 0.814782 (protocol_type_tcp)\n", - "15 0.643022 (dst_host_count)\n", - "12 0.622102 (same_srv_rate)\n", - "32 0.594355 (flag_SF)\n", - "227 0.562837 (same_srv_rate, flag_SF)\n", - ".. ... ...\n", - "281 0.010241 (dst_host_count, flag_RSTO)\n", - "100 0.010241 (num_compromised, flag_SF)\n", - "287 0.010162 (service_finger, dst_host_count)\n", - "47 0.010043 (duration, srv_rerror_rate)\n", - "46 0.010043 (rerror_rate, duration)\n", - "\n", - "[467 rows x 2 columns]" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "frequent_itemsets = apriori ( data_train , min_support = 0.01 , use_colnames=True ,max_len =2)\n", - "result_desc = frequent_itemsets.sort_values(['support'],ascending =[False])\n", - "result_desc" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>antecedents</th>\n", - " <th>consequents</th>\n", - " <th>antecedent support</th>\n", - " <th>consequent support</th>\n", - " <th>support</th>\n", - " <th>confidence</th>\n", - " <th>lift</th>\n", - " <th>leverage</th>\n", - " <th>conviction</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>104</th>\n", - " <td>(service_eco_i)</td>\n", - " <td>(protocol_type_icmp)</td>\n", - " <td>0.036083</td>\n", - " <td>0.065695</td>\n", - " <td>0.036083</td>\n", - " <td>1.000000</td>\n", - " <td>15.221752</td>\n", - " <td>0.033712</td>\n", - " <td>inf</td>\n", - " </tr>\n", - " <tr>\n", - " <th>111</th>\n", - " <td>(service_ecr_i)</td>\n", - " <td>(protocol_type_icmp)</td>\n", - " <td>0.024333</td>\n", - " <td>0.065695</td>\n", - " <td>0.024333</td>\n", - " <td>1.000000</td>\n", - " <td>15.221752</td>\n", - " <td>0.022735</td>\n", - " <td>inf</td>\n", - " </tr>\n", - " <tr>\n", - " <th>91</th>\n", - " <td>(service_domain_u)</td>\n", - " <td>(protocol_type_udp)</td>\n", - " <td>0.072245</td>\n", - " <td>0.119522</td>\n", - " <td>0.072245</td>\n", - " <td>1.000000</td>\n", - " <td>8.366656</td>\n", - " <td>0.063610</td>\n", - " <td>inf</td>\n", - " </tr>\n", - " <tr>\n", - " <th>118</th>\n", - " <td>(flag_RSTR)</td>\n", - " <td>(rerror_rate)</td>\n", - " <td>0.019728</td>\n", - " <td>0.124127</td>\n", - " <td>0.019728</td>\n", - " <td>1.000000</td>\n", - " <td>8.056284</td>\n", - " <td>0.017280</td>\n", - " <td>inf</td>\n", - " </tr>\n", - " <tr>\n", - " <th>131</th>\n", - " <td>(flag_RSTO)</td>\n", - " <td>(rerror_rate)</td>\n", - " <td>0.012067</td>\n", - " <td>0.124127</td>\n", - " <td>0.012067</td>\n", - " <td>1.000000</td>\n", - " <td>8.056284</td>\n", - " <td>0.010569</td>\n", - " <td>inf</td>\n", - " </tr>\n", - " <tr>\n", - " <th>...</th>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " </tr>\n", - " <tr>\n", - " <th>122</th>\n", - " <td>(hot)</td>\n", - " <td>(flag_SF)</td>\n", - " <td>0.020641</td>\n", - " <td>0.594355</td>\n", - " <td>0.018736</td>\n", - " <td>0.907692</td>\n", - " <td>1.527188</td>\n", - " <td>0.006468</td>\n", - " <td>4.394484</td>\n", - " </tr>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>(same_srv_rate)</td>\n", - " <td>(flag_SF)</td>\n", - " <td>0.622102</td>\n", - " <td>0.594355</td>\n", - " <td>0.562837</td>\n", - " <td>0.904735</td>\n", - " <td>1.522212</td>\n", - " <td>0.193088</td>\n", - " <td>4.258046</td>\n", - " </tr>\n", - " <tr>\n", - " <th>84</th>\n", - " <td>(dst_bytes)</td>\n", - " <td>(service_http)</td>\n", - " <td>0.098206</td>\n", - " <td>0.317680</td>\n", - " <td>0.088719</td>\n", - " <td>0.903395</td>\n", - " <td>2.843725</td>\n", - " <td>0.057521</td>\n", - " <td>7.063009</td>\n", - " </tr>\n", - " <tr>\n", - " <th>109</th>\n", - " <td>(service_other)</td>\n", - " <td>(dst_host_count)</td>\n", - " <td>0.034058</td>\n", - " <td>0.643022</td>\n", - " <td>0.030764</td>\n", - " <td>0.903263</td>\n", - " <td>1.404717</td>\n", - " <td>0.008863</td>\n", - " <td>3.690211</td>\n", - " </tr>\n", - " <tr>\n", - " <th>75</th>\n", - " <td>(srv_count)</td>\n", - " <td>(dst_host_same_srv_rate)</td>\n", - " <td>0.130597</td>\n", - " <td>0.498730</td>\n", - " <td>0.117855</td>\n", - " <td>0.902432</td>\n", - " <td>1.809460</td>\n", - " <td>0.052722</td>\n", - " <td>5.137630</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "<p>138 rows × 9 columns</p>\n", - "</div>" - ], - "text/plain": [ - " antecedents consequents antecedent support \\\n", - "104 (service_eco_i) (protocol_type_icmp) 0.036083 \n", - "111 (service_ecr_i) (protocol_type_icmp) 0.024333 \n", - "91 (service_domain_u) (protocol_type_udp) 0.072245 \n", - "118 (flag_RSTR) (rerror_rate) 0.019728 \n", - "131 (flag_RSTO) (rerror_rate) 0.012067 \n", - ".. ... ... ... \n", - "122 (hot) (flag_SF) 0.020641 \n", - "0 (same_srv_rate) (flag_SF) 0.622102 \n", - "84 (dst_bytes) (service_http) 0.098206 \n", - "109 (service_other) (dst_host_count) 0.034058 \n", - "75 (srv_count) (dst_host_same_srv_rate) 0.130597 \n", - "\n", - " consequent support support confidence lift leverage conviction \n", - "104 0.065695 0.036083 1.000000 15.221752 0.033712 inf \n", - "111 0.065695 0.024333 1.000000 15.221752 0.022735 inf \n", - "91 0.119522 0.072245 1.000000 8.366656 0.063610 inf \n", - "118 0.124127 0.019728 1.000000 8.056284 0.017280 inf \n", - "131 0.124127 0.012067 1.000000 8.056284 0.010569 inf \n", - ".. ... ... ... ... ... ... \n", - "122 0.594355 0.018736 0.907692 1.527188 0.006468 4.394484 \n", - "0 0.594355 0.562837 0.904735 1.522212 0.193088 4.258046 \n", - "84 0.317680 0.088719 0.903395 2.843725 0.057521 7.063009 \n", - "109 0.643022 0.030764 0.903263 1.404717 0.008863 3.690211 \n", - "75 0.498730 0.117855 0.902432 1.809460 0.052722 5.137630 \n", - "\n", - "[138 rows x 9 columns]" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "rules = association_rules(result_desc , metric = \"confidence\" , min_threshold = 0.90)\n", - "rules = rules.sort_values(['confidence','lift'], ascending=[False , False])\n", - "rules" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>antecedents</th>\n", - " <th>consequents</th>\n", - " <th>antecedent support</th>\n", - " <th>consequent support</th>\n", - " <th>support</th>\n", - " <th>confidence</th>\n", - " <th>lift</th>\n", - " <th>leverage</th>\n", - " <th>conviction</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>45</th>\n", - " <td>(flag_S0)</td>\n", - " <td>(class_anomaly)</td>\n", - " <td>0.278223</td>\n", - " <td>0.46614</td>\n", - " <td>0.275048</td>\n", - " <td>0.988586</td>\n", - " <td>2.120792</td>\n", - " <td>0.145357</td>\n", - " <td>46.772805</td>\n", - " </tr>\n", - " <tr>\n", - " <th>44</th>\n", - " <td>(dst_host_srv_serror_rate)</td>\n", - " <td>(class_anomaly)</td>\n", - " <td>0.281478</td>\n", - " <td>0.46614</td>\n", - " <td>0.277072</td>\n", - " <td>0.984346</td>\n", - " <td>2.111697</td>\n", - " <td>0.145864</td>\n", - " <td>34.104513</td>\n", - " </tr>\n", - " <tr>\n", - " <th>36</th>\n", - " <td>(srv_serror_rate)</td>\n", - " <td>(class_anomaly)</td>\n", - " <td>0.286440</td>\n", - " <td>0.46614</td>\n", - " <td>0.277707</td>\n", - " <td>0.969512</td>\n", - " <td>2.079873</td>\n", - " <td>0.144186</td>\n", - " <td>17.510607</td>\n", - " </tr>\n", - " <tr>\n", - " <th>43</th>\n", - " <td>(dst_host_serror_rate)</td>\n", - " <td>(class_anomaly)</td>\n", - " <td>0.287512</td>\n", - " <td>0.46614</td>\n", - " <td>0.277191</td>\n", - " <td>0.964103</td>\n", - " <td>2.068270</td>\n", - " <td>0.143170</td>\n", - " <td>14.872106</td>\n", - " </tr>\n", - " <tr>\n", - " <th>31</th>\n", - " <td>(serror_rate)</td>\n", - " <td>(class_anomaly)</td>\n", - " <td>0.288544</td>\n", - " <td>0.46614</td>\n", - " <td>0.278184</td>\n", - " <td>0.964094</td>\n", - " <td>2.068250</td>\n", - " <td>0.143682</td>\n", - " <td>14.868307</td>\n", - " </tr>\n", - " <tr>\n", - " <th>56</th>\n", - " <td>(service_private)</td>\n", - " <td>(class_anomaly)</td>\n", - " <td>0.172714</td>\n", - " <td>0.46614</td>\n", - " <td>0.164814</td>\n", - " <td>0.954263</td>\n", - " <td>2.047160</td>\n", - " <td>0.084306</td>\n", - " <td>11.672486</td>\n", - " </tr>\n", - " <tr>\n", - " <th>123</th>\n", - " <td>(flag_RSTR)</td>\n", - " <td>(class_anomaly)</td>\n", - " <td>0.019728</td>\n", - " <td>0.46614</td>\n", - " <td>0.018617</td>\n", - " <td>0.943662</td>\n", - " <td>2.024417</td>\n", - " <td>0.009421</td>\n", - " <td>9.476014</td>\n", - " </tr>\n", - " <tr>\n", - " <th>113</th>\n", - " <td>(service_ecr_i)</td>\n", - " <td>(class_anomaly)</td>\n", - " <td>0.024333</td>\n", - " <td>0.46614</td>\n", - " <td>0.022825</td>\n", - " <td>0.938010</td>\n", - " <td>2.012292</td>\n", - " <td>0.011482</td>\n", - " <td>8.612004</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " antecedents consequents antecedent support \\\n", - "45 (flag_S0) (class_anomaly) 0.278223 \n", - "44 (dst_host_srv_serror_rate) (class_anomaly) 0.281478 \n", - "36 (srv_serror_rate) (class_anomaly) 0.286440 \n", - "43 (dst_host_serror_rate) (class_anomaly) 0.287512 \n", - "31 (serror_rate) (class_anomaly) 0.288544 \n", - "56 (service_private) (class_anomaly) 0.172714 \n", - "123 (flag_RSTR) (class_anomaly) 0.019728 \n", - "113 (service_ecr_i) (class_anomaly) 0.024333 \n", - "\n", - " consequent support support confidence lift leverage conviction \n", - "45 0.46614 0.275048 0.988586 2.120792 0.145357 46.772805 \n", - "44 0.46614 0.277072 0.984346 2.111697 0.145864 34.104513 \n", - "36 0.46614 0.277707 0.969512 2.079873 0.144186 17.510607 \n", - "43 0.46614 0.277191 0.964103 2.068270 0.143170 14.872106 \n", - "31 0.46614 0.278184 0.964094 2.068250 0.143682 14.868307 \n", - "56 0.46614 0.164814 0.954263 2.047160 0.084306 11.672486 \n", - "123 0.46614 0.018617 0.943662 2.024417 0.009421 9.476014 \n", - "113 0.46614 0.022825 0.938010 2.012292 0.011482 8.612004 " - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "rules[rules['consequents'] == {'class_anomaly'}]" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>support</th>\n", - " <th>itemsets</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>6</th>\n", - " <td>0.814782</td>\n", - " <td>(protocol_type_tcp)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>0.643022</td>\n", - " <td>(dst_host_count)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>0.622102</td>\n", - " <td>(same_srv_rate)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>7</th>\n", - " <td>0.594355</td>\n", - " <td>(flag_SF)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>19</th>\n", - " <td>0.562837</td>\n", - " <td>(same_srv_rate, flag_SF)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>9</th>\n", - " <td>0.533860</td>\n", - " <td>(class_normal)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>21</th>\n", - " <td>0.516077</td>\n", - " <td>(dst_host_count, protocol_type_tcp)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>20</th>\n", - " <td>0.509963</td>\n", - " <td>(same_srv_rate, class_normal)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>33</th>\n", - " <td>0.502660</td>\n", - " <td>(class_normal, flag_SF)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>5</th>\n", - " <td>0.498730</td>\n", - " <td>(dst_host_same_srv_rate)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>17</th>\n", - " <td>0.492339</td>\n", - " <td>(same_srv_rate, dst_host_same_srv_rate)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>28</th>\n", - " <td>0.469117</td>\n", - " <td>(dst_host_same_srv_rate, flag_SF)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>8</th>\n", - " <td>0.466140</td>\n", - " <td>(class_anomaly)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>18</th>\n", - " <td>0.460027</td>\n", - " <td>(same_srv_rate, protocol_type_tcp)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>0.435773</td>\n", - " <td>(dst_host_srv_count)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>29</th>\n", - " <td>0.432637</td>\n", - " <td>(dst_host_same_srv_rate, class_normal)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>16</th>\n", - " <td>0.430891</td>\n", - " <td>(same_srv_rate, dst_host_srv_count)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>23</th>\n", - " <td>0.426485</td>\n", - " <td>(dst_host_same_srv_rate, dst_host_srv_count)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>32</th>\n", - " <td>0.423984</td>\n", - " <td>(class_normal, protocol_type_tcp)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>25</th>\n", - " <td>0.412234</td>\n", - " <td>(dst_host_srv_count, flag_SF)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>30</th>\n", - " <td>0.409138</td>\n", - " <td>(protocol_type_tcp, flag_SF)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>26</th>\n", - " <td>0.401437</td>\n", - " <td>(dst_host_srv_count, class_normal)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>22</th>\n", - " <td>0.395403</td>\n", - " <td>(class_anomaly, dst_host_count)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>0.394768</td>\n", - " <td>(logged_in)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>12</th>\n", - " <td>0.394768</td>\n", - " <td>(protocol_type_tcp, logged_in)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>31</th>\n", - " <td>0.390799</td>\n", - " <td>(class_anomaly, protocol_type_tcp)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>13</th>\n", - " <td>0.387861</td>\n", - " <td>(flag_SF, logged_in)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>10</th>\n", - " <td>0.387702</td>\n", - " <td>(same_srv_rate, logged_in)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>14</th>\n", - " <td>0.378533</td>\n", - " <td>(class_normal, logged_in)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>27</th>\n", - " <td>0.365314</td>\n", - " <td>(dst_host_same_srv_rate, protocol_type_tcp)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>0.364481</td>\n", - " <td>(count)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>15</th>\n", - " <td>0.351818</td>\n", - " <td>(dst_host_count, count)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>11</th>\n", - " <td>0.337051</td>\n", - " <td>(dst_host_same_srv_rate, logged_in)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>24</th>\n", - " <td>0.333519</td>\n", - " <td>(dst_host_srv_count, protocol_type_tcp)</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " support itemsets\n", - "6 0.814782 (protocol_type_tcp)\n", - "3 0.643022 (dst_host_count)\n", - "2 0.622102 (same_srv_rate)\n", - "7 0.594355 (flag_SF)\n", - "19 0.562837 (same_srv_rate, flag_SF)\n", - "9 0.533860 (class_normal)\n", - "21 0.516077 (dst_host_count, protocol_type_tcp)\n", - "20 0.509963 (same_srv_rate, class_normal)\n", - "33 0.502660 (class_normal, flag_SF)\n", - "5 0.498730 (dst_host_same_srv_rate)\n", - "17 0.492339 (same_srv_rate, dst_host_same_srv_rate)\n", - "28 0.469117 (dst_host_same_srv_rate, flag_SF)\n", - "8 0.466140 (class_anomaly)\n", - "18 0.460027 (same_srv_rate, protocol_type_tcp)\n", - "4 0.435773 (dst_host_srv_count)\n", - "29 0.432637 (dst_host_same_srv_rate, class_normal)\n", - "16 0.430891 (same_srv_rate, dst_host_srv_count)\n", - "23 0.426485 (dst_host_same_srv_rate, dst_host_srv_count)\n", - "32 0.423984 (class_normal, protocol_type_tcp)\n", - "25 0.412234 (dst_host_srv_count, flag_SF)\n", - "30 0.409138 (protocol_type_tcp, flag_SF)\n", - "26 0.401437 (dst_host_srv_count, class_normal)\n", - "22 0.395403 (class_anomaly, dst_host_count)\n", - "0 0.394768 (logged_in)\n", - "12 0.394768 (protocol_type_tcp, logged_in)\n", - "31 0.390799 (class_anomaly, protocol_type_tcp)\n", - "13 0.387861 (flag_SF, logged_in)\n", - "10 0.387702 (same_srv_rate, logged_in)\n", - "14 0.378533 (class_normal, logged_in)\n", - "27 0.365314 (dst_host_same_srv_rate, protocol_type_tcp)\n", - "1 0.364481 (count)\n", - "15 0.351818 (dst_host_count, count)\n", - "11 0.337051 (dst_host_same_srv_rate, logged_in)\n", - "24 0.333519 (dst_host_srv_count, protocol_type_tcp)" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "frequent_itemsets = apriori ( data_train , min_support = 0.33 , use_colnames=True ,max_len =2)\n", - "result_desc = frequent_itemsets.sort_values(['support'],ascending =[False])\n", - "result_desc" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>antecedents</th>\n", - " <th>consequents</th>\n", - " <th>antecedent support</th>\n", - " <th>consequent support</th>\n", - " <th>support</th>\n", - " <th>confidence</th>\n", - " <th>lift</th>\n", - " <th>leverage</th>\n", - " <th>conviction</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>21</th>\n", - " <td>(logged_in)</td>\n", - " <td>(protocol_type_tcp)</td>\n", - " <td>0.394768</td>\n", - " <td>0.814782</td>\n", - " <td>0.394768</td>\n", - " <td>1.000000</td>\n", - " <td>1.227321</td>\n", - " <td>0.073118</td>\n", - " <td>inf</td>\n", - " </tr>\n", - " <tr>\n", - " <th>13</th>\n", - " <td>(dst_host_srv_count)</td>\n", - " <td>(same_srv_rate)</td>\n", - " <td>0.435773</td>\n", - " <td>0.622102</td>\n", - " <td>0.430891</td>\n", - " <td>0.988796</td>\n", - " <td>1.589443</td>\n", - " <td>0.159795</td>\n", - " <td>33.728142</td>\n", - " </tr>\n", - " <tr>\n", - " <th>8</th>\n", - " <td>(dst_host_same_srv_rate)</td>\n", - " <td>(same_srv_rate)</td>\n", - " <td>0.498730</td>\n", - " <td>0.622102</td>\n", - " <td>0.492339</td>\n", - " <td>0.987186</td>\n", - " <td>1.586854</td>\n", - " <td>0.182078</td>\n", - " <td>29.490107</td>\n", - " </tr>\n", - " <tr>\n", - " <th>23</th>\n", - " <td>(logged_in)</td>\n", - " <td>(flag_SF)</td>\n", - " <td>0.394768</td>\n", - " <td>0.594355</td>\n", - " <td>0.387861</td>\n", - " <td>0.982504</td>\n", - " <td>1.653058</td>\n", - " <td>0.153229</td>\n", - " <td>23.184690</td>\n", - " </tr>\n", - " <tr>\n", - " <th>24</th>\n", - " <td>(logged_in)</td>\n", - " <td>(same_srv_rate)</td>\n", - " <td>0.394768</td>\n", - " <td>0.622102</td>\n", - " <td>0.387702</td>\n", - " <td>0.982102</td>\n", - " <td>1.578682</td>\n", - " <td>0.142116</td>\n", - " <td>21.113444</td>\n", - " </tr>\n", - " <tr>\n", - " <th>15</th>\n", - " <td>(dst_host_srv_count)</td>\n", - " <td>(dst_host_same_srv_rate)</td>\n", - " <td>0.435773</td>\n", - " <td>0.498730</td>\n", - " <td>0.426485</td>\n", - " <td>0.978685</td>\n", - " <td>1.962355</td>\n", - " <td>0.209152</td>\n", - " <td>23.516858</td>\n", - " </tr>\n", - " <tr>\n", - " <th>26</th>\n", - " <td>(count)</td>\n", - " <td>(dst_host_count)</td>\n", - " <td>0.364481</td>\n", - " <td>0.643022</td>\n", - " <td>0.351818</td>\n", - " <td>0.965258</td>\n", - " <td>1.501129</td>\n", - " <td>0.117449</td>\n", - " <td>10.275159</td>\n", - " </tr>\n", - " <tr>\n", - " <th>25</th>\n", - " <td>(logged_in)</td>\n", - " <td>(class_normal)</td>\n", - " <td>0.394768</td>\n", - " <td>0.533860</td>\n", - " <td>0.378533</td>\n", - " <td>0.958874</td>\n", - " <td>1.796115</td>\n", - " <td>0.167782</td>\n", - " <td>11.334383</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>(class_normal)</td>\n", - " <td>(same_srv_rate)</td>\n", - " <td>0.533860</td>\n", - " <td>0.622102</td>\n", - " <td>0.509963</td>\n", - " <td>0.955238</td>\n", - " <td>1.535500</td>\n", - " <td>0.177848</td>\n", - " <td>8.442437</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>(flag_SF)</td>\n", - " <td>(same_srv_rate)</td>\n", - " <td>0.594355</td>\n", - " <td>0.622102</td>\n", - " <td>0.562837</td>\n", - " <td>0.946971</td>\n", - " <td>1.522212</td>\n", - " <td>0.193088</td>\n", - " <td>7.126276</td>\n", - " </tr>\n", - " <tr>\n", - " <th>17</th>\n", - " <td>(dst_host_srv_count)</td>\n", - " <td>(flag_SF)</td>\n", - " <td>0.435773</td>\n", - " <td>0.594355</td>\n", - " <td>0.412234</td>\n", - " <td>0.945983</td>\n", - " <td>1.591612</td>\n", - " <td>0.153230</td>\n", - " <td>7.509556</td>\n", - " </tr>\n", - " <tr>\n", - " <th>5</th>\n", - " <td>(class_normal)</td>\n", - " <td>(flag_SF)</td>\n", - " <td>0.533860</td>\n", - " <td>0.594355</td>\n", - " <td>0.502660</td>\n", - " <td>0.941557</td>\n", - " <td>1.584165</td>\n", - " <td>0.185357</td>\n", - " <td>6.940859</td>\n", - " </tr>\n", - " <tr>\n", - " <th>9</th>\n", - " <td>(dst_host_same_srv_rate)</td>\n", - " <td>(flag_SF)</td>\n", - " <td>0.498730</td>\n", - " <td>0.594355</td>\n", - " <td>0.469117</td>\n", - " <td>0.940624</td>\n", - " <td>1.582595</td>\n", - " <td>0.172694</td>\n", - " <td>6.831795</td>\n", - " </tr>\n", - " <tr>\n", - " <th>18</th>\n", - " <td>(dst_host_srv_count)</td>\n", - " <td>(class_normal)</td>\n", - " <td>0.435773</td>\n", - " <td>0.533860</td>\n", - " <td>0.401437</td>\n", - " <td>0.921206</td>\n", - " <td>1.725557</td>\n", - " <td>0.168795</td>\n", - " <td>5.915937</td>\n", - " </tr>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>(same_srv_rate)</td>\n", - " <td>(flag_SF)</td>\n", - " <td>0.622102</td>\n", - " <td>0.594355</td>\n", - " <td>0.562837</td>\n", - " <td>0.904735</td>\n", - " <td>1.522212</td>\n", - " <td>0.193088</td>\n", - " <td>4.258046</td>\n", - " </tr>\n", - " <tr>\n", - " <th>11</th>\n", - " <td>(dst_host_same_srv_rate)</td>\n", - " <td>(class_normal)</td>\n", - " <td>0.498730</td>\n", - " <td>0.533860</td>\n", - " <td>0.432637</td>\n", - " <td>0.867479</td>\n", - " <td>1.624918</td>\n", - " <td>0.166386</td>\n", - " <td>3.517468</td>\n", - " </tr>\n", - " <tr>\n", - " <th>14</th>\n", - " <td>(dst_host_same_srv_rate)</td>\n", - " <td>(dst_host_srv_count)</td>\n", - " <td>0.498730</td>\n", - " <td>0.435773</td>\n", - " <td>0.426485</td>\n", - " <td>0.855142</td>\n", - " <td>1.962355</td>\n", - " <td>0.209152</td>\n", - " <td>3.895025</td>\n", - " </tr>\n", - " <tr>\n", - " <th>27</th>\n", - " <td>(logged_in)</td>\n", - " <td>(dst_host_same_srv_rate)</td>\n", - " <td>0.394768</td>\n", - " <td>0.498730</td>\n", - " <td>0.337051</td>\n", - " <td>0.853796</td>\n", - " <td>1.711941</td>\n", - " <td>0.140169</td>\n", - " <td>3.428564</td>\n", - " </tr>\n", - " <tr>\n", - " <th>20</th>\n", - " <td>(class_anomaly)</td>\n", - " <td>(dst_host_count)</td>\n", - " <td>0.466140</td>\n", - " <td>0.643022</td>\n", - " <td>0.395403</td>\n", - " <td>0.848250</td>\n", - " <td>1.319163</td>\n", - " <td>0.095665</td>\n", - " <td>2.352412</td>\n", - " </tr>\n", - " <tr>\n", - " <th>6</th>\n", - " <td>(flag_SF)</td>\n", - " <td>(class_normal)</td>\n", - " <td>0.594355</td>\n", - " <td>0.533860</td>\n", - " <td>0.502660</td>\n", - " <td>0.845722</td>\n", - " <td>1.584165</td>\n", - " <td>0.185357</td>\n", - " <td>3.021435</td>\n", - " </tr>\n", - " <tr>\n", - " <th>22</th>\n", - " <td>(class_anomaly)</td>\n", - " <td>(protocol_type_tcp)</td>\n", - " <td>0.466140</td>\n", - " <td>0.814782</td>\n", - " <td>0.390799</td>\n", - " <td>0.838372</td>\n", - " <td>1.028952</td>\n", - " <td>0.010996</td>\n", - " <td>1.145948</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>(same_srv_rate)</td>\n", - " <td>(class_normal)</td>\n", - " <td>0.622102</td>\n", - " <td>0.533860</td>\n", - " <td>0.509963</td>\n", - " <td>0.819742</td>\n", - " <td>1.535500</td>\n", - " <td>0.177848</td>\n", - " <td>2.585963</td>\n", - " </tr>\n", - " <tr>\n", - " <th>12</th>\n", - " <td>(class_normal)</td>\n", - " <td>(dst_host_same_srv_rate)</td>\n", - " <td>0.533860</td>\n", - " <td>0.498730</td>\n", - " <td>0.432637</td>\n", - " <td>0.810395</td>\n", - " <td>1.624918</td>\n", - " <td>0.166386</td>\n", - " <td>2.643758</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>(dst_host_count)</td>\n", - " <td>(protocol_type_tcp)</td>\n", - " <td>0.643022</td>\n", - " <td>0.814782</td>\n", - " <td>0.516077</td>\n", - " <td>0.802580</td>\n", - " <td>0.985024</td>\n", - " <td>-0.007846</td>\n", - " <td>0.938192</td>\n", - " </tr>\n", - " <tr>\n", - " <th>16</th>\n", - " <td>(class_normal)</td>\n", - " <td>(protocol_type_tcp)</td>\n", - " <td>0.533860</td>\n", - " <td>0.814782</td>\n", - " <td>0.423984</td>\n", - " <td>0.794185</td>\n", - " <td>0.974721</td>\n", - " <td>-0.010996</td>\n", - " <td>0.899924</td>\n", - " </tr>\n", - " <tr>\n", - " <th>7</th>\n", - " <td>(same_srv_rate)</td>\n", - " <td>(dst_host_same_srv_rate)</td>\n", - " <td>0.622102</td>\n", - " <td>0.498730</td>\n", - " <td>0.492339</td>\n", - " <td>0.791411</td>\n", - " <td>1.586854</td>\n", - " <td>0.182078</td>\n", - " <td>2.403153</td>\n", - " </tr>\n", - " <tr>\n", - " <th>10</th>\n", - " <td>(flag_SF)</td>\n", - " <td>(dst_host_same_srv_rate)</td>\n", - " <td>0.594355</td>\n", - " <td>0.498730</td>\n", - " <td>0.469117</td>\n", - " <td>0.789287</td>\n", - " <td>1.582595</td>\n", - " <td>0.172694</td>\n", - " <td>2.378928</td>\n", - " </tr>\n", - " <tr>\n", - " <th>28</th>\n", - " <td>(dst_host_srv_count)</td>\n", - " <td>(protocol_type_tcp)</td>\n", - " <td>0.435773</td>\n", - " <td>0.814782</td>\n", - " <td>0.333519</td>\n", - " <td>0.765349</td>\n", - " <td>0.939329</td>\n", - " <td>-0.021542</td>\n", - " <td>0.789332</td>\n", - " </tr>\n", - " <tr>\n", - " <th>19</th>\n", - " <td>(class_normal)</td>\n", - " <td>(dst_host_srv_count)</td>\n", - " <td>0.533860</td>\n", - " <td>0.435773</td>\n", - " <td>0.401437</td>\n", - " <td>0.751952</td>\n", - " <td>1.725557</td>\n", - " <td>0.168795</td>\n", - " <td>2.274666</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " antecedents consequents antecedent support \\\n", - "21 (logged_in) (protocol_type_tcp) 0.394768 \n", - "13 (dst_host_srv_count) (same_srv_rate) 0.435773 \n", - "8 (dst_host_same_srv_rate) (same_srv_rate) 0.498730 \n", - "23 (logged_in) (flag_SF) 0.394768 \n", - "24 (logged_in) (same_srv_rate) 0.394768 \n", - "15 (dst_host_srv_count) (dst_host_same_srv_rate) 0.435773 \n", - "26 (count) (dst_host_count) 0.364481 \n", - "25 (logged_in) (class_normal) 0.394768 \n", - "4 (class_normal) (same_srv_rate) 0.533860 \n", - "1 (flag_SF) (same_srv_rate) 0.594355 \n", - "17 (dst_host_srv_count) (flag_SF) 0.435773 \n", - "5 (class_normal) (flag_SF) 0.533860 \n", - "9 (dst_host_same_srv_rate) (flag_SF) 0.498730 \n", - "18 (dst_host_srv_count) (class_normal) 0.435773 \n", - "0 (same_srv_rate) (flag_SF) 0.622102 \n", - "11 (dst_host_same_srv_rate) (class_normal) 0.498730 \n", - "14 (dst_host_same_srv_rate) (dst_host_srv_count) 0.498730 \n", - "27 (logged_in) (dst_host_same_srv_rate) 0.394768 \n", - "20 (class_anomaly) (dst_host_count) 0.466140 \n", - "6 (flag_SF) (class_normal) 0.594355 \n", - "22 (class_anomaly) (protocol_type_tcp) 0.466140 \n", - "3 (same_srv_rate) (class_normal) 0.622102 \n", - "12 (class_normal) (dst_host_same_srv_rate) 0.533860 \n", - "2 (dst_host_count) (protocol_type_tcp) 0.643022 \n", - "16 (class_normal) (protocol_type_tcp) 0.533860 \n", - "7 (same_srv_rate) (dst_host_same_srv_rate) 0.622102 \n", - "10 (flag_SF) (dst_host_same_srv_rate) 0.594355 \n", - "28 (dst_host_srv_count) (protocol_type_tcp) 0.435773 \n", - "19 (class_normal) (dst_host_srv_count) 0.533860 \n", - "\n", - " consequent support support confidence lift leverage conviction \n", - "21 0.814782 0.394768 1.000000 1.227321 0.073118 inf \n", - "13 0.622102 0.430891 0.988796 1.589443 0.159795 33.728142 \n", - "8 0.622102 0.492339 0.987186 1.586854 0.182078 29.490107 \n", - "23 0.594355 0.387861 0.982504 1.653058 0.153229 23.184690 \n", - "24 0.622102 0.387702 0.982102 1.578682 0.142116 21.113444 \n", - "15 0.498730 0.426485 0.978685 1.962355 0.209152 23.516858 \n", - "26 0.643022 0.351818 0.965258 1.501129 0.117449 10.275159 \n", - "25 0.533860 0.378533 0.958874 1.796115 0.167782 11.334383 \n", - "4 0.622102 0.509963 0.955238 1.535500 0.177848 8.442437 \n", - "1 0.622102 0.562837 0.946971 1.522212 0.193088 7.126276 \n", - "17 0.594355 0.412234 0.945983 1.591612 0.153230 7.509556 \n", - "5 0.594355 0.502660 0.941557 1.584165 0.185357 6.940859 \n", - "9 0.594355 0.469117 0.940624 1.582595 0.172694 6.831795 \n", - "18 0.533860 0.401437 0.921206 1.725557 0.168795 5.915937 \n", - "0 0.594355 0.562837 0.904735 1.522212 0.193088 4.258046 \n", - "11 0.533860 0.432637 0.867479 1.624918 0.166386 3.517468 \n", - "14 0.435773 0.426485 0.855142 1.962355 0.209152 3.895025 \n", - "27 0.498730 0.337051 0.853796 1.711941 0.140169 3.428564 \n", - "20 0.643022 0.395403 0.848250 1.319163 0.095665 2.352412 \n", - "6 0.533860 0.502660 0.845722 1.584165 0.185357 3.021435 \n", - "22 0.814782 0.390799 0.838372 1.028952 0.010996 1.145948 \n", - "3 0.533860 0.509963 0.819742 1.535500 0.177848 2.585963 \n", - "12 0.498730 0.432637 0.810395 1.624918 0.166386 2.643758 \n", - "2 0.814782 0.516077 0.802580 0.985024 -0.007846 0.938192 \n", - "16 0.814782 0.423984 0.794185 0.974721 -0.010996 0.899924 \n", - "7 0.498730 0.492339 0.791411 1.586854 0.182078 2.403153 \n", - "10 0.498730 0.469117 0.789287 1.582595 0.172694 2.378928 \n", - "28 0.814782 0.333519 0.765349 0.939329 -0.021542 0.789332 \n", - "19 0.435773 0.401437 0.751952 1.725557 0.168795 2.274666 " - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "rules = association_rules(result_desc , metric = \"confidence\" , min_threshold = 0.75)\n", - "rules = rules.sort_values(['confidence','lift'], ascending=[False , False])\n", - "rules" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>antecedents</th>\n", - " <th>consequents</th>\n", - " <th>antecedent support</th>\n", - " <th>consequent support</th>\n", - " <th>support</th>\n", - " <th>confidence</th>\n", - " <th>lift</th>\n", - " <th>leverage</th>\n", - " <th>conviction</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>25</th>\n", - " <td>(logged_in)</td>\n", - " <td>(class_normal)</td>\n", - " <td>0.394768</td>\n", - " <td>0.53386</td>\n", - " <td>0.378533</td>\n", - " <td>0.958874</td>\n", - " <td>1.796115</td>\n", - " <td>0.167782</td>\n", - " <td>11.334383</td>\n", - " </tr>\n", - " <tr>\n", - " <th>18</th>\n", - " <td>(dst_host_srv_count)</td>\n", - " <td>(class_normal)</td>\n", - " <td>0.435773</td>\n", - " <td>0.53386</td>\n", - " <td>0.401437</td>\n", - " <td>0.921206</td>\n", - " <td>1.725557</td>\n", - " <td>0.168795</td>\n", - " <td>5.915937</td>\n", - " </tr>\n", - " <tr>\n", - " <th>11</th>\n", - " <td>(dst_host_same_srv_rate)</td>\n", - " <td>(class_normal)</td>\n", - " <td>0.498730</td>\n", - " <td>0.53386</td>\n", - " <td>0.432637</td>\n", - " <td>0.867479</td>\n", - " <td>1.624918</td>\n", - " <td>0.166386</td>\n", - " <td>3.517468</td>\n", - " </tr>\n", - " <tr>\n", - " <th>6</th>\n", - " <td>(flag_SF)</td>\n", - " <td>(class_normal)</td>\n", - " <td>0.594355</td>\n", - " <td>0.53386</td>\n", - " <td>0.502660</td>\n", - " <td>0.845722</td>\n", - " <td>1.584165</td>\n", - " <td>0.185357</td>\n", - " <td>3.021435</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>(same_srv_rate)</td>\n", - " <td>(class_normal)</td>\n", - " <td>0.622102</td>\n", - " <td>0.53386</td>\n", - " <td>0.509963</td>\n", - " <td>0.819742</td>\n", - " <td>1.535500</td>\n", - " <td>0.177848</td>\n", - " <td>2.585963</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " antecedents consequents antecedent support \\\n", - "25 (logged_in) (class_normal) 0.394768 \n", - "18 (dst_host_srv_count) (class_normal) 0.435773 \n", - "11 (dst_host_same_srv_rate) (class_normal) 0.498730 \n", - "6 (flag_SF) (class_normal) 0.594355 \n", - "3 (same_srv_rate) (class_normal) 0.622102 \n", - "\n", - " consequent support support confidence lift leverage conviction \n", - "25 0.53386 0.378533 0.958874 1.796115 0.167782 11.334383 \n", - "18 0.53386 0.401437 0.921206 1.725557 0.168795 5.915937 \n", - "11 0.53386 0.432637 0.867479 1.624918 0.166386 3.517468 \n", - "6 0.53386 0.502660 0.845722 1.584165 0.185357 3.021435 \n", - "3 0.53386 0.509963 0.819742 1.535500 0.177848 2.585963 " - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "rules[rules['consequents'] == {'class_normal'}]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "interpreter": { - "hash": "201ef9045d1e569690d4ef19acedf47659d611d6c0ceca4c00974770fb24d6e6" - }, - "kernelspec": { - "display_name": "Python 3.8.5 64-bit ('base': conda)", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.5" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} -- GitLab