diff --git a/Machine_Learning.ipynb b/Machine_Learning.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..98e6fa1e753c9d85375ae72dc40364e5fc028ace
--- /dev/null
+++ b/Machine_Learning.ipynb
@@ -0,0 +1,799 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# 구글 드라이브 연결 및 데이터 불러오기"
+      ],
+      "metadata": {
+        "id": "iN0i-sXwwJzr"
+      }
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "jcYdSR0kD-Py"
+      },
+      "outputs": [],
+      "source": [
+        "# 본인 구글드라이브 연결\n",
+        "from google.colab import drive\n",
+        "drive.mount('/content/drive')"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Explicit %cd magic with an absolute path (Drive is mounted at /content/drive):\n",
+        "# does not depend on automagic and can be re-run after the working directory has changed\n",
+        "%cd /content/drive/MyDrive/'Colab Notebooks'/데이터마이닝/"
+      ],
+      "metadata": {
+        "id": "faQ8kn45EcaS"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import numpy as np\n",
+        "import pandas as pd\n",
+        "import re\n",
+        "\n",
+        "# 데이터 불러오기\n",
+        "data=pd.read_csv('chatgpt_paraphrases.csv')"
+      ],
+      "metadata": {
+        "id": "2DVrCwEvwggM"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Only the last expression of a cell is rendered automatically, so the\n",
+        "# intermediate views are displayed explicitly instead of being discarded\n",
+        "display(data.head())\n",
+        "display(data.dtypes)\n",
+        "data"
+      ],
+      "metadata": {
+        "id": "utnWdjs-wtIQ"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# 데이터 전처리 및 테스트 셋 분류"
+      ],
+      "metadata": {
+        "id": "qRtCapbgw6y1"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import ast\n",
+        "\n",
+        "# Collect the necessary data: map every text to its author label\n",
+        "category={}\n",
+        "for text, paraphrases in zip(data['text'], data['paraphrases']):\n",
+        "    # Parse the list literal properly; slicing and split(', ') cut apart any\n",
+        "    # paraphrase that itself contains ', '.\n",
+        "    # Assumes the column holds a Python list literal of strings - TODO confirm\n",
+        "    chatgpt=ast.literal_eval(paraphrases)\n",
+        "    # Only the first paraphrase of each row is kept\n",
+        "    for j in chatgpt[:1]:\n",
+        "        category[j]='chatgpt'\n",
+        "    # Later assignments overwrite earlier ones for an identical text\n",
+        "    category[text]=\"human\"\n",
+        "\n",
+        "# Convert the dictionary to a DataFrame and shuffle the rows with a fixed seed\n",
+        "data=pd.DataFrame(category.items(),columns=[\"text\",\"category\"])\n",
+        "data=data.sample(frac=1, random_state=42)\n",
+        "\n",
+        "# A bare expression in the middle of a cell is not rendered, so display it explicitly\n",
+        "display(data.head())\n",
+        "\n",
+        "data[\"category\"].value_counts()"
+      ],
+      "metadata": {
+        "id": "P7j6MMy8w-6i"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from sklearn.model_selection import train_test_split\n",
+        "from sklearn.preprocessing import StandardScaler\n",
+        "\n",
+        "# Split into train and test sets. A fixed seed makes the split reproducible and\n",
+        "# stratifying on y keeps the class ratio the same in both splits.\n",
+        "X=data['text']\n",
+        "y=data['category']\n",
+        "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)\n",
+        "\n",
+        "# Vectorize using TF-IDF: the vectorizer is fitted on the training split only and\n",
+        "# then applied unchanged to the test split\n",
+        "from sklearn.feature_extraction.text import TfidfVectorizer\n",
+        "vectorizer = TfidfVectorizer()\n",
+        "X_train_tfidf = vectorizer.fit_transform(X_train)\n",
+        "X_test_tfidf = vectorizer.transform(X_test)"
+      ],
+      "metadata": {
+        "id": "0fJxwQVlyjkr"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# 모델 선정"
+      ],
+      "metadata": {
+        "id": "_nafbNL_M4Ub"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "모델 후보들\n",
+        "\n",
+        "KNN(K Nearest Neighbor),\n",
+        "SVC(Support Vector Classifier),\n",
+        "RFC(Random Forest Classifier),\n",
+        "DTC(Decision Tree Classifier)\n",
+        "\n",
+        "를 사용할 계획이다."
+      ],
+      "metadata": {
+        "id": "v5uzNIPYx_yo"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## KNN(K Nearest Neighbor)"
+      ],
+      "metadata": {
+        "id": "fgK95wGDyRWi"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from sklearn.neighbors import KNeighborsClassifier\n",
+        "from sklearn.model_selection import GridSearchCV\n",
+        "from sklearn import metrics\n",
+        "\n",
+        "# Hyperparameter tuning using GridSearchCV. All metrics are computed inside the\n",
+        "# 5 CV folds of the training set, so the test set is not consulted while\n",
+        "# comparing hyperparameter combinations.\n",
+        "param_grid = {'n_neighbors': [1, 3, 5, 7, 9, 11], 'weights': ['uniform', 'distance']}\n",
+        "scoring = {\n",
+        "    'accuracy': 'accuracy',\n",
+        "    'precision': 'precision_weighted',\n",
+        "    'recall': 'recall_weighted',\n",
+        "    'f1': 'f1_weighted',\n",
+        "    'mcc': metrics.make_scorer(metrics.matthews_corrcoef),\n",
+        "}\n",
+        "knn = KNeighborsClassifier()\n",
+        "# refit='accuracy' keeps accuracy as the criterion that picks best_estimator_\n",
+        "grid_search = GridSearchCV(knn, param_grid, cv=5, scoring=scoring, refit='accuracy')\n",
+        "grid_search.fit(X_train_tfidf, y_train)\n",
+        "\n",
+        "# Print the cross-validated results for each hyperparameter combination\n",
+        "results = grid_search.cv_results_\n",
+        "for idx, params in enumerate(results['params']):\n",
+        "    print(f\"Hyperparameters: {params}\")\n",
+        "    print(\"  CV Accuracy:\", results['mean_test_accuracy'][idx])\n",
+        "    print(\"  CV Precision:\", results['mean_test_precision'][idx])\n",
+        "    print(\"  CV Recall:\", results['mean_test_recall'][idx])\n",
+        "    print(\"  CV F1 Score:\", results['mean_test_f1'][idx])\n",
+        "    print(\"  CV Matthews Correlation Coefficient:\", results['mean_test_mcc'][idx])\n",
+        "    print(\"-----------\")\n",
+        "\n",
+        "# Print the best hyperparameters\n",
+        "print(\"\\nBest Hyperparameters:\", grid_search.best_params_)\n",
+        "\n",
+        "# Evaluate the model with the best hyperparameters on the test set (used once, here)\n",
+        "best_knn = grid_search.best_estimator_\n",
+        "y_pred = best_knn.predict(X_test_tfidf)\n",
+        "\n",
+        "# Calculate accuracy, precision, recall, f1-score, and Matthews correlation coefficient\n",
+        "acc = metrics.accuracy_score(y_test, y_pred)\n",
+        "pre = metrics.precision_score(y_test, y_pred, average='weighted')\n",
+        "rec = metrics.recall_score(y_test, y_pred, average='weighted')\n",
+        "f1 = metrics.f1_score(y_test, y_pred, average='weighted')\n",
+        "mcc = metrics.matthews_corrcoef(y_test, y_pred)\n",
+        "\n",
+        "# Print the evaluation metrics\n",
+        "print(\"\\nEvaluation Metrics:\")\n",
+        "print(\"Accuracy:\", acc)\n",
+        "print(\"Precision:\", pre)\n",
+        "print(\"Recall:\", rec)\n",
+        "print(\"F1 Score:\", f1)\n",
+        "print(\"Matthews Correlation Coefficient:\", mcc)\n"
+      ],
+      "metadata": {
+        "id": "8_W7qSlNb1Hg"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "n_neighbors 값이 커질수록 MCC 값이 커지는 것을 확인"
+      ],
+      "metadata": {
+        "id": "aoq-RQmGCAOX"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from sklearn.neighbors import KNeighborsClassifier\n",
+        "from sklearn.model_selection import GridSearchCV\n",
+        "from sklearn import metrics\n",
+        "\n",
+        "# Hyperparameter tuning using GridSearchCV over n_neighbors 1..100. All metrics\n",
+        "# are computed inside the 5 CV folds of the training set, so the test set is not\n",
+        "# consulted while comparing hyperparameter combinations.\n",
+        "param_grid = {'n_neighbors': list(range(1,101)), 'weights': ['uniform', 'distance']}\n",
+        "scoring = {\n",
+        "    'accuracy': 'accuracy',\n",
+        "    'precision': 'precision_weighted',\n",
+        "    'recall': 'recall_weighted',\n",
+        "    'f1': 'f1_weighted',\n",
+        "    'mcc': metrics.make_scorer(metrics.matthews_corrcoef),\n",
+        "}\n",
+        "knn = KNeighborsClassifier()\n",
+        "# refit='accuracy' keeps accuracy as the criterion that picks best_estimator_\n",
+        "grid_search = GridSearchCV(knn, param_grid, cv=5, scoring=scoring, refit='accuracy')\n",
+        "grid_search.fit(X_train_tfidf, y_train)\n",
+        "\n",
+        "# Print the cross-validated results for each hyperparameter combination\n",
+        "results = grid_search.cv_results_\n",
+        "for idx, params in enumerate(results['params']):\n",
+        "    print(f\"Hyperparameters: {params}\")\n",
+        "    print(\"  CV Accuracy:\", results['mean_test_accuracy'][idx])\n",
+        "    print(\"  CV Precision:\", results['mean_test_precision'][idx])\n",
+        "    print(\"  CV Recall:\", results['mean_test_recall'][idx])\n",
+        "    print(\"  CV F1 Score:\", results['mean_test_f1'][idx])\n",
+        "    print(\"  CV Matthews Correlation Coefficient:\", results['mean_test_mcc'][idx])\n",
+        "    print(\"-----------\")\n",
+        "\n",
+        "# Print the best hyperparameters\n",
+        "print(\"\\nBest Hyperparameters:\", grid_search.best_params_)\n",
+        "\n",
+        "# Evaluate the model with the best hyperparameters on the test set (used once, here)\n",
+        "best_knn = grid_search.best_estimator_\n",
+        "y_pred = best_knn.predict(X_test_tfidf)\n",
+        "\n",
+        "# Calculate accuracy, precision, recall, f1-score, and Matthews correlation coefficient\n",
+        "acc = metrics.accuracy_score(y_test, y_pred)\n",
+        "pre = metrics.precision_score(y_test, y_pred, average='weighted')\n",
+        "rec = metrics.recall_score(y_test, y_pred, average='weighted')\n",
+        "f1 = metrics.f1_score(y_test, y_pred, average='weighted')\n",
+        "mcc = metrics.matthews_corrcoef(y_test, y_pred)\n",
+        "\n",
+        "# Print the evaluation metrics\n",
+        "print(\"\\nEvaluation Metrics:\")\n",
+        "print(\"Accuracy:\", acc)\n",
+        "print(\"Precision:\", pre)\n",
+        "print(\"Recall:\", rec)\n",
+        "print(\"F1 Score:\", f1)\n",
+        "print(\"Matthews Correlation Coefficient:\", mcc)\n"
+      ],
+      "metadata": {
+        "id": "0CGjPHCkby2q"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "```\n",
+        "Best Hyperparameters: {'n_neighbors': 43, 'weights': 'distance'}\n",
+        "\n",
+        "Evaluation Metrics:\n",
+        "Accuracy: 0.72\n",
+        "Precision: 0.7218863015823609\n",
+        "Recall: 0.72\n",
+        "F1 Score: 0.7190533965955703\n",
+        "Matthews Correlation Coefficient: 0.4413253711772089\n",
+        "```"
+      ],
+      "metadata": {
+        "id": "srDAcfxLB74i"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## SVC(Support Vector Classifier)"
+      ],
+      "metadata": {
+        "id": "IK5bBh1z00PG"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from sklearn.svm import SVC\n",
+        "from sklearn.model_selection import GridSearchCV\n",
+        "from sklearn import metrics\n",
+        "\n",
+        "# Hyperparameter tuning using GridSearchCV. All metrics are computed inside the\n",
+        "# 5 CV folds of the training set, so the test set is not consulted while\n",
+        "# comparing hyperparameter combinations.\n",
+        "param_grid = {'C': [0.01, 0.1, 1, 10, 100, 1000], 'kernel': ['linear', 'rbf']}\n",
+        "scoring = {\n",
+        "    'accuracy': 'accuracy',\n",
+        "    'precision': 'precision_weighted',\n",
+        "    'recall': 'recall_weighted',\n",
+        "    'f1': 'f1_weighted',\n",
+        "    'mcc': metrics.make_scorer(metrics.matthews_corrcoef),\n",
+        "}\n",
+        "svc = SVC()\n",
+        "# refit='accuracy' keeps accuracy as the criterion that picks best_estimator_\n",
+        "grid_search = GridSearchCV(svc, param_grid, cv=5, scoring=scoring, refit='accuracy')\n",
+        "grid_search.fit(X_train_tfidf, y_train)\n",
+        "\n",
+        "# Print the cross-validated results for each hyperparameter combination\n",
+        "results = grid_search.cv_results_\n",
+        "for idx, params in enumerate(results['params']):\n",
+        "    print(f\"Hyperparameters: {params}\")\n",
+        "    print(\"  CV Accuracy:\", results['mean_test_accuracy'][idx])\n",
+        "    print(\"  CV Precision:\", results['mean_test_precision'][idx])\n",
+        "    print(\"  CV Recall:\", results['mean_test_recall'][idx])\n",
+        "    print(\"  CV F1 Score:\", results['mean_test_f1'][idx])\n",
+        "    print(\"  CV Matthews Correlation Coefficient:\", results['mean_test_mcc'][idx])\n",
+        "    print(\"-----------\")\n",
+        "\n",
+        "# Print the best hyperparameters\n",
+        "print(\"\\nBest Hyperparameters:\", grid_search.best_params_)\n",
+        "\n",
+        "# Evaluate the model with the best hyperparameters on the test set (used once, here)\n",
+        "best_svc = grid_search.best_estimator_\n",
+        "y_pred = best_svc.predict(X_test_tfidf)\n",
+        "\n",
+        "# Calculate accuracy, precision, recall, f1-score, and Matthews correlation coefficient\n",
+        "acc = metrics.accuracy_score(y_test, y_pred)\n",
+        "pre = metrics.precision_score(y_test, y_pred, average='weighted')\n",
+        "rec = metrics.recall_score(y_test, y_pred, average='weighted')\n",
+        "f1 = metrics.f1_score(y_test, y_pred, average='weighted')\n",
+        "mcc = metrics.matthews_corrcoef(y_test, y_pred)\n",
+        "\n",
+        "# Print the evaluation metrics\n",
+        "print(\"\\nEvaluation Metrics:\")\n",
+        "print(\"Accuracy:\", acc)\n",
+        "print(\"Precision:\", pre)\n",
+        "print(\"Recall:\", rec)\n",
+        "print(\"F1 Score:\", f1)\n",
+        "print(\"Matthews Correlation Coefficient:\", mcc)\n"
+      ],
+      "metadata": {
+        "id": "8yBxUmHC06bR"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "```\n",
+        "Best Hyperparameters: {'C': 10, 'kernel': 'rbf'}\n",
+        "\n",
+        "Evaluation Metrics:\n",
+        "Accuracy: 0.7625\n",
+        "Precision: 0.7636107310458006\n",
+        "Recall: 0.7625\n",
+        "F1 Score: 0.7619170211585666\n",
+        "Matthews Correlation Coefficient: 0.5253533759188264\n",
+        "```"
+      ],
+      "metadata": {
+        "id": "IpOyMN_ZpI18"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## RFC(Random Forest Classifier)"
+      ],
+      "metadata": {
+        "id": "T1XYF1NDpOvg"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from sklearn.ensemble import RandomForestClassifier\n",
+        "from sklearn.model_selection import GridSearchCV\n",
+        "from sklearn import metrics\n",
+        "\n",
+        "# Hyperparameter tuning using GridSearchCV. All metrics are computed inside the\n",
+        "# 5 CV folds of the training set, so the test set is not consulted while\n",
+        "# comparing hyperparameter combinations.\n",
+        "param_grid = {'n_estimators': [50, 100, 200], 'max_depth': [None, 10, 20, 30], 'min_samples_split': [2, 5, 10], 'min_samples_leaf': [1, 2, 4]}\n",
+        "scoring = {\n",
+        "    'accuracy': 'accuracy',\n",
+        "    'precision': 'precision_weighted',\n",
+        "    'recall': 'recall_weighted',\n",
+        "    'f1': 'f1_weighted',\n",
+        "    'mcc': metrics.make_scorer(metrics.matthews_corrcoef),\n",
+        "}\n",
+        "# Fixed seed so that differences between combinations are not random-forest noise\n",
+        "rfc = RandomForestClassifier(random_state=42)\n",
+        "# refit='accuracy' keeps accuracy as the criterion that picks best_estimator_\n",
+        "grid_search = GridSearchCV(rfc, param_grid, cv=5, scoring=scoring, refit='accuracy')\n",
+        "grid_search.fit(X_train_tfidf, y_train)\n",
+        "\n",
+        "# Print the cross-validated results for each hyperparameter combination\n",
+        "results = grid_search.cv_results_\n",
+        "for idx, params in enumerate(results['params']):\n",
+        "    print(f\"Hyperparameters: {params}\")\n",
+        "    print(\"  CV Accuracy:\", results['mean_test_accuracy'][idx])\n",
+        "    print(\"  CV Precision:\", results['mean_test_precision'][idx])\n",
+        "    print(\"  CV Recall:\", results['mean_test_recall'][idx])\n",
+        "    print(\"  CV F1 Score:\", results['mean_test_f1'][idx])\n",
+        "    print(\"  CV Matthews Correlation Coefficient:\", results['mean_test_mcc'][idx])\n",
+        "    print(\"-----------\")\n",
+        "\n",
+        "# Print the best hyperparameters\n",
+        "print(\"\\nBest Hyperparameters:\", grid_search.best_params_)\n",
+        "\n",
+        "# Evaluate the model with the best hyperparameters on the test set (used once, here)\n",
+        "best_rfc = grid_search.best_estimator_\n",
+        "y_pred = best_rfc.predict(X_test_tfidf)\n",
+        "\n",
+        "# Calculate accuracy, precision, recall, f1-score, and Matthews correlation coefficient\n",
+        "acc = metrics.accuracy_score(y_test, y_pred)\n",
+        "pre = metrics.precision_score(y_test, y_pred, average='weighted')\n",
+        "rec = metrics.recall_score(y_test, y_pred, average='weighted')\n",
+        "f1 = metrics.f1_score(y_test, y_pred, average='weighted')\n",
+        "mcc = metrics.matthews_corrcoef(y_test, y_pred)\n",
+        "\n",
+        "# Print the evaluation metrics\n",
+        "print(\"\\nEvaluation Metrics:\")\n",
+        "print(\"Accuracy:\", acc)\n",
+        "print(\"Precision:\", pre)\n",
+        "print(\"Recall:\", rec)\n",
+        "print(\"F1 Score:\", f1)\n",
+        "print(\"Matthews Correlation Coefficient:\", mcc)\n"
+      ],
+      "metadata": {
+        "id": "e8-EauavpWGM"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## DTC(Decision Tree Classifier)"
+      ],
+      "metadata": {
+        "id": "6K6-8qFbpktL"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from sklearn.tree import DecisionTreeClassifier\n",
+        "from sklearn.model_selection import GridSearchCV\n",
+        "from sklearn import metrics\n",
+        "\n",
+        "# Hyperparameter tuning using GridSearchCV. All metrics are computed inside the\n",
+        "# 5 CV folds of the training set, so the test set is not consulted while\n",
+        "# comparing hyperparameter combinations.\n",
+        "param_grid = {'criterion': ['gini', 'entropy'], 'max_depth': [None, 10, 20, 30], 'min_samples_split': [2, 5, 10], 'min_samples_leaf': [1, 2, 4]}\n",
+        "scoring = {\n",
+        "    'accuracy': 'accuracy',\n",
+        "    'precision': 'precision_weighted',\n",
+        "    'recall': 'recall_weighted',\n",
+        "    'f1': 'f1_weighted',\n",
+        "    'mcc': metrics.make_scorer(metrics.matthews_corrcoef),\n",
+        "}\n",
+        "# Fixed seed so that differences between combinations are not tie-breaking noise\n",
+        "dtc = DecisionTreeClassifier(random_state=42)\n",
+        "# refit='accuracy' keeps accuracy as the criterion that picks best_estimator_\n",
+        "grid_search = GridSearchCV(dtc, param_grid, cv=5, scoring=scoring, refit='accuracy')\n",
+        "grid_search.fit(X_train_tfidf, y_train)\n",
+        "\n",
+        "# Print the cross-validated results for each hyperparameter combination\n",
+        "results = grid_search.cv_results_\n",
+        "for idx, params in enumerate(results['params']):\n",
+        "    print(f\"Hyperparameters: {params}\")\n",
+        "    print(\"  CV Accuracy:\", results['mean_test_accuracy'][idx])\n",
+        "    print(\"  CV Precision:\", results['mean_test_precision'][idx])\n",
+        "    print(\"  CV Recall:\", results['mean_test_recall'][idx])\n",
+        "    print(\"  CV F1 Score:\", results['mean_test_f1'][idx])\n",
+        "    print(\"  CV Matthews Correlation Coefficient:\", results['mean_test_mcc'][idx])\n",
+        "    print(\"-----------\")\n",
+        "\n",
+        "# Print the best hyperparameters\n",
+        "print(\"\\nBest Hyperparameters:\", grid_search.best_params_)\n",
+        "\n",
+        "# Evaluate the model with the best hyperparameters on the test set (used once, here)\n",
+        "best_dtc = grid_search.best_estimator_\n",
+        "y_pred = best_dtc.predict(X_test_tfidf)\n",
+        "\n",
+        "# Calculate accuracy, precision, recall, f1-score, and Matthews correlation coefficient\n",
+        "acc = metrics.accuracy_score(y_test, y_pred)\n",
+        "pre = metrics.precision_score(y_test, y_pred, average='weighted')\n",
+        "rec = metrics.recall_score(y_test, y_pred, average='weighted')\n",
+        "f1 = metrics.f1_score(y_test, y_pred, average='weighted')\n",
+        "mcc = metrics.matthews_corrcoef(y_test, y_pred)\n",
+        "\n",
+        "# Print the evaluation metrics\n",
+        "print(\"\\nEvaluation Metrics:\")\n",
+        "print(\"Accuracy:\", acc)\n",
+        "print(\"Precision:\", pre)\n",
+        "print(\"Recall:\", rec)\n",
+        "print(\"F1 Score:\", f1)\n",
+        "print(\"Matthews Correlation Coefficient:\", mcc)\n"
+      ],
+      "metadata": {
+        "id": "-MclX444pkCF"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# 최종 선택한 모델 교차 검증 수행"
+      ],
+      "metadata": {
+        "id": "6EgVN-BHGjjo"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from sklearn.ensemble import RandomForestClassifier\n",
+        "from sklearn.model_selection import GridSearchCV\n",
+        "from sklearn import metrics\n",
+        "\n",
+        "# Re-run GridSearchCV restricted to the single selected combination. All metrics\n",
+        "# in the per-combination report are computed inside the 5 CV folds of the\n",
+        "# training set; the test set is only used for the final evaluation below.\n",
+        "param_grid = {'n_estimators': [200], 'max_depth': [None], 'min_samples_split': [5], 'min_samples_leaf': [4]}\n",
+        "scoring = {\n",
+        "    'accuracy': 'accuracy',\n",
+        "    'precision': 'precision_weighted',\n",
+        "    'recall': 'recall_weighted',\n",
+        "    'f1': 'f1_weighted',\n",
+        "    'mcc': metrics.make_scorer(metrics.matthews_corrcoef),\n",
+        "}\n",
+        "# Fixed seed so that re-running the cell reproduces the same forest\n",
+        "rfc = RandomForestClassifier(random_state=42)\n",
+        "# refit='accuracy' keeps accuracy as the criterion that picks best_estimator_\n",
+        "grid_search = GridSearchCV(rfc, param_grid, cv=5, scoring=scoring, refit='accuracy')\n",
+        "grid_search.fit(X_train_tfidf, y_train)\n",
+        "\n",
+        "# Print the cross-validated results for each hyperparameter combination\n",
+        "results = grid_search.cv_results_\n",
+        "for idx, params in enumerate(results['params']):\n",
+        "    print(f\"Hyperparameters: {params}\")\n",
+        "    print(\"  CV Accuracy:\", results['mean_test_accuracy'][idx])\n",
+        "    print(\"  CV Precision:\", results['mean_test_precision'][idx])\n",
+        "    print(\"  CV Recall:\", results['mean_test_recall'][idx])\n",
+        "    print(\"  CV F1 Score:\", results['mean_test_f1'][idx])\n",
+        "    print(\"  CV Matthews Correlation Coefficient:\", results['mean_test_mcc'][idx])\n",
+        "    print(\"-----------\")\n",
+        "\n",
+        "# Print the best hyperparameters\n",
+        "print(\"\\nBest Hyperparameters:\", grid_search.best_params_)\n",
+        "\n",
+        "# Evaluate the model with the best hyperparameters on the test set (used once, here)\n",
+        "best_rfc = grid_search.best_estimator_\n",
+        "y_pred = best_rfc.predict(X_test_tfidf)\n",
+        "\n",
+        "# Calculate accuracy, precision, recall, f1-score, and Matthews correlation coefficient\n",
+        "acc = metrics.accuracy_score(y_test, y_pred)\n",
+        "pre = metrics.precision_score(y_test, y_pred, average='weighted')\n",
+        "rec = metrics.recall_score(y_test, y_pred, average='weighted')\n",
+        "f1 = metrics.f1_score(y_test, y_pred, average='weighted')\n",
+        "mcc = metrics.matthews_corrcoef(y_test, y_pred)\n",
+        "\n",
+        "# Print the evaluation metrics\n",
+        "print(\"\\nEvaluation Metrics:\")\n",
+        "print(\"Accuracy:\", acc)\n",
+        "print(\"Precision:\", pre)\n",
+        "print(\"Recall:\", rec)\n",
+        "print(\"F1 Score:\", f1)\n",
+        "print(\"Matthews Correlation Coefficient:\", mcc)\n"
+      ],
+      "metadata": {
+        "id": "LhvIh4Z4Gi5V"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from sklearn.model_selection import cross_val_score, KFold\n",
+        "\n",
+        "# Define the selected Random Forest Classifier with the same hyperparameters as the\n",
+        "# final grid search (n_estimators=200, max_depth=None, min_samples_split=5,\n",
+        "# min_samples_leaf=4).\n",
+        "# NOTE(review): min_samples_leaf was 1 here while the final grid search uses 4 -\n",
+        "# confirm which value is the intended one.\n",
+        "best_rfc = RandomForestClassifier(n_estimators=200, max_depth=None, min_samples_split=5, min_samples_leaf=4, random_state=42)\n",
+        "\n",
+        "# Define the number of folds for K-fold cross-validation\n",
+        "n_folds = 5\n",
+        "\n",
+        "# Create a KFold object (shuffled with a fixed seed so the folds are reproducible)\n",
+        "kf = KFold(n_splits=n_folds, shuffle=True, random_state=42)\n",
+        "\n",
+        "# Perform K-fold cross-validation on the training data only\n",
+        "cv_results = cross_val_score(best_rfc, X_train_tfidf, y_train, cv=kf, scoring='accuracy')\n",
+        "\n",
+        "# Print the accuracy for each fold\n",
+        "for i, acc in enumerate(cv_results, start=1):\n",
+        "    print(f\"Fold {i} Accuracy: {acc:.3f}\")\n",
+        "\n",
+        "# Print the average accuracy across all folds\n",
+        "print(f\"\\nAverage Accuracy across {n_folds} Folds: {cv_results.mean():.3f}\")\n"
+      ],
+      "metadata": {
+        "id": "ybNR8dB3IvyN"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# `rfc` was bound to a default-parameter RandomForestClassifier, so fitting it would\n",
+        "# discard the tuning. Fit the tuned `best_rfc` instead and rebind `rfc` to it so the\n",
+        "# cells below keep working unchanged (fit returns the estimator itself).\n",
+        "rfc = best_rfc.fit(X_train_tfidf, y_train)\n",
+        "rfc"
+      ],
+      "metadata": {
+        "id": "a6Ewr-8zPY1o"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# 결과 Confusion & ROC"
+      ],
+      "metadata": {
+        "id": "RqnkhfwfKepf"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from sklearn.metrics import confusion_matrix\n",
+        "import seaborn as sn\n",
+        "import pandas as pd\n",
+        "import matplotlib.pyplot as plt\n",
+        "\n",
+        "y_pred = rfc.predict(X_test_tfidf)\n",
+        "# Pass the label order explicitly so the matrix rows/columns are guaranteed to\n",
+        "# line up with the tick labels used in the heatmap below\n",
+        "class_labels = [\"chatgpt\", \"human\"]\n",
+        "cm = confusion_matrix(y_test, y_pred, labels=class_labels)\n",
+        "print(cm)\n",
+        "# A bare expression in the middle of a cell is not rendered, so display it explicitly\n",
+        "display(y_test.value_counts())\n",
+        "\n",
+        "df_cm = pd.DataFrame(cm, index=[\"ChatGPT\", \"Human\"], columns=[\"ChatGPT\", \"Human\"])\n",
+        "plt.figure(figsize = (10,7))\n",
+        "ax = sn.heatmap(df_cm, annot=True,cmap=\"YlGnBu\", fmt='g')\n",
+        "# confusion_matrix puts the true classes on the rows and the predictions on the columns\n",
+        "ax.set_xlabel(\"Predicted label\")\n",
+        "ax.set_ylabel(\"True label\")\n",
+        "plt.show()"
+      ],
+      "metadata": {
+        "id": "n96jTKA6KcwS"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from sklearn.metrics import roc_curve,auc\n",
+        "import matplotlib.pyplot as plt\n",
+        "\n",
+        "# predict_proba columns follow rfc.classes_; look up the positive class column\n",
+        "# instead of assuming it is index 1\n",
+        "human_idx = list(rfc.classes_).index('human')\n",
+        "y_prob = rfc.predict_proba(X_test_tfidf)[:, human_idx]\n",
+        "\n",
+        "fpr, tpr, thresholds = roc_curve(y_test, y_prob, pos_label='human')\n",
+        "\n",
+        "# Calculate the area under the ROC curve\n",
+        "roc_auc = auc(fpr, tpr)\n",
+        "\n",
+        "# Plot the ROC curve together with the chance diagonal\n",
+        "plt.plot(fpr, tpr, color='blue', lw=2, label='ROC curve (area = %0.2f)' % roc_auc)\n",
+        "plt.plot([0, 1], [0, 1], color='black', lw=2, linestyle='--')\n",
+        "plt.xlim([0.0, 1.0])\n",
+        "plt.ylim([0.0, 1.05])\n",
+        "plt.xlabel('False Positive Rate')\n",
+        "plt.ylabel('True Positive Rate')\n",
+        "plt.title('Receiver Operating Characteristic')\n",
+        "plt.legend(loc=\"lower right\")\n",
+        "plt.show()"
+      ],
+      "metadata": {
+        "id": "YTuhwiPmPyYo"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# 사용해보기"
+      ],
+      "metadata": {
+        "id": "SOlX7rEtJT5X"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def predict_text_category(model, text):\n",
+        "    \"\"\"Return the class label that `model` rates most probable for one text.\n",
+        "\n",
+        "    Parameters\n",
+        "    ----------\n",
+        "    model : fitted classifier exposing `predict_proba` and `classes_`\n",
+        "    text : str\n",
+        "        Raw text; it is vectorized with the notebook-level `vectorizer`.\n",
+        "\n",
+        "    Returns\n",
+        "    -------\n",
+        "    The entry of `model.classes_` with the highest predicted probability.\n",
+        "    \"\"\"\n",
+        "    text_vectorized = vectorizer.transform([text])\n",
+        "    # predict_proba returns one row per sample; a single text gives a single row\n",
+        "    prediction_prob = model.predict_proba(text_vectorized)[0]\n",
+        "    predicted_class_idx = np.argmax(prediction_prob)\n",
+        "    # The probability columns are ordered like model.classes_, so map the index\n",
+        "    # through that attribute rather than through labels re-derived from y_train\n",
+        "    predicted_category = model.classes_[predicted_class_idx]\n",
+        "    return predicted_category"
+      ],
+      "metadata": {
+        "id": "-A0kdamK8H1s"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Sample passage; spaces lost in the pasted text (\"whilehis\", \"pokesat\", \"atSomething\")\n",
+        "# are restored so the words are tokenized as intended\n",
+        "text_to_predict = \"Waves pound across a stone jetty. A MAN sits fishing while his young son, BRANDO strolls toward the open sea. He pokes at rocks and seaweed with a fishing pole. He glances down at Something wedged between the rocks beneath his feet. He pokes at it.\"\n",
+        "predicted_category = predict_text_category(rfc, text_to_predict)\n",
+        "print(\"Predicted Category:\", predicted_category)"
+      ],
+      "metadata": {
+        "id": "-I4AFZTs7nk3"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Classify a second sample passage with the fitted model\n",
+        "text_to_predict = (\n",
+        "    \"Arthur brushes past Mal, shaking his head. \"\n",
+        "    \"She nears Cobb. Looks out at the DROP.\"\n",
+        ")\n",
+        "predicted_category = predict_text_category(model=rfc, text=text_to_predict)\n",
+        "print(f\"Predicted Category: {predicted_category}\")"
+      ],
+      "metadata": {
+        "id": "D0h_FVDf96hm"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Classify a definition-style sentence with the fitted model\n",
+        "text_to_predict = (\n",
+        "    \"A script is a list of programmatically-written instructions \"\n",
+        "    \"that can be carried out on command.\"\n",
+        ")\n",
+        "predicted_category = predict_text_category(model=rfc, text=text_to_predict)\n",
+        "print(f\"Predicted Category: {predicted_category}\")"
+      ],
+      "metadata": {
+        "id": "y0aYPEJF-Gtk"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Classify another definition-style sentence with the fitted model\n",
+        "text_to_predict = (\n",
+        "    \"A controller is an individual who has responsibility for all \"\n",
+        "    \"accounting-related activities, including high-level accounting, \"\n",
+        "    \"managerial accounting, and finance activities, within a company.\"\n",
+        ")\n",
+        "predicted_category = predict_text_category(model=rfc, text=text_to_predict)\n",
+        "print(f\"Predicted Category: {predicted_category}\")"
+      ],
+      "metadata": {
+        "id": "4JYiO7P4-XCx"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}
\ No newline at end of file