diff --git a/21_2DM_YouTubeDislike_1.ipynb b/21_2DM_YouTubeDislike_1.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..ba820d1b7fbe52c291c182071e92bc2f67cc6ba5 --- /dev/null +++ b/21_2DM_YouTubeDislike_1.ipynb @@ -0,0 +1,2051 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "21-2DM_TeamProject_YouTubeDislike.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "code", + "source": [ + "from google.colab import drive\n", + "drive.mount('/content/gdrive')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vQJ0kj57vTnG", + "outputId": "e2d2444e-431c-4a3c-f791-a8d73278462a" + }, + "execution_count": 47, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount(\"/content/gdrive\", force_remount=True).\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# 1. Data Collection & 2. Data Preprocessing(Cleaning and Integration)" + ], + "metadata": { + "id": "gmdvXi4AvUnz" + } + }, + { + "cell_type": "code", + "execution_count": 103, + "metadata": { + "id": "_rX1nfJcWnW6" + }, + "outputs": [], + "source": [ + "import pandas\n", + "import re\n", + "import random\n", + "from googleapiclient.discovery import build\n", + "\n", + "\n", + "def cleansing(text):\n", + " pattern = '[0-9]+:[0-9]+' # comment timestamp\n", + " text = re.sub(pattern=pattern, repl='', string=text)\n", + " pattern = '([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\\.[a-zA-Z0-9-.]+)' # E-mail제거\n", + " text = re.sub(pattern=pattern, repl='', string=text)\n", + " pattern = '(http|ftp|https)://(?:[-\\w.]|(?:%[\\da-fA-F]{2}))+' # URL제거\n", + " text = re.sub(pattern=pattern, repl='', string=text)\n", + " pattern = '([ㄱ-ㅎㅏ-ㅣ]+)' # 한글 자음, 모음 제거\n", + " text = re.sub(pattern=pattern, repl='', string=text)\n", + " pattern = '<[^>]*>' # HTML 태그 제거\n", + " text = re.sub(pattern=pattern, repl='', string=text)\n", + " pattern = '[^\\w\\s]' # 특수기호제거\n", + " text = re.sub(pattern=pattern, repl='', string=text)\n", + " return text\n", + "\n", + "def collection(videoId, outputName):\n", + " # key\n", + " _developerKey = \"\"\n", + " _videoId = videoId # 테스트 할 영상 ID\n", + "\n", + " # get video information\n", + " comments = list()\n", + " api_obj = build('youtube', 'v3', developerKey=_developerKey)\n", + " video_info = api_obj.videos().list(part='statistics, snippet', id=_videoId).execute()\n", + "\n", + " print(\"\\n==========\")\n", + " print(\"title: \", video_info['items'][0]['snippet']['title'])\n", + " print(\"view: \", video_info['items'][0]['statistics']['viewCount'])\n", + " print(\"like: \", video_info['items'][0]['statistics']['likeCount'], \" / dislike: \", video_info['items'][0]['statistics']['dislikeCount'])\n", + "\n", + " response = api_obj.commentThreads().list(part='snippet,replies', videoId=_videoId, maxResults=100, textFormat='plainText').execute()\n", + "\n", + " while response:\n", + " for item in response['items']:\n", + " comment = item['snippet']['topLevelComment']['snippet']\n", + " text = cleansing(comment['textDisplay']).strip()\n", + " if text and (not text.isspace()):\n", + " comments.append(text)\n", + "\n", + " if len(comments) >= 10000:\n", + " break\n", + "\n", + " if 'nextPageToken' in response:\n", + " response = api_obj.commentThreads().list(part='snippet,replies', videoId=_videoId,\n", + " pageToken=response['nextPageToken'], maxResults=100).execute()\n", + " else:\n", + " break\n", + "\n", + " if len(comments) > 1000:\n", + " comments = random.sample(comments, 1000)\n", + "\n", + " print(len(comments))\n", + " print(\"실제 좋싫비: \", int(video_info['items'][0]['statistics']['likeCount']) / (int(video_info['items'][0]['statistics']['likeCount']) + int(video_info['items'][0]['statistics']['dislikeCount'])))\n", + "\n", + "\n", + " df = pandas.DataFrame(comments)\n", + " df.to_excel(outputName+'_data.xlsx', header=False, index=None)\n", + " return video_info['items'][0]['statistics']['likeCount']" + ] + }, + { + "cell_type": "code", + "source": [ + "count_like = collection(\"gSIQM6cipNs\", \"results\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "i1OF_UGMVFCW", + "outputId": "ff59c819-5889-4fd1-9b40-ad22738b3a8c" + }, + "execution_count": 49, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "title: 펄어비스 도깨비 - 첫 공개 게임플레이\n", + "view: 1649019\n", + "like: 19846 / dislike: 444\n", + "1000\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# 2. Data Preprocessing (Feature Extraction)" + ], + "metadata": { + "id": "ryfjao5Jvv81" + } + }, + { + "cell_type": "markdown", + "metadata": { + "id": "i45d7E0L8bZ_" + }, + "source": [ + "### 2-1. 준비 사항" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "WkAHQrj2Vjbl", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "18092b9a-69f1-4908-8706-e81e2daea5d8" + }, + "source": [ + "# Hugging Face의 트랜스포머 모델을 설치\n", + "!pip install transformers" + ], + "execution_count": 50, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: transformers in /usr/local/lib/python3.7/dist-packages (4.12.5)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (1.19.5)\n", + "Requirement already satisfied: sacremoses in /usr/local/lib/python3.7/dist-packages (from transformers) (0.0.46)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers) (2.23.0)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.7/dist-packages (from transformers) (21.3)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.7/dist-packages (from transformers) (6.0)\n", + "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from transformers) (4.8.2)\n", + "Requirement already satisfied: huggingface-hub<1.0,>=0.1.0 in /usr/local/lib/python3.7/dist-packages (from transformers) (0.2.1)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers) (3.4.0)\n", + "Requirement already satisfied: tokenizers<0.11,>=0.10.1 in /usr/local/lib/python3.7/dist-packages (from transformers) (0.10.3)\n", + "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from transformers) (4.62.3)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (2019.12.20)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0,>=0.1.0->transformers) (3.10.0.2)\n", + "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=20.0->transformers) (3.0.6)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->transformers) (3.6.0)\n", + "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2.10)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2021.10.8)\n", + "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (3.0.4)\n", + "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (1.24.3)\n", + "Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.1.0)\n", + "Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.15.0)\n", + "Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (7.1.2)\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "75dIz2fNWG8F" + }, + "source": [ + "import tensorflow as tf\n", + "import torch\n", + "\n", + "from transformers import BertTokenizer\n", + "from transformers import BertForSequenceClassification, AdamW, BertConfig\n", + "from transformers import get_linear_schedule_with_warmup\n", + "from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler\n", + "from keras.preprocessing.sequence import pad_sequences\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score\n", + "\n", + "import pandas as pd\n", + "import numpy as np\n", + "import random\n", + "import time\n", + "import datetime" + ], + "execution_count": 51, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "h_U3uMySBCIV" + }, + "source": [ + "### 2-2. 데이터 로드" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "ImBtAkSyTW1r", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "c093b573-85a8-4e5c-d7cb-811f1837e318" + }, + "source": [ + "# 네이버 영화리뷰 감정분석 데이터 다운로드\n", + "!git clone https://github.com/bab2min/corpus.git\n", + "\n", + "# 디렉토리의 파일 목록\n", + "!ls corpus/sentiment -la" + ], + "execution_count": 52, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "fatal: destination path 'corpus' already exists and is not an empty directory.\n", + "total 31520\n", + "drwxr-xr-x 2 root root 4096 Dec 9 05:17 .\n", + "drwxr-xr-x 4 root root 4096 Dec 9 05:17 ..\n", + "-rw-r--r-- 1 root root 20623547 Dec 9 05:17 naver_shopping.txt\n", + "-rw-r--r-- 1 root root 4617 Dec 9 05:17 README.md\n", + "-rw-r--r-- 1 root root 11631607 Dec 9 05:17 steam.txt\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "0LPEdb2tWfIU", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "ad8dd131-74a8-46ae-dd07-d41f3cdb5ce0" + }, + "source": [ + "# 데이터 로드\n", + "total_data = pd.read_table('corpus/sentiment/steam.txt',names=['label','reviews'])\n", + "print('전체 리뷰 개수 : ',len(total_data))" + ], + "execution_count": 53, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "전체 리뷰 개수 : 100000\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "tejY9ZhABYWl" + }, + "source": [ + "# # 훈련셋의 앞부분 출력\n", + "# total_data.head(10)\n", + "\n", + "# # total_data['label'] = np.select([total_data.ratings > 3], [1], default=0)\n", + "# total_data[:5]" + ], + "execution_count": 54, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "C6SsoAb-ZLpZ", + "outputId": "6ef80c02-f484-4009-bb6c-c7928966c787" + }, + "source": [ + "# 총 데이터\n", + "total_data['reviews'].nunique(), total_data['label'].nunique()\n", + "total_data.drop_duplicates(subset=['reviews'], inplace=True) # reviews 열에서 중복인 내용이 있다면 중복 제거\n", + "print('총 샘플의 수 :',len(total_data))" + ], + "execution_count": 55, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "총 샘플의 수 : 99892\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# 훈련 데이터와 테스트 데이터를 3:1 비율로 분리\n", + "train_data, test_data = train_test_split(total_data, test_size = 0.25, random_state = 42)\n", + "print('훈련용 리뷰의 개수 :', len(train_data))\n", + "print('테스트용 리뷰의 개수 :', len(test_data))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "PC0eCj-nfnDZ", + "outputId": "c2fe0db4-c4a9-4515-995f-aec8bfb931b7" + }, + "execution_count": 56, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "훈련용 리뷰의 개수 : 74919\n", + "테스트용 리뷰의 개수 : 24973\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XgjMzosCDD35" + }, + "source": [ + "### 2-3. 전처리 - 훈련셋" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "2GoESQ0jbybJ" + }, + "source": [ + "# 리뷰 문장 추출\n", + "sentences = train_data['reviews']\n", + "#sentences[:10]" + ], + "execution_count": 57, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# BERT의 입력 형식에 맞게 변환\n", + "sentences = [\"[CLS] \" + str(sentence) + \" [SEP]\" for sentence in sentences]\n", + "#sentences[:10]" + ], + "metadata": { + "id": "tRLpY5OgfsiG" + }, + "execution_count": 58, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "Ww1Or7bUoj2K" + }, + "source": [ + "# import matplotlib.pyplot as plt\n", + "\n", + "# fig,(ax1,ax2) = plt.subplots(1,2,figsize=(10,5))\n", + "# text_len = train_data[train_data['label']==1]['tokenized'].map(lambda x: len(x))\n", + "# ax1.hist(text_len, color='red')\n", + "# ax1.set_title('Positive Reviews')\n", + "# ax1.set_xlabel('length of samples')\n", + "# ax1.set_ylabel('number of samples')\n", + "# print('긍정 리뷰의 평균 길이 :', np.mean(text_len))\n", + "\n", + "# text_len = train_data[train_data['label']==0]['tokenized'].map(lambda x: len(x))\n", + "# ax2.hist(text_len, color='blue')\n", + "# ax2.set_title('Negative Reviews')\n", + "# fig.suptitle('Words in texts')\n", + "# ax2.set_xlabel('length of samples')\n", + "# ax2.set_ylabel('number of samples')\n", + "# print('부정 리뷰의 평균 길이 :', np.mean(text_len))\n", + "# plt.show()" + ], + "execution_count": 59, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "7hBblIVQcXJR", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "58029075-b9f5-4e2a-aab3-f7353f82e00e" + }, + "source": [ + "# 라벨 추출\n", + "labels = train_data['label'].values\n", + "labels" + ], + "execution_count": 60, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([1, 0, 0, ..., 1, 0, 1])" + ] + }, + "metadata": {}, + "execution_count": 60 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "PwEplfDvcnZG" + }, + "source": [ + "# BERT의 토크나이저로 문장을 토큰으로 분리\n", + "tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased', do_lower_case=False)\n", + "tokenized_texts = [tokenizer.tokenize(sent) for sent in sentences]\n", + "\n", + "# print (sentences[0])\n", + "# print (tokenized_texts[0])" + ], + "execution_count": 61, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "VJ76KiP_dLn-", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "1443b1a1-fe69-41e6-db60-a64ed69e7b9a" + }, + "source": [ + "# 입력 토큰의 최대 시퀀스 길이\n", + "MAX_LEN = 80\n", + "\n", + "# 토큰을 숫자 인덱스로 변환\n", + "input_ids = [tokenizer.convert_tokens_to_ids(x) for x in tokenized_texts]\n", + "\n", + "# 문장을 MAX_LEN 길이에 맞게 자르고, 모자란 부분을 패딩 0으로 채움\n", + "input_ids = pad_sequences(input_ids, maxlen=MAX_LEN, dtype=\"long\", truncating=\"post\", padding=\"post\")\n", + "\n", + "input_ids[0]" + ], + "execution_count": 62, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([ 101, 66402, 9836, 12692, 12965, 9955, 9137, 18382,\n", + " 77039, 9415, 118884, 11664, 9709, 12692, 12508, 9523,\n", + " 119081, 48345, 9670, 89523, 9654, 73306, 9913, 119228,\n", + " 66402, 58303, 48345, 9765, 38631, 33188, 48345, 102,\n", + " 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0])" + ] + }, + "metadata": {}, + "execution_count": 62 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "pKfL8SotdVaW", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "9725bd47-8bc6-4a1d-92a1-8311e51c3d94" + }, + "source": [ + "# 어텐션 마스크 초기화\n", + "attention_masks = []\n", + "\n", + "# 어텐션 마스크를 패딩이 아니면 1, 패딩이면 0으로 설정\n", + "# 패딩 부분은 BERT 모델에서 어텐션을 수행하지 않아 속도 향상\n", + "for seq in input_ids:\n", + " seq_mask = [float(i>0) for i in seq]\n", + " attention_masks.append(seq_mask)\n", + "\n", + "print(attention_masks[0])" + ], + "execution_count": 63, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "1f5Vq3-7eNKH", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "b8909609-3915-4bb2-d3cb-5cd9cb44bca8" + }, + "source": [ + "# 훈련셋과 검증셋으로 분리\n", + "train_inputs, validation_inputs, train_labels, validation_labels = train_test_split(input_ids,\n", + " labels, \n", + " random_state=2018, \n", + " test_size=0.1)\n", + "\n", + "# 어텐션 마스크를 훈련셋과 검증셋으로 분리\n", + "train_masks, validation_masks, _, _ = train_test_split(attention_masks, \n", + " input_ids,\n", + " random_state=2018, \n", + " test_size=0.1)\n", + "\n", + "# 데이터를 파이토치의 텐서로 변환\n", + "train_inputs = torch.tensor(train_inputs)\n", + "train_labels = torch.tensor(train_labels)\n", + "train_masks = torch.tensor(train_masks)\n", + "validation_inputs = torch.tensor(validation_inputs)\n", + "validation_labels = torch.tensor(validation_labels)\n", + "validation_masks = torch.tensor(validation_masks)\t\t\t\t\n", + "\n", + "print(train_inputs[0])\n", + "print(train_labels[0])\n", + "print(train_masks[0])\n", + "print(validation_inputs[0])\n", + "print(validation_labels[0])\n", + "print(validation_masks[0])" + ], + "execution_count": 64, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "tensor([ 101, 9955, 118627, 12092, 9555, 11664, 8982, 19105, 8924,\n", + " 56710, 12508, 9283, 31401, 118632, 28578, 100, 9546, 118837,\n", + " 14153, 9960, 21711, 14843, 12508, 39420, 31401, 118632, 11903,\n", + " 9407, 118624, 10003, 14863, 12453, 9954, 35465, 12092, 100,\n", + " 9405, 12508, 23811, 24982, 48549, 102, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0])\n", + "tensor(0)\n", + "tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", + " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", + " 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", + " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", + " 0., 0., 0., 0., 0., 0., 0., 0.])\n", + "tensor([ 101, 9954, 118663, 119383, 18622, 119212, 9960, 16323, 24982,\n", + " 48549, 119, 102, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0])\n", + "tensor(1)\n", + "tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.,\n", + " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", + " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", + " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", + " 0., 0., 0., 0., 0., 0., 0., 0.])\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "I3vlyUJuVRo5" + }, + "source": [ + "# 배치 사이즈\n", + "batch_size = 60\n", + "\n", + "# 파이토치의 DataLoader로 입력, 마스크, 라벨을 묶어 데이터 설정\n", + "# 학습시 배치 사이즈 만큼 데이터를 가져옴\n", + "\n", + "train_data = TensorDataset(train_inputs, train_masks, train_labels)\n", + "train_sampler = RandomSampler(train_data)\n", + "train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)\n", + "\n", + "validation_data = TensorDataset(validation_inputs, validation_masks, validation_labels)\n", + "validation_sampler = SequentialSampler(validation_data)\n", + "validation_dataloader = DataLoader(validation_data, sampler=validation_sampler, batch_size=batch_size)" + ], + "execution_count": 65, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zkqUHx51dffp" + }, + "source": [ + "### 2-4. 전처리 - 테스트셋" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "xgrsNuArd4pj" + }, + "source": [ + "# 리뷰 문장 추출\n", + "sentences = test_data['reviews']\n", + "#sentences[:10]" + ], + "execution_count": 66, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "Gtz3QZt9d4pz" + }, + "source": [ + "# BERT의 입력 형식에 맞게 변환\n", + "sentences = [\"[CLS] \" + str(sentence) + \" [SEP]\" for sentence in sentences]\n", + "#sentences[:10]" + ], + "execution_count": 67, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "li8oRajbd4p3", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "a2afeb8f-a9ef-4c9c-8e9a-1e45fd6ef44d" + }, + "source": [ + "# 라벨 추출\n", + "labels = test_data['label'].values\n", + "labels" + ], + "execution_count": 68, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([0, 0, 1, ..., 1, 1, 0])" + ] + }, + "metadata": {}, + "execution_count": 68 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "lvpQ49nEd4p6" + }, + "source": [ + "# BERT의 토크나이저로 문장을 토큰으로 분리\n", + "tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased', do_lower_case=False)\n", + "tokenized_texts = [tokenizer.tokenize(sent) for sent in sentences]\n", + "\n", + "#print (sentences[0])\n", + "#print (tokenized_texts[0])" + ], + "execution_count": 69, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "HI9viuAvd4p_", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "dd0772f6-17f3-43fc-c4ef-6d0941727c02" + }, + "source": [ + "# 입력 토큰의 최대 시퀀스 길이\n", + "MAX_LEN = 80\n", + "\n", + "# 토큰을 숫자 인덱스로 변환\n", + "input_ids = [tokenizer.convert_tokens_to_ids(x) for x in tokenized_texts]\n", + "\n", + "# 문장을 MAX_LEN 길이에 맞게 자르고, 모자란 부분을 패딩 0으로 채움\n", + "input_ids = pad_sequences(input_ids, maxlen=MAX_LEN, dtype=\"long\", truncating=\"post\", padding=\"post\")\n", + "\n", + "input_ids[0]" + ], + "execution_count": 70, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([ 101, 9640, 48446, 66402, 10892, 9405, 11664, 8982,\n", + " 14867, 10003, 14863, 33188, 48345, 119, 9995, 118992,\n", + " 108436, 23969, 28911, 9029, 119266, 77884, 100, 102,\n", + " 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0])" + ] + }, + "metadata": {}, + "execution_count": 70 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "v1NKmP0Fd4qD", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "11c0e1f3-75b0-43a1-e14f-562828d91dc1" + }, + "source": [ + "# 어텐션 마스크 초기화\n", + "attention_masks = []\n", + "\n", + "# 어텐션 마스크를 패딩이 아니면 1, 패딩이면 0으로 설정\n", + "# 패딩 부분은 BERT 모델에서 어텐션을 수행하지 않아 속도 향상\n", + "for seq in input_ids:\n", + " seq_mask = [float(i>0) for i in seq]\n", + " attention_masks.append(seq_mask)\n", + "\n", + "print(attention_masks[0])" + ], + "execution_count": 71, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "RIkaYCGbd4qG", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "c7c0e706-2f80-47f6-9e61-162a56f5c212" + }, + "source": [ + "# 데이터를 파이토치의 텐서로 변환\n", + "test_inputs = torch.tensor(input_ids)\n", + "test_labels = torch.tensor(labels)\n", + "test_masks = torch.tensor(attention_masks)\n", + "\n", + "\n", + "print(test_inputs[0])\n", + "print(test_labels[0])\n", + "print(test_masks[0])" + ], + "execution_count": 72, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "tensor([ 101, 9640, 48446, 66402, 10892, 9405, 11664, 8982, 14867,\n", + " 10003, 14863, 33188, 48345, 119, 9995, 118992, 108436, 23969,\n", + " 28911, 9029, 119266, 77884, 100, 102, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0])\n", + "tensor(0)\n", + "tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", + " 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", + " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", + " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", + " 0., 0., 0., 0., 0., 0., 0., 0.])\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "7gwdYv1Ad4qK" + }, + "source": [ + "# 배치 사이즈\n", + "batch_size = 60\n", + "\n", + "# 파이토치의 DataLoader로 입력, 마스크, 라벨을 묶어 데이터 설정\n", + "# 학습시 배치 사이즈 만큼 데이터를 가져옴\n", + "test_data = TensorDataset(test_inputs, test_masks, test_labels)\n", + "test_sampler = RandomSampler(test_data)\n", + "test_dataloader = DataLoader(test_data, sampler=test_sampler, batch_size=batch_size)" + ], + "execution_count": 73, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FBvpU-Hfgcth" + }, + "source": [ + "### 2-5. 모델 생성" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "heToD1ev0mOg", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "c9c01380-2b5b-4df4-9efb-231aa4c3e4b0" + }, + "source": [ + "# GPU 디바이스 이름 구함\n", + "device_name = tf.test.gpu_device_name()\n", + "\n", + "# GPU 디바이스 이름 검사\n", + "if device_name == '/device:GPU:0':\n", + " print('Found GPU at: {}'.format(device_name))\n", + "else:\n", + " raise SystemError('GPU device not found')" + ], + "execution_count": 74, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Found GPU at: /device:GPU:0\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "f6enIxvt1FB2", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "8a420062-53a0-4000-d4fd-b4efdfe99351" + }, + "source": [ + "# 디바이스 설정\n", + "if torch.cuda.is_available(): \n", + " device = torch.device(\"cuda\")\n", + " print('There are %d GPU(s) available.' % torch.cuda.device_count())\n", + " print('We will use the GPU:', torch.cuda.get_device_name(0))\n", + "else:\n", + " device = torch.device(\"cpu\")\n", + " print('No GPU available, using the CPU instead.')" + ], + "execution_count": 75, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "There are 1 GPU(s) available.\n", + "We will use the GPU: Tesla K80\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "MS2MXSiLg5zC", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "ccf3fb70-9760-4df8-e732-d6452553eea8" + }, + "source": [ + "# 분류를 위한 BERT 모델 생성\n", + "model = BertForSequenceClassification.from_pretrained(\"bert-base-multilingual-cased\", num_labels=2)\n", + "model.cuda()" + ], + "execution_count": 76, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias']\n", + "- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "BertForSequenceClassification(\n", + " (bert): BertModel(\n", + " (embeddings): BertEmbeddings(\n", + " (word_embeddings): Embedding(119547, 768, padding_idx=0)\n", + " (position_embeddings): Embedding(512, 768)\n", + " (token_type_embeddings): Embedding(2, 768)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (encoder): BertEncoder(\n", + " (layer): ModuleList(\n", + " (0): BertLayer(\n", + " (attention): BertAttention(\n", + " (self): BertSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): BertSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): BertIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " )\n", + " (output): BertOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (1): BertLayer(\n", + " (attention): BertAttention(\n", + " (self): BertSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): BertSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): BertIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " )\n", + " (output): BertOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (2): BertLayer(\n", + " (attention): BertAttention(\n", + " (self): BertSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): BertSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): BertIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " )\n", + " (output): BertOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (3): BertLayer(\n", + " (attention): BertAttention(\n", + " (self): BertSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): BertSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): BertIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " )\n", + " (output): BertOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (4): BertLayer(\n", + " (attention): BertAttention(\n", + " (self): BertSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): BertSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): BertIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " )\n", + " (output): BertOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (5): BertLayer(\n", + " (attention): BertAttention(\n", + " (self): BertSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): BertSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): BertIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " )\n", + " (output): BertOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (6): BertLayer(\n", + " (attention): BertAttention(\n", + " (self): BertSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): BertSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): BertIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " )\n", + " (output): BertOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (7): BertLayer(\n", + " (attention): BertAttention(\n", + " (self): BertSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): BertSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): BertIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " )\n", + " (output): BertOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (8): BertLayer(\n", + " (attention): BertAttention(\n", + " (self): BertSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): BertSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): BertIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " )\n", + " (output): BertOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (9): BertLayer(\n", + " (attention): BertAttention(\n", + " (self): BertSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): BertSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): BertIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " )\n", + " (output): BertOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (10): BertLayer(\n", + " (attention): BertAttention(\n", + " (self): BertSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): BertSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): BertIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " )\n", + " (output): BertOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (11): BertLayer(\n", + " (attention): BertAttention(\n", + " (self): BertSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): BertSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): BertIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " )\n", + " (output): BertOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " )\n", + " (pooler): BertPooler(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (activation): Tanh()\n", + " )\n", + " )\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (classifier): Linear(in_features=768, out_features=2, bias=True)\n", + ")" + ] + }, + "metadata": {}, + "execution_count": 76 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### 2-6. Prediction" + ], + "metadata": { + "id": "ILwVMwib29g5" + } + }, + { + "cell_type": "code", + "metadata": { + "id": "ZIdfbLTuWmxk" + }, + "source": [ + "# 옵티마이저 설정\n", + "optimizer = AdamW(model.parameters(),\n", + " lr = 2e-5, # 학습률\n", + " eps = 1e-8 # 0으로 나누는 것을 방지하기 위한 epsilon 값\n", + " )\n", + "\n", + "# 에폭수\n", + "epochs = 1\n", + "\n", + "# 총 훈련 스텝 : 배치반복 횟수 * 에폭\n", + "total_steps = len(train_dataloader) * epochs\n", + "\n", + "# 처음에 학습률을 조금씩 변화시키는 스케줄러 생성\n", + "scheduler = get_linear_schedule_with_warmup(optimizer, \n", + " num_warmup_steps = 0,\n", + " num_training_steps = total_steps)" + ], + "execution_count": 77, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gzCHV_ghj7DM" + }, + "source": [ + "### 2-7. 모델 학습" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "S0-p6pPVXCRe" + }, + "source": [ + "# 정확도 계산 함수\n", + "def flat_accuracy(preds, labels):\n", + " \n", + " pred_flat = np.argmax(preds, axis=1).flatten()\n", + " labels_flat = labels.flatten()\n", + "\n", + " return np.sum(pred_flat == labels_flat) / len(labels_flat)\n", + "\n", + "def precision(preds, labels):\n", + " # 실제 맞은거 / 내가 맞다고 한거\n", + " pred_flat = np.argmax(preds, axis=1).flatten() # 예측한 결과물 리스트\n", + " labels_flat = labels.flatten() # 정답 리스트\n", + " a=np.sum(pred_flat==True) + np.sum(labels_flat==True) - np.sum(pred_flat!=labels_flat)\n", + " b=np.sum(pred_flat==True) + np.sum(labels_flat==False) - np.sum(pred_flat==labels_flat)\n", + " return a/(a+b)\n", + "\n", + "def recall(preds,lables):\n", + " # 실제 맞은거/ 실제 맞은거 + 아니라고 했는데 맞은거 \n", + " pred_flat = np.argmax(preds, axis=1).flatten()\n", + " labels_flat = labels.flatten()\n", + " a= np.sum(pred_flat==True) + np.sum(labels_flat==True) - np.sum(pred_flat!=labels_flat)\n", + " b= np.sum(pred_flat==False) + np.sum(labels_flat ==True) - np.sum(pred_flat==labels_flat)\n", + " return a/(a+b)\n", + "\n", + "\n" + ], + "execution_count": 78, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "FJXISnJzCdLM" + }, + "source": [ + "# 시간 표시 함수\n", + "def format_time(elapsed):\n", + "\n", + " # 반올림\n", + " elapsed_rounded = int(round((elapsed)))\n", + " \n", + " # hh:mm:ss으로 형태 변경\n", + " return str(datetime.timedelta(seconds=elapsed_rounded))" + ], + "execution_count": 79, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "muU2kS2GCh4y", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "132d4da4-0ebe-431d-c7a6-4b66915234a6" + }, + "source": [ + "# 재현을 위해 랜덤시드 고정\n", + "seed_val = 42\n", + "random.seed(seed_val)\n", + "np.random.seed(seed_val)\n", + "torch.manual_seed(seed_val)\n", + "torch.cuda.manual_seed_all(seed_val)\n", + "\n", + "# 그래디언트 초기화\n", + "model.zero_grad()\n", + "\n", + "# 에폭만큼 반복\n", + "for epoch_i in range(0, epochs):\n", + " \n", + " # ========================================\n", + " # Training\n", + " # ========================================\n", + " \n", + " print(\"\")\n", + " print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))\n", + " print('Training...')\n", + "\n", + " # 시작 시간 설정\n", + " t0 = time.time()\n", + "\n", + " # 로스 초기화\n", + " total_loss = 0\n", + "\n", + " # 훈련모드로 변경\n", + " model.train()\n", + " \n", + " # 데이터로더에서 배치만큼 반복하여 가져옴\n", + " for step, batch in enumerate(train_dataloader):\n", + " # 경과 정보 표시\n", + " if step % 500 == 0 and not step == 0:\n", + " elapsed = format_time(time.time() - t0)\n", + " print(' Batch {:>5,} of {:>5,}. Elapsed: {:}.'.format(step, len(train_dataloader), elapsed))\n", + "\n", + " # 배치를 GPU에 넣음\n", + " batch = tuple(t.to(device) for t in batch)\n", + " \n", + " # 배치에서 데이터 추출\n", + " b_input_ids, b_input_mask, b_labels = batch\n", + "\n", + " # Forward 수행 \n", + " outputs = model(b_input_ids, \n", + " token_type_ids=None, \n", + " attention_mask=b_input_mask, \n", + " labels=b_labels)\n", + " \n", + " # 로스 구함\n", + " loss = outputs[0]\n", + "\n", + " # 총 로스 계산\n", + " total_loss += loss.item()\n", + "\n", + " # Backward 수행으로 그래디언트 계산\n", + " loss.backward()\n", + "\n", + " # 그래디언트 클리핑\n", + " torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)\n", + "\n", + " # 그래디언트를 통해 가중치 파라미터 업데이트\n", + " optimizer.step()\n", + "\n", + " # 스케줄러로 학습률 감소\n", + " scheduler.step()\n", + "\n", + " # 그래디언트 초기화\n", + " model.zero_grad()\n", + "\n", + " # 평균 로스 계산\n", + " avg_train_loss = total_loss / len(train_dataloader) \n", + "\n", + " print(\"\")\n", + " print(\" Average training loss: {0:.2f}\".format(avg_train_loss))\n", + " \n", + " \n", + " print(\" Training epcoh took: {:}\".format(format_time(time.time() - t0)))\n", + " \n", + " # ========================================\n", + " # Validation\n", + " # ========================================\n", + "\n", + " print(\"\")\n", + " print(\"Running Validation...\")\n", + "\n", + " #시작 시간 설정\n", + " t0 = time.time()\n", + "\n", + " # 평가모드로 변경\n", + " model.eval()\n", + "\n", + " # 변수 초기화\n", + " eval_loss, eval_accuracy = 0, 0\n", + " eval_precision, eval_recall = 0, 0\n", + " nb_eval_steps, nb_eval_examples = 0, 0\n", + "\n", + " # 데이터로더에서 배치만큼 반복하여 가져옴\n", + " for batch in validation_dataloader:\n", + " # 배치를 GPU에 넣음\n", + " batch = tuple(t.to(device) for t in batch)\n", + " \n", + " # 배치에서 데이터 추출\n", + " b_input_ids, b_input_mask, b_labels = batch\n", + " \n", + " # 그래디언트 계산 안함\n", + " with torch.no_grad(): \n", + " # Forward 수행\n", + " outputs = model(b_input_ids, \n", + " token_type_ids=None, \n", + " attention_mask=b_input_mask)\n", + " \n", + " # 로스 구함\n", + " logits = outputs[0]\n", + "\n", + " # CPU로 데이터 이동\n", + " logits = logits.detach().cpu().numpy()\n", + " label_ids = b_labels.to('cpu').numpy()\n", + " \n", + " # 출력 로짓과 라벨을 비교하여 정확도 계산\n", + " tmp_eval_accuracy = flat_accuracy(logits, label_ids)\n", + " eval_accuracy += tmp_eval_accuracy\n", + " temp_eval_precision = precision(logits,label_ids)\n", + " eval_precision += temp_eval_precision\n", + " temp_eval_recall = recall(logits,label_ids)\n", + " eval_recall += temp_eval_recall\n", + " nb_eval_steps += 1\n", + "\n", + " print(\" Accuracy: {0:.5f}\".format(eval_accuracy/nb_eval_steps))\n", + " print(\" Precision: {0:.5f}\".format(eval_precision/nb_eval_steps))\n", + " print(\" Recall: {0:.5f}\".format(eval_recall/nb_eval_steps))\n", + " print(\" Validation took: {:}\".format(format_time(time.time() - t0)))\n", + "\n", + "print(\"\")\n", + "print(\"Training complete!\")" + ], + "execution_count": 80, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "======== Epoch 1 / 1 ========\n", + "Training...\n", + " Batch 500 of 1,124. Elapsed: 0:13:09.\n", + " Batch 1,000 of 1,124. Elapsed: 0:26:18.\n", + "\n", + " Average training loss: 0.52\n", + " Training epcoh took: 0:29:34\n", + "\n", + "Running Validation...\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:21: DeprecationWarning: elementwise comparison failed; this will raise an error in the future.\n", + "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:22: DeprecationWarning: elementwise comparison failed; this will raise an error in the future.\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Accuracy: 0.77688\n", + " Precision: 0.78540\n", + " Recall: 0.49996\n", + " Validation took: 0:01:13\n", + "\n", + "Training complete!\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hxlXEBA0WefL" + }, + "source": [ + "에폭마다 훈련셋과 검증셋을 반복하여 학습을 수행합니다. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6BVbl4Zjatzn" + }, + "source": [ + "### 2-8. 테스트셋 평가" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "c5KHb6RkbHdj", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "ffe3784b-c180-4e1a-c964-6a9bd66a45e5" + }, + "source": [ + "#시작 시간 설정\n", + "t0 = time.time()\n", + "\n", + "# 평가모드로 변경\n", + "model.eval()\n", + "\n", + "# 변수 초기화\n", + "eval_loss, eval_accuracy = 0, 0\n", + "eval_precision, eval_recall = 0, 0\n", + "nb_eval_steps, nb_eval_examples = 0, 0\n", + "\n", + "# 데이터로더에서 배치만큼 반복하여 가져옴\n", + "for step, batch in enumerate(test_dataloader):\n", + " # 경과 정보 표시\n", + " if step % 100 == 0 and not step == 0:\n", + " elapsed = format_time(time.time() - t0)\n", + " print(' Batch {:>5,} of {:>5,}. Elapsed: {:}.'.format(step, len(test_dataloader), elapsed))\n", + "\n", + " # 배치를 GPU에 넣음\n", + " batch = tuple(t.to(device) for t in batch)\n", + " \n", + " # 배치에서 데이터 추출\n", + " b_input_ids, b_input_mask, b_labels = batch\n", + " \n", + " # 그래디언트 계산 안함\n", + " with torch.no_grad(): \n", + " # Forward 수행\n", + " outputs = model(b_input_ids, \n", + " token_type_ids=None, \n", + " attention_mask=b_input_mask)\n", + " \n", + " # 로스 구함\n", + " logits = outputs[0]\n", + "\n", + " # CPU로 데이터 이동\n", + " logits = logits.detach().cpu().numpy()\n", + " label_ids = b_labels.to('cpu').numpy()\n", + " \n", + " # 출력 로짓과 라벨을 비교하여 정확도 계산\n", + " tmp_eval_accuracy = flat_accuracy(logits, label_ids)\n", + " eval_accuracy += tmp_eval_accuracy\n", + " temp_eval_precision = precision(logits,label_ids)\n", + " eval_precision += temp_eval_precision\n", + " temp_eval_recall = recall(logits,label_ids)\n", + " eval_recall += temp_eval_recall\n", + " nb_eval_steps += 1\n", + "\n", + "print(\"\")\n", + "print(\"Accuracy: {0:.5f}\".format(eval_accuracy/nb_eval_steps))\n", + "print(\" Precision: {0:.5f}\".format(eval_precision/nb_eval_steps))\n", + "print(\" Recall: {0:.5f}\".format(eval_recall/nb_eval_steps))\n", + "\n", + "print(\"Test took: {:}\".format(format_time(time.time() - t0)))" + ], + "execution_count": 81, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:21: DeprecationWarning: elementwise comparison failed; this will raise an error in the future.\n", + "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:22: DeprecationWarning: elementwise comparison failed; this will raise an error in the future.\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Batch 100 of 417. Elapsed: 0:00:59.\n", + " Batch 200 of 417. Elapsed: 0:01:57.\n", + " Batch 300 of 417. Elapsed: 0:02:56.\n", + " Batch 400 of 417. Elapsed: 0:03:55.\n", + "\n", + "Accuracy: 0.77245\n", + " Precision: 0.77396\n", + " Recall: 0.49996\n", + "Test took: 0:04:04\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "U7SzL1IBe1Dm" + }, + "source": [ + "### **2-9. 새로운 문장 테스트**" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Tb4v_VfEfGQB" + }, + "source": [ + "# 입력 데이터 변환\n", + "def convert_input_data(sentences):\n", + "\n", + " # BERT의 토크나이저로 문장을 토큰으로 분리\n", + " tokenized_texts = [tokenizer.tokenize(sent) for sent in sentences]\n", + "\n", + " # 입력 토큰의 최대 시퀀스 길이\n", + " MAX_LEN = 128\n", + "\n", + " # 토큰을 숫자 인덱스로 변환\n", + " input_ids = [tokenizer.convert_tokens_to_ids(x) for x in tokenized_texts]\n", + " \n", + " # 문장을 MAX_LEN 길이에 맞게 자르고, 모자란 부분을 패딩 0으로 채움\n", + " input_ids = pad_sequences(input_ids, maxlen=MAX_LEN, dtype=\"long\", truncating=\"post\", padding=\"post\")\n", + "\n", + " # 어텐션 마스크 초기화\n", + " attention_masks = []\n", + "\n", + " # 어텐션 마스크를 패딩이 아니면 1, 패딩이면 0으로 설정\n", + " # 패딩 부분은 BERT 모델에서 어텐션을 수행하지 않아 속도 향상\n", + " for seq in input_ids:\n", + " seq_mask = [float(i>0) for i in seq]\n", + " attention_masks.append(seq_mask)\n", + "\n", + " # 데이터를 파이토치의 텐서로 변환\n", + " inputs = torch.tensor(input_ids)\n", + " masks = torch.tensor(attention_masks)\n", + "\n", + " return inputs, masks\n", + "\n", + "\n", + "# 문장 테스트\n", + "def test_sentences(sentences):\n", + "\n", + " # 평가모드로 변경\n", + " model.eval()\n", + "\n", + " # 문장을 입력 데이터로 변환\n", + " inputs, masks = convert_input_data(sentences)\n", + "\n", + " # 데이터를 GPU에 넣음\n", + " b_input_ids = inputs.to(device)\n", + " b_input_mask = masks.to(device)\n", + " \n", + " # 그래디언트 계산 안함\n", + " with torch.no_grad(): \n", + " # Forward 수행\n", + " outputs = model(b_input_ids, \n", + " token_type_ids=None, \n", + " attention_mask=b_input_mask)\n", + "\n", + " # 로스 구함\n", + " logits = outputs[0]\n", + "\n", + " # CPU로 데이터 이동\n", + " logits = logits.detach().cpu().numpy()\n", + "\n", + " return logits" + ], + "execution_count": 82, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "ZQezr0tljJlM" + }, + "source": [ + "import openpyxl\n", + "\n", + "def labeling(inputName):\n", + " sentences = pd.read_excel(inputName+'_data.xlsx', header = None)\n", + " workbook = openpyxl.load_workbook(inputName+'_data.xlsx')\n", + " sheet = workbook['Sheet1']\n", + "\n", + " result_label = []\n", + " for i in range(len(sentences)):\n", + " logits = test_sentences([sheet.cell(column=1, row=i+1).value])\n", + " result_label.append(np.argmax(logits))\n", + " #print(np.argmax(logits))\n", + " print(result_label)\n", + " df = pandas.DataFrame(result_label)\n", + " df.to_excel(inputName+'_label.xlsx', header=False, index=None)\n", + " return result_label" + ], + "execution_count": 83, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "-9MQ0SK0jofN" + }, + "source": [ + "# logits = test_sentences(['인가니 가장 큰 공포를 느낀다는 시빌미터에서 뛰어내리는 모형탑훈련 제가 직접 한번 해보겠습니다 이야야야야야야야야 쯔아크아아아아아아아가아가아아갸갸'])\n", + "# print(logits)\n", + "# print(np.argmax(logits))" + ], + "execution_count": 84, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "result_label = labeling(str(i))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "zp_p-rKb3ZQW", + "outputId": "756b2fcf-9a1d-41b5-eb5a-574d5aa821df" + }, + "execution_count": 85, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# 3. ANALYTICAL PROCESSING" + ], + "metadata": { + "id": "qpnB8P64Hxij" + } + }, + { + "cell_type": "code", + "source": [ + "def prediction(count_like, result_label): \n", + " count_like_label = result_label.count(1)\n", + " count_dislike_label = result_label.count(0)\n", + " print(\"count_like_label: \", count_like_label)\n", + " print(\"count_dislike_label: \", count_dislike_label)\n", + " print(\"댓글 좋싫비: \", count_like_label / (count_like_label + count_dislike_label))\n", + "\n", + " count_like = int(count_like)\n", + " print(\"count_like: \", count_like)\n", + " count_estimated_dislike = count_like * count_dislike_label / count_like_label / 17.7\n", + " print(\"count_estimated_dislike: \", int(count_estimated_dislike))" + ], + "metadata": { + "id": "0n8frvJuH3Q2" + }, + "execution_count": 107, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "prediction(count_like, result_label)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "uPQqLf9I3Jo2", + "outputId": "94da27ea-7a2c-4a10-ad77-d6952b39e799" + }, + "execution_count": 87, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "count_like_label: 160\n", + "count_dislike_label: 50\n", + "count_like: 19846\n", + "count_estimated_dislike: 6201\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# 4. 통합 실행" + ], + "metadata": { + "id": "c4mXG2_hU_qJ" + } + }, + { + "cell_type": "code", + "source": [ + "videoIds = [\"gSIQM6cipNs\", \"umG9aF-1BmM\", \"O7oVpVfpBvE\", \"gzIkbqA7p9k\", \"TRvUe_SRJ6g\", \"elGcWzBx0fQ\", \"Zq-taY1u7fw\", \"XkcOfZ83wrk\", \"R6OyrTHPrLY\", \"iGd7f1-ONSM\", \"Lcm0xqBgfRM\"]\n", + "for i in range(len(videoIds)):\n", + " count_like = collection(videoIds[i], str(i))\n", + " result_label = labeling(str(i))\n", + " prediction(count_like, result_label)\n", + "\n", + "# 실제 싫어요:댓글 싫어요= 17.70:1 정도" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cTzf-NxHT78b", + "outputId": "af02f41c-fd35-484f-b21b-39f4cd77718f" + }, + "execution_count": 108, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "==========\n", + "title: 펄어비스 도깨비 - 첫 공개 게임플레이\n", + "view: 1649177\n", + "like: 19846 / dislike: 444\n", + "1000\n", + "실제 좋싫비: 0.9781172991621488\n", + "[0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]\n", + "count_like_label: 506\n", + "count_dislike_label: 494\n", + "댓글 좋싫비: 0.506\n", + "count_like: 19846\n", + "count_estimated_dislike: 1094\n", + "\n", + "==========\n", + "title: [배드워즈] 럭키블럭판 돌잡이 챌린지! / 로블록스\n", + "view: 142061\n", + "like: 1868 / dislike: 68\n", + "438\n", + "실제 좋싫비: 0.9648760330578512\n", + "[1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1]\n", + "count_like_label: 269\n", + "count_dislike_label: 169\n", + "댓글 좋싫비: 0.6141552511415526\n", + "count_like: 1868\n", + "count_estimated_dislike: 66\n", + "\n", + "==========\n", + "title: 독가스 방구가 몰려온다! 마을을 지켜라! -캐릭온 마크 애니\n", + "view: 143422\n", + "like: 2220 / dislike: 69\n", + "215\n", + "실제 좋싫비: 0.9698558322411533\n", + "[1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1]\n", + "count_like_label: 145\n", + "count_dislike_label: 70\n", + "댓글 좋싫비: 0.6744186046511628\n", + "count_like: 2220\n", + "count_estimated_dislike: 60\n", + "\n", + "==========\n", + "title: [롤]응답하라 2018 (feat.PTSD)\n", + "view: 287080\n", + "like: 4214 / dislike: 140\n", + "735\n", + "실제 좋싫비: 0.9678456591639871\n", + "[0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0]\n", + "count_like_label: 391\n", + "count_dislike_label: 344\n", + "댓글 좋싫비: 0.5319727891156463\n", + "count_like: 4214\n", + "count_estimated_dislike: 209\n", + "\n", + "==========\n", + "title: 킹피스 요루와 견문색을 얻고 \"1200렙대 짱먹은\" 허기워기!!! 이것이 요루의 주인이다!!(킹피스 9화)\n", + "view: 132584\n", + "like: 4765 / dislike: 137\n", + "573\n", + "실제 좋싫비: 0.9720522235822113\n", + "[0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1]\n", + "count_like_label: 419\n", + "count_dislike_label: 154\n", + "댓글 좋싫비: 0.731239092495637\n", + "count_like: 4765\n", + "count_estimated_dislike: 98\n", + "\n", + "==========\n", + "title: 나무와 나무꾼\n", + "view: 633098\n", + "like: 6154 / dislike: 266\n", + "388\n", + "실제 좋싫비: 0.9585669781931464\n", + "[1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1]\n", + "count_like_label: 231\n", + "count_dislike_label: 157\n", + "댓글 좋싫비: 0.595360824742268\n", + "count_like: 6154\n", + "count_estimated_dislike: 236\n", + "\n", + "==========\n", + "title: 감스트 돌아온 김인직 감독되다! 1편 맨유감독 그리고 호날두 피파22\n", + "view: 697897\n", + "like: 5670 / dislike: 320\n", + "976\n", + "실제 좋싫비: 0.9465776293823038\n", + "[1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0]\n", + "count_like_label: 596\n", + "count_dislike_label: 380\n", + "댓글 좋싫비: 0.610655737704918\n", + "count_like: 5670\n", + "count_estimated_dislike: 204\n", + "\n", + "==========\n", + "title: [태번 마스터] 선술집 사장님이 되어 왕실 최고의 여관 만들기🍺 (※시간순삭) (Tavern Master)\n", + "view: 294484\n", + "like: 2225 / dislike: 99\n", + "339\n", + "실제 좋싫비: 0.9574010327022375\n", + "[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0]\n", + "count_like_label: 230\n", + "count_dislike_label: 109\n", + "댓글 좋싫비: 0.6784660766961652\n", + "count_like: 2225\n", + "count_estimated_dislike: 59\n", + "\n", + "==========\n", + "title: 끝장전 끝판왕들간의 숨막히는 한판 승부 ! 박상현 Soma vs 이재호 Light 스타 끝장전 sc1-120\n", + "view: 83055\n", + "like: 1072 / dislike: 51\n", + "96\n", + "실제 좋싫비: 0.9545859305431879\n", + "[0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0]\n", + "count_like_label: 51\n", + "count_dislike_label: 45\n", + "댓글 좋싫비: 0.53125\n", + "count_like: 1072\n", + "count_estimated_dislike: 53\n", + "\n", + "==========\n", + "title: 제작자도 모르는 루트를 찾아낸 고인물들 진짜 미쳤네ㄷㄷ\n", + "view: 632688\n", + "like: 7489 / dislike: 223\n", + "319\n", + "실제 좋싫비: 0.9710840248962656\n", + "[1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0]\n", + "count_like_label: 196\n", + "count_dislike_label: 123\n", + "댓글 좋싫비: 0.6144200626959248\n", + "count_like: 7489\n", + "count_estimated_dislike: 265\n", + "\n", + "==========\n", + "title: 25년 스토리를 일단락.. 드디어 밝혀진 검은조직의 정체!! (코난 스토리 #3)\n", + "view: 1076455\n", + "like: 13685 / dislike: 265\n", + "1000\n", + "실제 좋싫비: 0.9810035842293907\n", + "[1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1]\n", + "count_like_label: 723\n", + "count_dislike_label: 277\n", + "댓글 좋싫비: 0.723\n", + "count_like: 13685\n", + "count_estimated_dislike: 296\n" + ] + } + ] + } + ] +} \ No newline at end of file