From 484b0ad826ab3573aa1291a60040b11326c27d8d Mon Sep 17 00:00:00 2001
From: KimChiheon <kch21026@naver.com>
Date: Thu, 14 Jun 2018 11:31:20 +0900
Subject: [PATCH] Update korea_nlp_kkma.py

---
 korea_nlp/korea_nlp_kkma.py | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/korea_nlp/korea_nlp_kkma.py b/korea_nlp/korea_nlp_kkma.py
index 61580b6..4da9a16 100644
--- a/korea_nlp/korea_nlp_kkma.py
+++ b/korea_nlp/korea_nlp_kkma.py
@@ -35,4 +35,32 @@ class Tokenizer:
         diff_noun_list = list(set(noun_list) - set(compared_noun_list))
         diff_noun_list = list(set(diff_noun_list) - set(score_dic.keys()))
         noun_list = list(set(noun_list) - set(diff_noun_list))
-        return noun_listd
\ No newline at end of file
+        return noun_list
+            
+    def noun_extract_dup(self, sentence, score_dic):  # returns a list of nouns extracted from `sentence`
+        scores = score_dic  # alias; also used for the per-token membership tests below
+        tokenizer = MaxScoreTokenizer(scores=scores)
+        token = tokenizer.tokenize(sentence)  # tokens produced by the score-driven tokenizer
+        noun_list = []
+        compared_noun_list = self.t.nouns(sentence)  # nouns self.t reports for the whole sentence
+        # Keep tokens that are keys of `scores`; pass every other token through self.t.nouns().
+        for num, input in enumerate(token):  # NOTE(review): `input` is unused and shadows the builtin
+            if (token[num] in scores) == True:
+                noun_list.append(token[num])
+            elif (token[num] in scores) == False:
+                twit_token = self.t.nouns(token[num])
+                noun_list= noun_list + twit_token
+        # Discard candidates found neither in the whole-sentence nouns nor among score_dic's keys.
+        diff_noun_list = list(set(noun_list) - set(compared_noun_list))
+        diff_noun_list = list(set(diff_noun_list) - set(score_dic.keys()))
+        noun_list = list(set(noun_list) - set(diff_noun_list))  # NOTE(review): set() also drops duplicates and order -- confirm this is intended for the "_dup" variant
+        return noun_list
+    
+    def noun_counter(self, sentence, score_dic, word):  # returns how many extracted nouns equal `word`
+        noun_list = self.noun_extract(sentence,score_dic)  # NOTE(review): calls noun_extract, not noun_extract_dup; if its result is list(set(...)) the count can only be 0 or 1 -- confirm
+        number = 0
+        for num, input in enumerate(noun_list):  # `num` is unused; `input` shadows the builtin
+            if input == word:
+                number = number + 1
+        # `number` is now the count of noun_list elements that compare equal to `word`.
+        return number
\ No newline at end of file
-- 
GitLab