Update korea_nlp_kkma.py

484b0ad8 · KimChiheon · eb2c2fac · 484b0ad8
Commit 484b0ad8 authored 7 years ago by KimChiheon
--- a/korea_nlp/korea_nlp_kkma.py
+++ b/korea_nlp/korea_nlp_kkma.py
@@ -35,4 +35,32 @@ class Tokenizer:
        diff_noun_list = list(set(noun_list) - set(compared_noun_list))
        diff_noun_list = list(set(diff_noun_list) - set(score_dic.keys()))
        noun_list = list(set(noun_list) - set(diff_noun_list))
-        return noun_listd
+        return noun_list
\ No newline at end of file
+    def noun_extract_dup(self, sentence, score_dic):
+        scores = score_dic
+        tokenizer = MaxScoreTokenizer(scores=scores)
+        token = tokenizer.tokenize(sentence)
+        noun_list = []
+        compared_noun_list = self.t.nouns(sentence)
+        for num, input in enumerate(token):
+            if (token[num] in scores) == True:
+                noun_list.append(token[num])
+            elif (token[num] in scores) == False:
+                twit_token = self.t.nouns(token[num])
+                noun_list= noun_list + twit_token
+        diff_noun_list = list(set(noun_list) - set(compared_noun_list))
+        diff_noun_list = list(set(diff_noun_list) - set(score_dic.keys()))
+        noun_list = list(set(noun_list) - set(diff_noun_list))
+        return noun_list
+    def noun_counter(self, sentence, score_dic, word):
+        noun_list = self.noun_extract(sentence,score_dic)
+        number = 0
+        for num, input in enumerate(noun_list):
+            if input == word:
+                number = number + 1
+        return number
\ No newline at end of file