From 484b0ad826ab3573aa1291a60040b11326c27d8d Mon Sep 17 00:00:00 2001
From: KimChiheon <kch21026@naver.com>
Date: Thu, 14 Jun 2018 11:31:20 +0900
Subject: [PATCH] Update korea_nlp_kkma.py

---
Review notes (this area is discarded by `git am`; not part of the commit):

* Layout: the line structure of this patch was reconstructed from a copy in
  which every newline had been collapsed. Indentation is assumed to be
  4 spaces per level -- confirm against the repository before applying.
* noun_extract_dup: finishes with the same set-based filtering as the
  context lines above it, so the returned list holds each noun at most once.
  If the "_dup" suffix is meant to keep duplicates, the final
  list(set(...) - set(...)) step defeats that -- confirm intent.
* noun_counter: counts `word` in the result of self.noun_extract(...). If
  that is the method whose tail is shown as hunk context (presumably), its
  result is built from a set, so the count can only ever be 0 or 1.
* Minor: the loop variable `input` shadows the builtin; `num` is unused in
  noun_counter; `(x in scores) == True` / `elif ... == False` is a plain
  if/else.

 korea_nlp/korea_nlp_kkma.py | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/korea_nlp/korea_nlp_kkma.py b/korea_nlp/korea_nlp_kkma.py
index 61580b6..4da9a16 100644
--- a/korea_nlp/korea_nlp_kkma.py
+++ b/korea_nlp/korea_nlp_kkma.py
@@ -35,4 +35,32 @@ class Tokenizer:
         diff_noun_list = list(set(noun_list) - set(compared_noun_list))
         diff_noun_list = list(set(diff_noun_list) - set(score_dic.keys()))
         noun_list = list(set(noun_list) - set(diff_noun_list))
-        return noun_listd
\ No newline at end of file
+        return noun_list
+
+    def noun_extract_dup(self, sentence, score_dic):
+        scores = score_dic
+        tokenizer = MaxScoreTokenizer(scores=scores)
+        token = tokenizer.tokenize(sentence)
+        noun_list = []
+        compared_noun_list = self.t.nouns(sentence)
+
+        for num, input in enumerate(token):
+            if (token[num] in scores) == True:
+                noun_list.append(token[num])
+            elif (token[num] in scores) == False:
+                twit_token = self.t.nouns(token[num])
+                noun_list= noun_list + twit_token
+
+        diff_noun_list = list(set(noun_list) - set(compared_noun_list))
+        diff_noun_list = list(set(diff_noun_list) - set(score_dic.keys()))
+        noun_list = list(set(noun_list) - set(diff_noun_list))
+        return noun_list
+
+    def noun_counter(self, sentence, score_dic, word):
+        noun_list = self.noun_extract(sentence,score_dic)
+        number = 0
+        for num, input in enumerate(noun_list):
+            if input == word:
+                number = number + 1
+
+        return number
\ No newline at end of file
-- 
GitLab