# Reconstructed from a flattened git diff of korea_nlp/korea_nlp_kkma.py
# (hunk "@@ -35,4 +35,32 @@ class Tokenizer:").  The same hunk also corrects
# the last line of the preceding method from `return noun_listd` to
# `return noun_list`; that method starts outside the visible hunk and is
# therefore not reproduced here.  The two functions below are methods that
# belong in the body of `class Tokenizer`.


def noun_extract_dup(self, sentence, score_dic):
    """Extract the nouns of *sentence* using a score dictionary and a tagger.

    The sentence is tokenized with ``MaxScoreTokenizer(scores=score_dic)``.
    A token that is a key of *score_dic* is taken as a noun directly; any
    other token is passed to ``self.t.nouns`` and the nouns it returns are
    collected.  A collected noun is kept only if it is a key of *score_dic*
    or is also reported by ``self.t.nouns(sentence)`` for the whole sentence.

    Args:
        sentence: Text to analyse.
        score_dic: Mapping whose keys are the words handed to
            ``MaxScoreTokenizer`` as ``scores`` and trusted as nouns.

    Returns:
        A list of the distinct nouns that were kept, in the order they were
        first encountered.

    NOTE(review): despite the ``_dup`` suffix the result contains no
    duplicates, exactly as in the original implementation (which built it
    from ``set`` operations) -- confirm whether duplicates were meant to be
    preserved.
    NOTE(review): ``self.t`` is presumably a KoNLPy-style tagger and
    ``MaxScoreTokenizer`` presumably comes from soynlp; neither definition
    is visible in this hunk.
    """
    tokens = MaxScoreTokenizer(scores=score_dic).tokenize(sentence)
    # Nouns the tagger finds in the full sentence; used to validate nouns
    # that were extracted from individual tokens.
    sentence_nouns = set(self.t.nouns(sentence))

    candidates = []
    for token in tokens:
        if token in score_dic:
            candidates.append(token)
        else:
            candidates.extend(self.t.nouns(token))

    # dict.fromkeys removes duplicates while keeping first-occurrence order,
    # so the output no longer depends on set iteration order.
    return list(
        dict.fromkeys(
            noun
            for noun in candidates
            if noun in sentence_nouns or noun in score_dic
        )
    )


def noun_counter(self, sentence, score_dic, word):
    """Count how many extracted nouns of *sentence* are equal to *word*.

    Args:
        sentence: Text to analyse.
        score_dic: Score dictionary forwarded to ``self.noun_extract``.
        word: The noun to count.

    Returns:
        The number of items returned by
        ``self.noun_extract(sentence, score_dic)`` that compare equal to
        *word*.

    NOTE(review): the visible tail of ``noun_extract`` builds its result
    from a ``set``, so this count is presumably never greater than 1 --
    confirm whether a duplicate-preserving extractor was intended here.
    """
    nouns = self.noun_extract(sentence, score_dic)
    return sum(1 for noun in nouns if noun == word)