Skip to content
Snippets Groups Projects
Commit 484b0ad8 authored by KimChiheon's avatar KimChiheon
Browse files

Update korea_nlp_kkma.py

parent eb2c2fac
Branches
No related tags found
No related merge requests found
...@@ -35,4 +35,32 @@ class Tokenizer: ...@@ -35,4 +35,32 @@ class Tokenizer:
diff_noun_list = list(set(noun_list) - set(compared_noun_list)) diff_noun_list = list(set(noun_list) - set(compared_noun_list))
diff_noun_list = list(set(diff_noun_list) - set(score_dic.keys())) diff_noun_list = list(set(diff_noun_list) - set(score_dic.keys()))
noun_list = list(set(noun_list) - set(diff_noun_list)) noun_list = list(set(noun_list) - set(diff_noun_list))
return noun_listd return noun_list
\ No newline at end of file
def noun_extract_dup(self, sentence, score_dic):
    """Return the nouns of *sentence* found via score-based tokenization.

    The sentence is tokenized with ``MaxScoreTokenizer`` using *score_dic*
    as its scores. A token that is a key of *score_dic* is accepted as-is;
    any other token is passed to ``self.t.nouns`` and the nouns it returns
    are collected instead. Finally, only candidates that are either
    reported by ``self.t.nouns(sentence)`` for the whole sentence or are
    keys of *score_dic* are kept.

    Args:
        sentence: Text to analyse.
        score_dic: Mapping whose keys are the known words; it is passed to
            ``MaxScoreTokenizer`` as ``scores``.

    Returns:
        A list of the retained nouns. The list is built from a set, so each
        noun appears at most once and the order is unspecified.

    NOTE(review): despite the ``_dup`` suffix, duplicates are removed by the
    set operations below -- confirm whether duplicates were meant to be
    preserved.
    """
    tokens = MaxScoreTokenizer(scores=score_dic).tokenize(sentence)
    # Nouns of the full sentence according to self.t; used as the
    # reference list when filtering the per-token candidates.
    compared_noun_list = self.t.nouns(sentence)

    noun_list = []
    for token in tokens:
        if token in score_dic:
            noun_list.append(token)
        else:
            # Unknown token: fall back to the nouns self.t finds inside it.
            noun_list.extend(self.t.nouns(token))

    candidates = set(noun_list)
    # Candidates confirmed neither by the whole-sentence analysis nor by
    # the score dictionary are discarded.
    unconfirmed = candidates - set(compared_noun_list) - set(score_dic)
    return list(candidates - unconfirmed)
def noun_counter(self, sentence, score_dic, word):
    """Count how many times *word* occurs among the nouns of *sentence*.

    Args:
        sentence: Text to analyse; forwarded to ``self.noun_extract``.
        score_dic: Score mapping; forwarded to ``self.noun_extract``.
        word: The noun to count.

    Returns:
        The number of elements equal to *word* in the list returned by
        ``self.noun_extract(sentence, score_dic)``.

    NOTE(review): if ``noun_extract`` returns a de-duplicated list, this
    count can never exceed 1 -- confirm whether ``noun_extract_dup`` (or a
    duplicate-preserving extractor) was intended here.
    """
    noun_list = self.noun_extract(sentence, score_dic)
    # list.count performs the same equality-based tally as a manual loop.
    return noun_list.count(word)
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment