From 3f53afe7996fb7298130c0577fe20c8f0724d4a4 Mon Sep 17 00:00:00 2001
From: Seok Won <alfex4936@gmail.com>
Date: Mon, 7 Dec 2020 11:26:26 +0900
Subject: [PATCH] Update ajou notice parser

Before: some notices repeat the writer's name, in brackets, inside the title

[writer]: [writer] title

After: the duplicated [writer] tag is removed from the title

[writer]: title
---
 python/src/AjouSlackProducer.py |  3 +++
 python/tests/test_parser.py     | 10 ++++++----
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/python/src/AjouSlackProducer.py b/python/src/AjouSlackProducer.py
index d1d09bc..3706dec 100644
--- a/python/src/AjouSlackProducer.py
+++ b/python/src/AjouSlackProducer.py
@@ -22,6 +22,9 @@ def acked(err, msg):
 
 # Make data into dictionary format
 def makeJson(postId, postTitle, postDate, postLink, postWriter):
+    duplicate = "[" + postWriter + "]"
+    if duplicate in postTitle:  # writer: [writer] title
+        postTitle = postTitle.replace(duplicate, "").strip()  # -> writer: title
     return {
         postId: {
             "TITLE": postTitle,
diff --git a/python/tests/test_parser.py b/python/tests/test_parser.py
index 91d2295..736a684 100644
--- a/python/tests/test_parser.py
+++ b/python/tests/test_parser.py
@@ -5,9 +5,13 @@ from bs4 import BeautifulSoup
 
 
 ADDRESS = "https://www.ajou.ac.kr/kr/ajou/notice.do"
+LENGTH = 10
 
 # Make data into dictionary format
 def makeJson(postId, postTitle, postDate, postLink, postWriter):
+    duplicate = "[" + postWriter + "]"
+    if duplicate in postTitle:  # writer: [writer] title
+        postTitle = postTitle.replace(duplicate, "").strip()  # -> writer: title
     return {
         postId: {
             "TITLE": postTitle,
@@ -19,7 +23,6 @@ def makeJson(postId, postTitle, postDate, postLink, postWriter):
 
 
 def parser():
-    LENGTH = 10
     req = requests.get(f"{ADDRESS}?mode=list&&articleLimit={LENGTH}&article.offset=0")
     req.encoding = "utf-8"
     html = req.text
@@ -47,7 +50,6 @@ def test_parse():
         postTitle = posts[i].text.strip()
         postDate = dates[i].text.strip()
         postWriter = writers[i].text
-
         assert int(postId) > 10000, f"postId is None."
         assert postLink is not None, f"postLink is None."
         assert postTitle is not None, f"postTitle is None."
@@ -55,10 +57,10 @@ def test_parse():
         assert postWriter is not None, f"postWriter is None."
 
         data = makeJson(postId, postTitle, postDate, postLink, postWriter)
-        print("data", json.dumps(data[postId]))
+        temp = json.dumps(data[postId])
+        print("data", json.loads(temp))
 
 
 if __name__ == "__main__":
     test_parse()
     # print(next(iter(read["POSTS"].keys())))  # Last Key
-
-- 
GitLab