From 3f53afe7996fb7298130c0577fe20c8f0724d4a4 Mon Sep 17 00:00:00 2001 From: Seok Won <alfex4936@gmail.com> Date: Mon, 7 Dec 2020 11:26:26 +0900 Subject: [PATCH] Update ajou notice parser Before: some notice titles repeat the writer's name, e.g. [writer]: [writer] title After: the duplicated writer tag is removed from the title, e.g. [writer]: title --- python/src/AjouSlackProducer.py | 3 +++ python/tests/test_parser.py | 10 ++++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/python/src/AjouSlackProducer.py b/python/src/AjouSlackProducer.py index d1d09bc..3706dec 100644 --- a/python/src/AjouSlackProducer.py +++ b/python/src/AjouSlackProducer.py @@ -22,6 +22,9 @@ def acked(err, msg): # Make data into dictionary format def makeJson(postId, postTitle, postDate, postLink, postWriter): + duplicate = "[" + postWriter + "]" + if duplicate in postTitle: # writer: [writer] title + postTitle = postTitle.replace(duplicate, "").strip() # -> writer: title return { postId: { "TITLE": postTitle, diff --git a/python/tests/test_parser.py b/python/tests/test_parser.py index 91d2295..736a684 100644 --- a/python/tests/test_parser.py +++ b/python/tests/test_parser.py @@ -5,9 +5,13 @@ from bs4 import BeautifulSoup ADDRESS = "https://www.ajou.ac.kr/kr/ajou/notice.do" +LENGTH = 10 # Make data into dictionary format def makeJson(postId, postTitle, postDate, postLink, postWriter): + duplicate = "[" + postWriter + "]" + if duplicate in postTitle: # writer: [writer] title + postTitle = postTitle.replace(duplicate, "").strip() # -> writer: title return { postId: { "TITLE": postTitle, @@ -19,7 +23,6 @@ def makeJson(postId, postTitle, postDate, postLink, postWriter): def parser(): - LENGTH = 10 req = requests.get(f"{ADDRESS}?mode=list&&articleLimit={LENGTH}&article.offset=0") req.encoding = "utf-8" html = req.text @@ -47,7 +50,6 @@ def test_parse(): postTitle = posts[i].text.strip() postDate = dates[i].text.strip() postWriter = writers[i].text - assert int(postId) > 10000, f"postId is None." 
assert postLink is not None, f"postLink is None." assert postTitle is not None, f"postTitle is None." @@ -55,10 +57,10 @@ def test_parse(): assert postWriter is not None, f"postWriter is None." data = makeJson(postId, postTitle, postDate, postLink, postWriter) - print("data", json.dumps(data[postId])) + temp = json.dumps(data[postId]) + print("data", json.loads(temp)) if __name__ == "__main__": test_parse() # print(next(iter(read["POSTS"].keys()))) # Last Key - -- GitLab