Skip to content
Snippets Groups Projects
Commit 3f53afe7 authored by Seok Won's avatar Seok Won
Browse files

Update ajou notice parser

Before: some notices repeat the writer's name inside the title

[writer]: [writer] title

After: the duplicated writer tag is removed from the title

[writer]: title
parent 8ef04d59
No related branches found
No related tags found
No related merge requests found
...@@ -22,6 +22,9 @@ def acked(err, msg): ...@@ -22,6 +22,9 @@ def acked(err, msg):
# Make data into dictionary format # Make data into dictionary format
def makeJson(postId, postTitle, postDate, postLink, postWriter): def makeJson(postId, postTitle, postDate, postLink, postWriter):
duplicate = "[" + postWriter + "]"
if duplicate in postTitle: # writer: [writer] title
postTitle = postTitle.replace(duplicate, "").strip() # -> writer: title
return { return {
postId: { postId: {
"TITLE": postTitle, "TITLE": postTitle,
......
...@@ -5,9 +5,13 @@ from bs4 import BeautifulSoup ...@@ -5,9 +5,13 @@ from bs4 import BeautifulSoup
ADDRESS = "https://www.ajou.ac.kr/kr/ajou/notice.do" ADDRESS = "https://www.ajou.ac.kr/kr/ajou/notice.do"
LENGTH = 10
# Make data into dictionary format # Make data into dictionary format
def makeJson(postId, postTitle, postDate, postLink, postWriter): def makeJson(postId, postTitle, postDate, postLink, postWriter):
duplicate = "[" + postWriter + "]"
if duplicate in postTitle: # writer: [writer] title
postTitle = postTitle.replace(duplicate, "").strip() # -> writer: title
return { return {
postId: { postId: {
"TITLE": postTitle, "TITLE": postTitle,
...@@ -19,7 +23,6 @@ def makeJson(postId, postTitle, postDate, postLink, postWriter): ...@@ -19,7 +23,6 @@ def makeJson(postId, postTitle, postDate, postLink, postWriter):
def parser(): def parser():
LENGTH = 10
req = requests.get(f"{ADDRESS}?mode=list&&articleLimit={LENGTH}&article.offset=0") req = requests.get(f"{ADDRESS}?mode=list&&articleLimit={LENGTH}&article.offset=0")
req.encoding = "utf-8" req.encoding = "utf-8"
html = req.text html = req.text
...@@ -47,7 +50,6 @@ def test_parse(): ...@@ -47,7 +50,6 @@ def test_parse():
postTitle = posts[i].text.strip() postTitle = posts[i].text.strip()
postDate = dates[i].text.strip() postDate = dates[i].text.strip()
postWriter = writers[i].text postWriter = writers[i].text
assert int(postId) > 10000, f"postId is None." assert int(postId) > 10000, f"postId is None."
assert postLink is not None, f"postLink is None." assert postLink is not None, f"postLink is None."
assert postTitle is not None, f"postTitle is None." assert postTitle is not None, f"postTitle is None."
...@@ -55,10 +57,10 @@ def test_parse(): ...@@ -55,10 +57,10 @@ def test_parse():
assert postWriter is not None, f"postWriter is None." assert postWriter is not None, f"postWriter is None."
data = makeJson(postId, postTitle, postDate, postLink, postWriter) data = makeJson(postId, postTitle, postDate, postLink, postWriter)
print("data", json.dumps(data[postId])) temp = json.dumps(data[postId])
print("data", json.loads(temp))
if __name__ == "__main__": if __name__ == "__main__":
test_parse() test_parse()
# print(next(iter(read["POSTS"].keys()))) # Last Key # print(next(iter(read["POSTS"].keys()))) # Last Key
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment