diff --git a/python/tests/test_open.py b/python/tests/test_open.py
new file mode 100644
index 0000000000000000000000000000000000000000..ac5806389306f6d7c9ccc70c0ea909d786b023ff
--- /dev/null
+++ b/python/tests/test_open.py
@@ -0,0 +1,30 @@
+import json
+import os
+from contextlib import contextmanager
+from pathlib import Path
+
+
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+JSON_PATH = os.path.join(BASE_DIR, "test.json")
+
+
+@contextmanager
+def jsonify(mode):
+    """Open JSON_PATH in *mode* and yield its parsed JSON content.
+
+    Using ``with`` guarantees the handle is closed even if ``json.load``
+    raises or the caller's block fails (the plain open()/close() pair
+    leaked the file descriptor on any exception).
+    """
+    with open(JSON_PATH, mode=mode) as f:
+        yield json.load(f)
+
+
+def test_open():
+    # Create the fixture file on first run so the test is self-contained.
+    if not Path(JSON_PATH).is_file():
+        base_data = {"POSTS": {}}
+        with open(JSON_PATH, "a+") as f_write:
+            f_write.write(json.dumps(base_data))
+
+    with jsonify("r+") as f:
+        data = f
+
+    assert data is not None, "data is None."
diff --git a/python/tests/test_parser.py b/python/tests/test_parser.py
new file mode 100644
index 0000000000000000000000000000000000000000..91d2295fdfcb009d26511fb342c56b3ba70d8499
--- /dev/null
+++ b/python/tests/test_parser.py
@@ -0,0 +1,65 @@
+import json
+
+import requests
+from bs4 import BeautifulSoup
+
+
+ADDRESS = "https://www.ajou.ac.kr/kr/ajou/notice.do"
+# Number of notice rows requested per page; module-level so that both
+# parser() and test_parse() agree on it (it was previously local to
+# parser(), making the loop in test_parse() raise NameError).
+LENGTH = 10
+
+
+# Make data into dictionary format
+def makeJson(postId, postTitle, postDate, postLink, postWriter):
+    """Pack one notice row into the ``{id: {...}}`` dictionary format."""
+    return {
+        postId: {
+            "TITLE": postTitle,
+            "DATE": postDate,
+            "LINK": ADDRESS + postLink,
+            "WRITER": postWriter,
+        }
+    }
+
+
+def parser():
+    """Fetch the notice list page and return (ids, posts, dates, writers)."""
+    req = requests.get(f"{ADDRESS}?mode=list&&articleLimit={LENGTH}&article.offset=0")
+    req.encoding = "utf-8"
+    soup = BeautifulSoup(req.text, "html.parser")
+    ids = soup.select("table > tbody > tr > td.b-num-box")
+    posts = soup.select("table > tbody > tr > td.b-td-left > div > a")
+    dates = soup.select("table > tbody > tr > td.b-td-left > div > div > span.b-date")
+    writers = soup.select(
+        "table > tbody > tr > td.b-td-left > div > div.b-m-con > span.b-writer"
+    )
+    return ids, posts, dates, writers
+
+
+# Test #1
+def test_parse():
+    ids, posts, dates, writers = parser()
+    assert len(ids) == LENGTH, f"Check your parser: {ids}"
+    assert len(posts) == LENGTH, f"Check your parser: {posts}"
+    assert len(dates) == LENGTH, f"Check your parser: {dates}"
+    assert len(writers) == LENGTH, f"Check your parser: {writers}"
+    for i in range(LENGTH):
+        postId = ids[i].text.strip()
+        postLink = posts[i].get("href")
+        postTitle = posts[i].text.strip()
+        postDate = dates[i].text.strip()
+        postWriter = writers[i].text
+
+        assert int(postId) > 10000, "postId is None."
+        assert postLink is not None, "postLink is None."
+        assert postTitle is not None, "postTitle is None."
+        assert postDate is not None, "postDate is None."
+        assert postWriter is not None, "postWriter is None."
+
+        data = makeJson(postId, postTitle, postDate, postLink, postWriter)
+        print("data", json.dumps(data[postId]))
+
+
+if __name__ == "__main__":
+    test_parse()
+    # print(next(iter(read["POSTS"].keys()))) # Last Key