diff --git a/python/src/AjouSlackProducer.py b/python/src/AjouSlackProducer.py
index 665e753f59b30976830e7865a26aab159ebff8f8..f389f2bd2a9151de639bc10a0f7737ad6dd27d0e 100644
--- a/python/src/AjouSlackProducer.py
+++ b/python/src/AjouSlackProducer.py
@@ -3,8 +3,10 @@ import json
 import os
+import ssl
 import time
 from pathlib import Path
+from urllib.error import HTTPError
+from urllib.request import urlopen
 
-import requests
 from bs4 import BeautifulSoup
 from config import Config
 from confluent_kafka import Producer
@@ -199,16 +201,16 @@ class AjouParserJSON:
 
     # Ajou notices parser
    def parser(self):
+        context = ssl._create_unverified_context()
         try:
-            req = requests.get(
-                f"{self.ADDRESS}?mode=list&&articleLimit=10&article.offset=0"
+            result = urlopen(
+                f"{self.ADDRESS}?mode=list&&articleLimit={self.LENGTH}&article.offset=0",
+                context=context,
             )
-            req.raise_for_status()
-        except requests.exceptions.ConnectionError:
+        except HTTPError:
             print("Seems like the server is down now.")
             return None, None, None, None
-        req.encoding = "utf-8"
-        html = req.text
+        html = result.read()
         soup = BeautifulSoup(html, "html.parser")
         ids = soup.select("table > tbody > tr > td.b-num-box")
         posts = soup.select("table > tbody > tr > td.b-td-left > div > a")
diff --git a/python/src/AjouSlackProducerMySQL.py b/python/src/AjouSlackProducerMySQL.py
index 6f7a5473f1c31a8ee687c781dd94bf54c78fd755..1ed9fbfe8804b3b1dd46654440a929d695f8b081 100644
--- a/python/src/AjouSlackProducerMySQL.py
+++ b/python/src/AjouSlackProducerMySQL.py
@@ -1,10 +1,12 @@
 import datetime
 import json
 import os
+import ssl
 import time
+from urllib.error import HTTPError
+from urllib.request import urlopen
 
 import mysql.connector
-import requests
 from bs4 import BeautifulSoup
 from config import Config
 from confluent_kafka import Producer
@@ -187,16 +189,16 @@ class AjouParser:
 
     # Ajou notices parser
     def parser(self):
+        context = ssl._create_unverified_context()
         try:
-            req = requests.get(
-                f"{self.ADDRESS}?mode=list&&articleLimit=10&article.offset=0"
+            result = urlopen(
+                f"{self.ADDRESS}?mode=list&&articleLimit={self.LENGTH}&article.offset=0",
+                context=context,
             )
-            req.raise_for_status()
-        except requests.exceptions.ConnectionError:
+        except HTTPError:
             print("Seems like the server is down now.")
             return None, None, None, None
-        req.encoding = "utf-8"
-        html = req.text
+        html = result.read()
         soup = BeautifulSoup(html, "html.parser")
         ids = soup.select("table > tbody > tr > td.b-num-box")
         posts = soup.select("table > tbody > tr > td.b-td-left > div > a")
diff --git a/python/tests/test_parser.py b/python/tests/test_parser.py
index 92f757d37e7768ae9d935ded63869eef51ab8a74..c08cc5504484d6b54e374cf9ef420d6c90f42d39 100644
--- a/python/tests/test_parser.py
+++ b/python/tests/test_parser.py
@@ -1,6 +1,7 @@
 import json
+import ssl
 
-import requests
+from urllib.request import urlopen
 from bs4 import BeautifulSoup
@@ -24,9 +25,13 @@ def makeJson(postId, postTitle, postDate, postLink, postWriter):
 
 
 def parser():
-    req = requests.get(f"{ADDRESS}?mode=list&&articleLimit={LENGTH}&article.offset=0")
-    req.encoding = "utf-8"
-    html = req.text
+    # req = requests.get(f"{ADDRESS}?mode=list&&articleLimit={LENGTH}&article.offset=0")
+    context = ssl._create_unverified_context()
+    result = urlopen(
+        f"{ADDRESS}?mode=list&&articleLimit={LENGTH}&article.offset=0", context=context
+    )
+
+    html = result.read()
     soup = BeautifulSoup(html, "html.parser")
     ids = soup.select("table > tbody > tr > td.b-num-box")
     posts = soup.select("table > tbody > tr > td.b-td-left > div > a")
@@ -46,14 +51,14 @@ def test_parse():
     assert len(writers) == 10, f"Check your parser: {writers}"
     for i in range(LENGTH):
         postTitle = posts[i].text.strip()
-        if FILTER_WORDS:
-            FILTERD = False
-            for filter in FILTER_WORDS:
-                if filter in postTitle:
-                    FILTERD = True
-                    break
-            if not FILTERD:
-                continue
+        # if FILTER_WORDS:
+        #     FILTERD = False
+        #     for filter in FILTER_WORDS:
+        #         if filter in postTitle:
+        #             FILTERD = True
+        #             break
+        #     if not FILTERD:
+        #         continue
         postId = ids[i].text.strip()
         postLink = posts[i].get("href")
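
Note (a hedged reviewer sketch, not part of the patch above): ssl._create_unverified_context() is a private CPython helper, and it disables certificate verification entirely rather than fixing the trust problem. If the Ajou server's certificate chain is merely absent from the default trust store, a verified context built from the certifi package (an assumed extra dependency, not used by this patch) keeps TLS checks on; ADDRESS and LENGTH stand for the module constants already defined in test_parser.py:

    import ssl
    from urllib.request import urlopen

    import certifi  # assumed third-party dependency shipping a CA bundle

    # Verified TLS context: trust certifi's CA bundle instead of skipping checks.
    context = ssl.create_default_context(cafile=certifi.where())
    result = urlopen(
        f"{ADDRESS}?mode=list&&articleLimit={LENGTH}&article.offset=0",
        context=context,
    )
    html = result.read()

If adding a dependency is unwanted, plain ssl.create_default_context() still verifies against the system trust store and fails loudly instead of silently accepting any certificate.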